Browse Source

some performance improvement

2upstream
orignal 7 years ago
parent
commit
660d790014
  1. 4
      gost/cuda_gosthash.cu

4
gost/cuda_gosthash.cu

@ -724,7 +724,7 @@ void GOST_FS(uint64_t* const state64, uint64_t* return_state)
#if (__CUDA_ARCH__ >= 500) #if (__CUDA_ARCH__ >= 500)
return_state[b] = __ldg (&T0[EXTRACT_BYTE(state32[14], b)]) return_state[b] = __ldg (&T0[EXTRACT_BYTE(state32[14], b)])
^ __ldg (&T1[EXTRACT_BYTE(state32[12], b)]) ^ __ldg (&T1[EXTRACT_BYTE(state32[12], b)])
^ __ldg (&T2[EXTRACT_BYTE(state32[10], b)]) ^ T2S[EXTRACT_BYTE(state32[10], b)]
^ __ldg (&T3[EXTRACT_BYTE(state32[8], b)]) ^ __ldg (&T3[EXTRACT_BYTE(state32[8], b)])
^ T4S[EXTRACT_BYTE(state32[6], b)] ^ T4S[EXTRACT_BYTE(state32[6], b)]
^ T5S[EXTRACT_BYTE(state32[4], b)] ^ T5S[EXTRACT_BYTE(state32[4], b)]
@ -736,7 +736,7 @@ void GOST_FS(uint64_t* const state64, uint64_t* return_state)
^ T2S[EXTRACT_BYTE(state32[11], b)] ^ T2S[EXTRACT_BYTE(state32[11], b)]
^ T3S[EXTRACT_BYTE(state32[9], b)] ^ T3S[EXTRACT_BYTE(state32[9], b)]
^ __ldg (&T4[EXTRACT_BYTE(state32[7], b)]) ^ __ldg (&T4[EXTRACT_BYTE(state32[7], b)])
^ __ldg (&T5[EXTRACT_BYTE(state32[5], b)]) ^ T5S[EXTRACT_BYTE(state32[5], b)]
^ __ldg (&T6[EXTRACT_BYTE(state32[3], b)]) ^ __ldg (&T6[EXTRACT_BYTE(state32[3], b)])
^ __ldg (&T7[EXTRACT_BYTE(state32[1], b)]); ^ __ldg (&T7[EXTRACT_BYTE(state32[1], b)]);
#else #else

Loading…
Cancel
Save