diff --git a/cuda_helper.h b/cuda_helper.h index 21f1b1d..0e65368 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -343,7 +343,7 @@ uint64_t ROTL64(const uint64_t x, const int offset) #endif __device__ __forceinline__ -uint64_t SWAPDWORDS(const uint64_t value) +uint64_t SWAPDWORDS(uint64_t value) { #if __CUDA_ARCH__ >= 320 uint2 temp; diff --git a/groestl_functions_quad.cu b/groestl_functions_quad.cu index 4085283..7688839 100644 --- a/groestl_functions_quad.cu +++ b/groestl_functions_quad.cu @@ -32,8 +32,8 @@ void G256_AddRoundConstantQ_quad(uint32_t &x7, uint32_t &x6, uint32_t &x5, uint3 int andmask = 0xFFFF0000; #else - /* from sp: weird but faster */ - int andmask = ((~((threadIdx.x & 0x03) - 3)) & 0xFFFF0000); + /* from sp: faster (branching problem with if ?) */ + uint32_t andmask = -((threadIdx.x & 3) == 3) & 0xFFFF0000U; #endif x0 ^= ((- (rnd & 0x01) ) & andmask);