From 2f541065fb67ff67d4672dec2375f7eebd4f7288 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 12 May 2015 17:13:58 +0200 Subject: [PATCH] cuda_helper: rename correctly hiword/loword functions --- Algo256/cuda_blake256.cu | 2 +- Algo256/cuda_keccak256.cu | 2 +- cuda_helper.h | 14 +++++++------- heavy/cuda_blake512.cu | 18 +++++++++--------- pentablake.cu | 8 ++++---- quark/cuda_bmw512.cu | 2 +- quark/cuda_bmw512_30.cu | 2 +- quark/cuda_quark_blake512.cu | 8 ++++---- quark/cuda_skein512.cu | 4 ++-- qubit/qubit_luffa512.cu | 4 ++-- x15/cuda_whirlpoolx.cu | 2 +- x15/cuda_x15_whirlpool.cu | 4 ++-- 12 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Algo256/cuda_blake256.cu b/Algo256/cuda_blake256.cu index 2001f04..5ce26f2 100644 --- a/Algo256/cuda_blake256.cu +++ b/Algo256/cuda_blake256.cu @@ -12,7 +12,7 @@ extern "C" { #include static __device__ uint64_t cuda_swab32ll(uint64_t x) { - return MAKE_ULONGLONG(cuda_swab32(_LOWORD(x)), cuda_swab32(_HIWORD(x))); + return MAKE_ULONGLONG(cuda_swab32(_LODWORD(x)), cuda_swab32(_HIDWORD(x))); } __constant__ static uint32_t c_data[20]; diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu index a7c918e..cef2688 100644 --- a/Algo256/cuda_keccak256.cu +++ b/Algo256/cuda_keccak256.cu @@ -199,7 +199,7 @@ void keccak256_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputH if (i<9) keccak_gpu_state[i] = c_PaddedMessage80[i]; else keccak_gpu_state[i] = 0; } - keccak_gpu_state[9] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); + keccak_gpu_state[9] = REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); keccak_gpu_state[10] = 0x0000000000000001; keccak_gpu_state[16] = 0x8000000000000000; diff --git a/cuda_helper.h b/cuda_helper.h index 3cbc749..841cbd9 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -81,12 +81,12 @@ __device__ __forceinline__ uint64_t MAKE_ULONGLONG(uint32_t LO, uint32_t HI) } // das Hi Word in einem 64 Bit Typen ersetzen -__device__ __forceinline__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) { +__device__ __forceinline__ uint64_t REPLACE_HIDWORD(const uint64_t &x, const uint32_t &y) { return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32U); } // das Lo Word in einem 64 Bit Typen ersetzen -__device__ __forceinline__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) { +__device__ __forceinline__ uint64_t REPLACE_LODWORD(const uint64_t &x, const uint32_t &y) { return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y); } @@ -105,7 +105,7 @@ __device__ __forceinline__ uint32_t cuda_swab32(uint32_t x) #endif // das Lo Word aus einem 64 Bit Typen extrahieren -__device__ __forceinline__ uint32_t _LOWORD(const uint64_t &x) { +__device__ __forceinline__ uint32_t _LODWORD(const uint64_t &x) { #if __CUDA_ARCH__ >= 130 return (uint32_t)__double2loint(__longlong_as_double(x)); #else @@ -114,7 +114,7 @@ __device__ __forceinline__ uint32_t _LOWORD(const uint64_t &x) { } // das Hi Word aus einem 64 Bit Typen extrahieren -__device__ __forceinline__ uint32_t _HIWORD(const uint64_t &x) { +__device__ __forceinline__ uint32_t _HIDWORD(const uint64_t &x) { #if __CUDA_ARCH__ >= 130 return (uint32_t)__double2hiint(__longlong_as_double(x)); #else @@ -128,7 +128,7 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x) // Input: 77665544 33221100 // Output: 00112233 44556677 uint64_t result = __byte_perm((uint32_t) x, 0, 0x0123); - return (result << 32) | __byte_perm(_HIWORD(x), 0, 0x0123); + return (result << 32) | __byte_perm(_HIDWORD(x), 0, 0x0123); } #else /* host */ @@ -483,8 +483,8 @@ void LOHI(uint32_t &lo, uint32_t &hi, uint64_t x) { asm("mov.b64 {%0,%1},%2; \n\t" : "=r"(lo), "=r"(hi) : "l"(x)); #else - lo = _LOWORD(x); - hi = _HIWORD(x); + lo = _LODWORD(x); + hi = _HIDWORD(x); #endif } diff --git a/heavy/cuda_blake512.cu b/heavy/cuda_blake512.cu index ba12f23..22f413b 100644 --- a/heavy/cuda_blake512.cu +++ b/heavy/cuda_blake512.cu @@ -149,20 +149,20 @@ template __global__ void blake512_gpu_hash(uint32_t threads, uin for (int i=0; i < 16; ++i) buf[i] = c_PaddedMessage[i]; // die Nounce durch die thread-spezifische ersetzen - buf[9] = REPLACE_HIWORD(buf[9], nounce); + buf[9] = REPLACE_HIDWORD(buf[9], nounce); uint32_t *hefty = heftyHashes + 8 * hashPosition; if (BLOCKSIZE == 84) { // den thread-spezifischen Hefty1 hash einsetzen // aufwändig, weil das nicht mit uint64_t Wörtern aligned ist. - buf[10] = REPLACE_HIWORD(buf[10], hefty[0]); - buf[11] = REPLACE_LOWORD(buf[11], hefty[1]); - buf[11] = REPLACE_HIWORD(buf[11], hefty[2]); - buf[12] = REPLACE_LOWORD(buf[12], hefty[3]); - buf[12] = REPLACE_HIWORD(buf[12], hefty[4]); - buf[13] = REPLACE_LOWORD(buf[13], hefty[5]); - buf[13] = REPLACE_HIWORD(buf[13], hefty[6]); - buf[14] = REPLACE_LOWORD(buf[14], hefty[7]); + buf[10] = REPLACE_HIDWORD(buf[10], hefty[0]); + buf[11] = REPLACE_LODWORD(buf[11], hefty[1]); + buf[11] = REPLACE_HIDWORD(buf[11], hefty[2]); + buf[12] = REPLACE_LODWORD(buf[12], hefty[3]); + buf[12] = REPLACE_HIDWORD(buf[12], hefty[4]); + buf[13] = REPLACE_LODWORD(buf[13], hefty[5]); + buf[13] = REPLACE_HIDWORD(buf[13], hefty[6]); + buf[14] = REPLACE_LODWORD(buf[14], hefty[7]); } else if (BLOCKSIZE == 80) { buf[10] = MAKE_ULONGLONG(hefty[0], hefty[1]); diff --git a/pentablake.cu b/pentablake.cu index e3bd64e..f539902 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -199,8 +199,8 @@ void pentablake_gpu_hash_80(uint32_t threads, const uint32_t startNounce, void * uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; #pragma unroll 8 for (uint32_t i=0; i < 8; i++) { - outHash[2*i] = cuda_swab32( _HIWORD(h[i]) ); - outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); + outHash[2*i] = cuda_swab32( _HIDWORD(h[i]) ); + outHash[2*i+1] = cuda_swab32( _LODWORD(h[i]) ); } #else uint64_t *outHash = (uint64_t *)outputHash + 8 * thread; @@ -258,8 +258,8 @@ void pentablake_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_ uint32_t *outHash = (uint32_t*)&g_hash[thread<<3]; #pragma unroll 8 for (int i=0; i < 8; i++) { - outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) ); - outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); + outHash[2*i+0] = cuda_swab32( _HIDWORD(h[i]) ); + outHash[2*i+1] = cuda_swab32( _LODWORD(h[i]) ); } #else uint64_t *outHash = &g_hash[thread<<3]; diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu index 44b8a31..51376b6 100644 --- a/quark/cuda_bmw512.cu +++ b/quark/cuda_bmw512.cu @@ -417,7 +417,7 @@ void quark_bmw512_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t * message[i] = vectorize(c_PaddedMessage80[i]); // die Nounce durch die thread-spezifische ersetzen - message[9].y = cuda_swab32(nounce); //REPLACE_HIWORD(message[9], cuda_swab32(nounce)); + message[9].y = cuda_swab32(nounce); //REPLACE_HIDWORD(message[9], cuda_swab32(nounce)); // Compression 1 Compression512(message, h); diff --git a/quark/cuda_bmw512_30.cu b/quark/cuda_bmw512_30.cu index 8b2e858..d14795b 100644 --- a/quark/cuda_bmw512_30.cu +++ b/quark/cuda_bmw512_30.cu @@ -232,7 +232,7 @@ void quark_bmw512_gpu_hash_80_30(uint32_t threads, uint32_t startNounce, uint64_ for(int i=0;i<16;i++) message[i] = c_PaddedMessage80[i]; - message[9] = REPLACE_HIWORD(message[9], cuda_swab32(nounce)); + message[9] = REPLACE_HIDWORD(message[9], cuda_swab32(nounce)); // Compression 1 Compression512_30(message, h); diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu index ca335db..a2454a0 100644 --- a/quark/cuda_quark_blake512.cu +++ b/quark/cuda_quark_blake512.cu @@ -166,8 +166,8 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t uint32_t *outHash = (uint32_t*)&g_hash[hashPosition * 8U]; #pragma unroll 8 for (int i=0; i < 8; i++) { - outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) ); - outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); + outHash[2*i+0] = cuda_swab32( _HIDWORD(h[i]) ); + outHash[2*i+1] = cuda_swab32( _LODWORD(h[i]) ); } #else uint64_t *outHash = &g_hash[hashPosition * 8U]; @@ -210,8 +210,8 @@ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou uint32_t *outHash = (uint32_t*)outputHash + (thread * 16U); #pragma unroll 8 for (uint32_t i=0; i < 8; i++) { - outHash[2*i] = cuda_swab32( _HIWORD(h[i]) ); - outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); + outHash[2*i] = cuda_swab32( _HIDWORD(h[i]) ); + outHash[2*i+1] = cuda_swab32( _LODWORD(h[i]) ); } #else uint64_t *outHash = (uint64_t*)outputHash + (thread * 8U); diff --git a/quark/cuda_skein512.cu b/quark/cuda_skein512.cu index 472341e..ae30c19 100644 --- a/quark/cuda_skein512.cu +++ b/quark/cuda_skein512.cu @@ -613,7 +613,7 @@ void skein512_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t *outp t2 = vectorize(c_PaddedMessage80[18]); uint32_t nonce = swap ? cuda_swab32(startNounce + thread) : startNounce + thread; - uint2 nonce2 = make_uint2(_LOWORD(c_PaddedMessage80[9]), nonce); + uint2 nonce2 = make_uint2(_LODWORD(c_PaddedMessage80[9]), nonce); uint2 p[8]; p[0] = vectorize(c_PaddedMessage80[8]); @@ -714,7 +714,7 @@ void skein512_gpu_hash_80_sm3(uint32_t threads, uint32_t startNounce, uint64_t * h7 = c_PaddedMessage80[7] ^ p[7]; uint32_t nonce = swap ? cuda_swab32(startNounce + thread) : startNounce + thread; - uint64_t nonce64 = MAKE_ULONGLONG(_LOWORD(c_PaddedMessage80[9]), nonce); + uint64_t nonce64 = MAKE_ULONGLONG(_LODWORD(c_PaddedMessage80[9]), nonce); // skein_big_close -> etype = 0x160, ptr = 16, bcount = 1, extra = 16 p[0] = c_PaddedMessage80[8]; diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu index a3be27f..e137106 100644 --- a/qubit/qubit_luffa512.cu +++ b/qubit/qubit_luffa512.cu @@ -367,7 +367,7 @@ void qubit_luffa512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; // die Nounce durch die thread-spezifische ersetzen - buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); + buff.buf64[9] = REPLACE_HIDWORD(buff.buf64[9], cuda_swab32(nounce)); hashState state; @@ -398,7 +398,7 @@ void qubit_luffa512_gpu_finalhash_80(uint32_t threads, uint32_t startNounce, voi for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; // Tested nonce - buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); + buff.buf64[9] = REPLACE_HIDWORD(buff.buf64[9], cuda_swab32(nounce)); hashState state; #pragma unroll 40 diff --git a/x15/cuda_whirlpoolx.cu b/x15/cuda_whirlpoolx.cu index 129a1f9..570b863 100644 --- a/x15/cuda_whirlpoolx.cu +++ b/x15/cuda_whirlpoolx.cu @@ -421,7 +421,7 @@ void whirlpoolx_gpu_hash(uint32_t threads, uint32_t startNounce, uint32_t *resNo uint64_t tmp[8]; uint32_t nounce = startNounce + thread; - n[1] = xor1(REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)),c_xtra[0]); + n[1] = xor1(REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(nounce)),c_xtra[0]); uint32_t* n32 = (uint32_t*)&n[0]; n[0]=sharedMemory[__byte_perm(n32[3], 0, 0x4443) + 1792]; diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index f53e941..c6c98fd 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -2337,7 +2337,7 @@ void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outp /// round 2 /////// ////////////////////////////////// n[0] = c_PaddedMessage80[8]; //read data - n[1] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); //whirlpool + n[1] = REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); //whirlpool n[2] = 0x0000000000000080; //whirlpool n[3] = 0; n[4] = 0; @@ -2359,7 +2359,7 @@ void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outp } state[0] = xor3(state[0], n[0], c_PaddedMessage80[8]); - state[1] = xor3(state[1], n[1], REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)) ); + state[1] = xor3(state[1], n[1], REPLACE_HIDWORD(c_PaddedMessage80[9], cuda_swab32(nounce)) ); state[2] = xor3(state[2], n[2], 0x0000000000000080); state[3] = xor1(state[3], n[3]); state[4] = xor1(state[4], n[4]);