From 2113be6eecddaee1573c108b666e7a508193466c Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Fri, 24 Apr 2015 14:12:21 +0200 Subject: [PATCH] blake80: some changes and launch bounds, no perf changes --- cuda_nist5.cu | 8 +- quark/cuda_quark_blake512.cu | 138 +++++++++++++++-------------------- quark/quarkcoin.cu | 8 +- x11/x11.cu | 9 +-- x13/x13.cu | 9 +-- x15/x14.cu | 8 +- x15/x15.cu | 8 +- x17/x17.cu | 8 +- 8 files changed, 88 insertions(+), 108 deletions(-) diff --git a/cuda_nist5.cu b/cuda_nist5.cu index e691615..8287ca1 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -14,8 +14,8 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -99,14 +99,14 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { int order = 0; // Hash with CUDA - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu index 24d2975..ca335db 100644 --- a/quark/cuda_quark_blake512.cu +++ b/quark/cuda_quark_blake512.cu @@ -7,43 +7,41 @@ #define USE_SHUFFLE 0 -__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) +__constant__ +static uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) // ---------------------------- BEGIN CUDA quark_blake512 functions ------------------------------------ -__constant__ uint8_t c_sigma[16][16]; - -const uint8_t host_sigma[16][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } -}; +__device__ __constant__ +static const uint8_t c_sigma[16][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }}; __device__ __constant__ -const uint64_t c_u512[16] = +static const uint64_t c_u512[16] = { - 0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, - 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL, - 0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, - 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL, - 0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, - 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL, - 0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, - 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL + 0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, + 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL, + 0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, + 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL, + 0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, + 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL, + 0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, + 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL }; #define G(a,b,c,d,x) { \ @@ -111,14 +109,8 @@ void quark_blake512_compress( uint64_t *h, const uint64_t *block, const uint8_t // Hash-Padding __device__ __constant__ static const uint64_t d_constHashPadding[8] = { - 0x0000000000000080ull, - 0, - 0, - 0, - 0, - 0x0100000000000000ull, - 0, - 0x0002000000000000ull + 0x0000000000000080ull, 0, 0, 0, + 0, 0x0100000000000000ull, 0, 0x0002000000000000ull }; __global__ __launch_bounds__(256, 4) @@ -171,14 +163,14 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t quark_blake512_compress( h, buf, c_sigma, c_u512, 512 ); #if __CUDA_ARCH__ <= 350 - uint32_t *outHash = (uint32_t*)&g_hash[8 * hashPosition]; + uint32_t *outHash = (uint32_t*)&g_hash[hashPosition * 8U]; #pragma unroll 8 for (int i=0; i < 8; i++) { outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) ); outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); } #else - uint64_t *outHash = &g_hash[8 * hashPosition]; + uint64_t *outHash = &g_hash[hashPosition * 8U]; for (int i=0; i < 8; i++) { outHash[i] = cuda_swab64(h[i]); } @@ -186,13 +178,20 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t } } -__global__ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) +__global__ __launch_bounds__(256,4) +void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint64_t buf[16]; - uint32_t nounce = startNounce + thread; + #pragma unroll + for (int i=0; i < 16; ++i) + buf[i] = c_PaddedMessage80[i]; + + // The test Nonce + const uint32_t nounce = startNounce + thread; + ((uint32_t*)buf)[19] = cuda_swab32(nounce); uint64_t h[8] = { 0x6a09e667f3bcc908ULL, @@ -205,30 +204,21 @@ __global__ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounc 0x5be0cd19137e2179ULL }; - // Message für die erste Runde in Register holen - #pragma unroll 16 - for (int i=0; i < 16; ++i) - buf[i] = c_PaddedMessage80[i]; - - // The test Nonce - ((uint32_t*)buf)[19] = cuda_swab32(nounce); - - quark_blake512_compress( h, buf, c_sigma, c_u512, 640 ); + quark_blake512_compress(h, buf, c_sigma, c_u512, 640); #if __CUDA_ARCH__ <= 350 - uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; + uint32_t *outHash = (uint32_t*)outputHash + (thread * 16U); #pragma unroll 8 for (uint32_t i=0; i < 8; i++) { outHash[2*i] = cuda_swab32( _HIWORD(h[i]) ); outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) ); } #else - uint64_t *outHash = (uint64_t *)outputHash + 8 * thread; + uint64_t *outHash = (uint64_t*)outputHash + (thread * 8U); for (uint32_t i=0; i < 8; i++) { outHash[i] = cuda_swab64( h[i] ); } #endif - } } @@ -238,30 +228,24 @@ __global__ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounc __host__ void quark_blake512_cpu_init(int thr_id, uint32_t threads) { - // Kopiere die Hash-Tabellen in den GPU-Speicher - CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma, - host_sigma, - sizeof(host_sigma), - 0, cudaMemcpyHostToDevice)); + CUDA_SAFE_CALL(cudaGetLastError()); } -// Blake512 für 80 Byte grosse Eingangsdaten __host__ -void quark_blake512_cpu_setBlock_80(void *pdata) +void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata) { - // Message mit Padding bereitstellen - // lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen. - unsigned char PaddedMessage[128]; - memcpy(PaddedMessage, pdata, 80); - memset(PaddedMessage+80, 0, 48); - PaddedMessage[80] = 0x80; - PaddedMessage[111] = 1; - PaddedMessage[126] = 0x02; - PaddedMessage[127] = 0x80; - - CUDA_SAFE_CALL( - cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice) - ); + uint64_t message[16]; + + memcpy(message, pdata, 80); + message[10] = 0x80; + message[11] = 0; + message[12] = 0; + message[13] = 0x0100000000000000ull; + message[14] = 0; + message[15] = 0x8002000000000000ull; // 0x280 + + cudaMemcpyToSymbol(c_PaddedMessage80, message, sizeof(message), 0, cudaMemcpyHostToDevice); + CUDA_SAFE_CALL(cudaGetLastError()); } __host__ @@ -269,18 +253,16 @@ void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNoun { const uint32_t threadsperblock = 256; - // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); quark_blake512_gpu_hash_64<<>>(threads, startNounce, d_nonceVector, (uint64_t*)d_outputHash); - // Strategisches Sleep Kommando zur Senkung der CPU Last //MyStreamSynchronize(NULL, order, thr_id); } __host__ -void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash) { const uint32_t threadsperblock = 256; diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 2fda2da..8f0d4d8 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -20,8 +20,8 @@ static uint32_t *d_branch2Nonces[MAX_GPUS]; static uint32_t *d_branch3Nonces[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); @@ -170,7 +170,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { @@ -178,7 +178,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, size_t nrm1=0, nrm2=0, nrm3=0; // erstes Blake512 Hash mit CUDA - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; // das ist der unbedingte Branch für BMW512 quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); diff --git a/x11/x11.cu b/x11/x11.cu index fce4997..cfef513 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -23,8 +23,8 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -181,8 +181,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); - + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { @@ -190,7 +189,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, uint32_t foundNonce; // Hash with CUDA - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; TRACE("blake :"); quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE("bmw :"); diff --git a/x13/x13.cu b/x13/x13.cu index 77d01d7..40d107f 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -25,10 +25,9 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; - extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -188,14 +187,14 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { uint32_t foundNonce; int order = 0; - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); diff --git a/x15/x14.cu b/x15/x14.cu index abd398f..02a5b2d 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -30,8 +30,8 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -197,12 +197,12 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, for (int k = 0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { int order = 0; - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); diff --git a/x15/x15.cu b/x15/x15.cu index 2a220ec..d3a6c78 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -31,8 +31,8 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -208,12 +208,12 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { int order = 0; - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); diff --git a/x17/x17.cu b/x17/x17.cu index 6aa74a3..6d7e071 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -34,8 +34,8 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); -extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -230,14 +230,14 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - quark_blake512_cpu_setBlock_80((void*)endiandata); + quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); do { int order = 0; // Hash with CUDA - quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);