From 0ff75791e5d27bb7b51dba5f3a29307ae32e2ef1 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 29 Jan 2017 00:52:13 +0100 Subject: [PATCH] migrate 2nd nonce storage of most algos This allows keeping pdata[19] as a cursor between scans and, later, sorting the nonces. Remaining to migrate: heavy, scrypt, sia... --- Algo256/blake256.cu | 41 +++++++++++++++------------ Algo256/bmw.cu | 40 +++++++++++++++++---------- Algo256/decred.cu | 10 +++---- Algo256/keccak256.cu | 31 ++++++++++++--------- Algo256/vanilla.cu | 29 ++++++++++++++------ JHA/jackpotcoin.cu | 46 +++++++++++++++++-------------- ccminer.cpp | 22 ++++----------- lyra2/lyra2RE.cu | 54 ++++++++++++++++++------------------ lyra2/lyra2REv2.cu | 50 ++++++++++++++++----------------- myriadgroestl.cpp | 34 ++++++++++++----------- neoscrypt/neoscrypt.cpp | 23 +++++++++------- pentablake.cu | 33 +++++++++++++++------- quark/nist5.cu | 47 ++++++++++++++++--------------- quark/quarkcoin.cu | 43 +++++++++++++++++++---------- qubit/deep.cu | 46 ++++++++++++++++--------------- qubit/luffa.cu | 41 +++++++++++++++++---------- qubit/qubit.cu | 45 +++++++++++++++--------------- skein.cu | 61 ++++++++++++++++------------------------- skein2.cpp | 39 +++++++++++++------------- x11/c11.cu | 47 ++++++++++++++++--------------- x11/fresh.cu | 51 ++++++++++++++++++---------------- x11/s3.cu | 50 ++++++++++++++++----------------- x11/sib.cu | 47 +++++++++++++++---------------- x11/veltor.cu | 23 +++++++++------- x11/x11.cu | 47 ++++++++++++++++--------------- x11/x11evo.cu | 53 ++++++++++++++++++----------------- x13/x13.cu | 39 +++++++++++++------------- x15/whirlpool.cu | 29 ++++++++------------ x15/x14.cu | 45 ++++++++++++++++-------------- x15/x15.cu | 45 ++++++++++++++++-------------- x17/x17.cu | 47 ++++++++++++++++--------------- zr5.cu | 49 ++++++++++++++++----------------- 32 files changed, 688 insertions(+), 619 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index 9d6e138..6458664 100644 --- a/Algo256/blake256.cu 
+++ b/Algo256/blake256.cu @@ -45,7 +45,7 @@ static uint32_t *h_resNonce[MAX_GPUS]; /* max count of found nonces in one call */ #define NBN 2 -static uint32_t extra_results[NBN] = { UINT32_MAX }; +static __thread uint32_t extra_results[NBN] = { UINT32_MAX }; #define GSPREC(a,b,c,d,x,y) { \ v[a] += (m[x] ^ c_u256[y]) + v[b]; \ @@ -519,46 +519,51 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non do { // GPU HASH (second block only, first is midstate) - uint32_t foundNonce = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds); + work->nonces[0] = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds); - if (foundNonce != UINT32_MAX) + *hashes_done = pdata[19] - first_nonce + throughput; + + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhashcpu[8]; - uint32_t Htarg = ptarget[6]; + uint32_t _ALIGN(64) vhashcpu[8]; + const uint32_t Htarg = ptarget[6]; for (int k=16; k < 19; k++) be32enc(&endiandata[k], pdata[k]); - be32enc(&endiandata[19], foundNonce); + be32enc(&endiandata[19], work->nonces[0]); blake256hash(vhashcpu, endiandata, blakerounds); if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { - rc = 1; + work->valid_nonces = 1; work_set_target_ratio(work, vhashcpu); - *hashes_done = pdata[19] - first_nonce + throughput; - pdata[19] = foundNonce; #if NBN > 1 if (extra_results[0] != UINT32_MAX) { - be32enc(&endiandata[19], extra_results[0]); + work->nonces[1] = extra_results[0]; + be32enc(&endiandata[19], work->nonces[1]); blake256hash(vhashcpu, endiandata, blakerounds); if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { - pdata[21] = extra_results[0]; if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) { work_set_target_ratio(work, vhashcpu); - xchg(pdata[21], pdata[19]); + xchg(work->nonces[0], work->nonces[1]); + } else { + bn_set_target_ratio(work, vhashcpu, 1); } - rc = 2; + work->valid_nonces = 2; } + pdata[19] = max(work->nonces[0], work->nonces[1]) + 
1; extra_results[0] = UINT32_MAX; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } #endif - return rc; + return work->valid_nonces; } - else if (opt_debug) { - applog_hash((uchar*)ptarget); - applog_compare_hash((uchar*)vhashcpu, (uchar*)ptarget); - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhashcpu[6] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu index 12d9667..3b5ed21 100644 --- a/Algo256/bmw.cu +++ b/Algo256/bmw.cu @@ -43,7 +43,6 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - bool swapnonce = true; uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); @@ -77,24 +76,37 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u cuda_check_cpu_setTarget(ptarget); do { - bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], (int) swapnonce); - uint32_t foundNonce = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]); + bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], 1); *hashes_done = pdata[19] - first_nonce + throughput; - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t _ALIGN(64) vhash64[8]; - endiandata[19] = swab32_if(foundNonce, swapnonce); - bmw_hash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - pdata[19] = swab32_if(foundNonce,!swapnonce); - work_set_target_ratio(work, vhash64); - return 1; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + 
be32enc(&endiandata[19], work->nonces[0]); + bmw_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + bmw_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor + } + return work->valid_nonces; } - else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/Algo256/decred.cu b/Algo256/decred.cu index 9ee0f00..ff5bc36 100644 --- a/Algo256/decred.cu +++ b/Algo256/decred.cu @@ -391,6 +391,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce // GPU HASH decred_gpu_hash_nonce <<>> (throughput, (*pnonce), d_resNonce[thr_id], targetHigh); + *hashes_done = (*pnonce) - first_nonce + throughput; + // first cell contains the valid nonces count cudaMemcpy(resNonces, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); @@ -404,9 +406,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce decred_hash(vhash, endiandata); if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) { - int rc = work->valid_nonces = 1; + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - *hashes_done = (*pnonce) - first_nonce + throughput; work->nonces[0] = swab32(resNonces[1]); *pnonce = work->nonces[0]; @@ -417,7 +418,6 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce decred_hash(vhash, endiandata); if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) { 
work->nonces[1] = swab32(resNonces[n]); - if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { // we really want the best first ? depends... work->shareratio[1] = work->shareratio[0]; @@ -429,7 +429,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce bn_set_target_ratio(work, vhash, 1); work->valid_nonces++; } - rc = 2; // MAX_NONCES submit limited to 2 + work->valid_nonces = 2; // MAX_NONCES submit limited to 2 gpulog(LOG_DEBUG, thr_id, "multiple nonces 1:%08x (%g) %u:%08x (%g)", work->nonces[0], work->sharediff[0], n, work->nonces[1], work->sharediff[1]); @@ -438,7 +438,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, resNonces[n]); } } - return rc; + return work->valid_nonces; } else if (vhash[6] > ptarget[6]) { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[1]); diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index c8c47ea..1885d4e 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no init[thr_id] = true; } - for (int k=0; k < 20; k++) { + for (int k=0; k < 19; k++) { be32enc(&endiandata[k], pdata[k]); } @@ -75,20 +75,25 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - if (foundNonce != UINT32_MAX && bench_algo < 0) + work->nonces[0] = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + if (work->nonces[0] != UINT32_MAX && bench_algo < 0) { - uint32_t _ALIGN(64) vhash64[8]; - be32enc(&endiandata[19], foundNonce); - keccak256_hash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - 
work_set_target_ratio(work, vhash64); - pdata[19] = foundNonce; - return 1; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + + be32enc(&endiandata[19], work->nonces[0]); + keccak256_hash(vhash, endiandata); + + if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + pdata[19] = work->nonces[0] + 1; + return work->valid_nonces; } - else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/Algo256/vanilla.cu b/Algo256/vanilla.cu index c089ad8..f3c4388 100644 --- a/Algo256/vanilla.cu +++ b/Algo256/vanilla.cu @@ -416,6 +416,7 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc do { vanilla_gpu_hash_16_8<<>>(throughput, pdata[19], d_resNonce[thr_id], targetHigh); cudaMemcpyAsync(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost,streams[thr_id]); + *hashes_done = pdata[19] - first_nonce + throughput; cudaStreamSynchronize(streams[thr_id]); if (h_resNonce[thr_id][0] != UINT32_MAX){ @@ -429,31 +430,41 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc vanillahash(vhashcpu, endiandata, blakerounds); if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { - rc = 1; + work->valid_nonces = 1; + work->nonces[0] = h_resNonce[thr_id][0]; work_set_target_ratio(work, vhashcpu); - *hashes_done = pdata[19] - first_nonce + throughput; - pdata[19] = h_resNonce[thr_id][0]; #if NBN > 1 if (h_resNonce[thr_id][1] != UINT32_MAX) { + work->nonces[1] = h_resNonce[thr_id][1]; be32enc(&endiandata[19], h_resNonce[thr_id][1]); vanillahash(vhashcpu, endiandata, blakerounds); - pdata[21] = h_resNonce[thr_id][1]; if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) { 
work_set_target_ratio(work, vhashcpu); - xchg(pdata[19], pdata[21]); + xchg(work->nonces[0], work->nonces[1]); } - rc = 2; + work->valid_nonces = 2; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } #endif - return rc; + return work->valid_nonces; } - else { + else if (vhashcpu[6] > Htarg) { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[thr_id][0]); + pdata[19] = work->nonces[0] + 1; + continue; } } + if ((uint64_t) throughput + pdata[19] >= max_nonce) { + pdata[19] = max_nonce; + break; + } + pdata[19] += throughput; - } while (!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput))); + + } while (!work_restart[thr_id].restart); *hashes_done = pdata[19] - first_nonce; MyStreamSynchronize(NULL, 0, dev_id); diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index b902b82..a65fcc5 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -214,34 +214,40 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc CUDA_LOG_ERROR(); - uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); + work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); - if (foundNonce != UINT32_MAX) + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); // jackpothash function gibt die Zahl der Runden zurück - jackpothash(vhash64, endiandata); + jackpothash(vhash, endiandata); - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - int res = 1; - work_set_target_ratio(work, vhash64); + if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); #if 0 - uint32_t 
secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - nist5hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + jackpothash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } +#else + pdata[19] = work->nonces[0] + 1; // cursor #endif - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/ccminer.cpp b/ccminer.cpp index 98115a8..2cef2c4 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -2343,24 +2343,13 @@ static void *miner_thread(void *userdata) /* record scanhash elapsed time */ gettimeofday(&tv_end, NULL); - // todo: update all algos to use work->nonces and pdata[19] as counter switch (opt_algo) { - case ALGO_BLAKE2S: - case ALGO_CRYPTOLIGHT: - case ALGO_CRYPTONIGHT: - case ALGO_DECRED: - case ALGO_LBRY: + // algos to migrate to replace pdata[21] by work.nonces[] + case ALGO_HEAVY: + case ALGO_SCRYPT: + case ALGO_SCRYPT_JANE: case ALGO_SIA: - case ALGO_VELTOR: - case ALGO_WILDKECCAK: - // migrated algos - break; - case ALGO_ZR5: - // algos with only work.nonces[1] set - work.nonces[0] = nonceptr[0]; - break; - default: - // algos with 2 results in pdata and work.nonces unset + //case ALGO_WHIRLPOOLX: work.nonces[0] = nonceptr[0]; work.nonces[1] = 
nonceptr[2]; } @@ -2483,7 +2472,6 @@ static void *miner_thread(void *userdata) work.submit_nonce_id = 1; nonceptr[0] = work.nonces[1]; if (opt_algo == ALGO_ZR5) { - // todo: use + 4..6 index for pok to allow multiple nonces work.data[0] = work.data[22]; // pok work.data[22] = 0; } diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index 668d99e..4cacbb9 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -136,7 +136,6 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, do { int order = 0; - uint32_t foundNonce; blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); @@ -146,35 +145,34 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - if (foundNonce != UINT32_MAX) + work->nonces[0] = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + if (work->nonces[0] != UINT32_MAX) { - uint32_t _ALIGN(64) vhash64[8]; - - be32enc(&endiandata[19], foundNonce); - lyra2re_hash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = groestl256_getSecNonce(thr_id, 1); - work_set_target_ratio(work, vhash64); - if (secNonce != UINT32_MAX) - { - be32enc(&endiandata[19], secNonce); - lyra2re_hash(vhash64, endiandata); - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - if (opt_debug) - gpulog(LOG_BLUE, thr_id, "found second nonce %08x", secNonce); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; - } + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + + be32enc(&endiandata[19], work->nonces[0]); + lyra2re_hash(vhash, endiandata); + + if (vhash[7] <= Htarg 
&& fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = groestl256_getSecNonce(thr_id, 1); + if (work->nonces[1] != UINT32_MAX) { + be32enc(&endiandata[19], work->nonces[1]); + lyra2re_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index 6866fed..1917651 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -142,7 +142,6 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc do { int order = 0; - uint32_t foundNonces[2] = { 0, 0 }; blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); TRACE("blake :"); @@ -157,37 +156,36 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc cubehash256_cpu_hash_32(thr_id, throughput,pdata[19], d_hash[thr_id], order++); TRACE("cube :"); - bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], foundNonces); + memset(work->nonces, 0, sizeof(work->nonces)); + bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], work->nonces); *hashes_done = pdata[19] - first_nonce + throughput; - if (foundNonces[0] != 0) + if (work->nonces[0] != 0) { - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonces[0]); - lyra2v2_hash(vhash64, endiandata); - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) - { - int res = 1; - work_set_target_ratio(work, vhash64); - pdata[19] = foundNonces[0]; - // check if there was another one... 
- if (foundNonces[1] != 0) - { - be32enc(&endiandata[19], foundNonces[1]); - lyra2v2_hash(vhash64, endiandata); - pdata[21] = foundNonces[1]; - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) { - work_set_target_ratio(work, vhash64); - xchg(pdata[19], pdata[21]); - } - res++; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + lyra2v2_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + lyra2v2_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - return res; + return work->valid_nonces; } - else - { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 7c834c8..9e1803a 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -67,34 +67,36 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned myriadgroestl_cpu_setBlock(thr_id, endiandata, ptarget); do { - // GPU - uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX }; + memset(work->nonces, 0xff, sizeof(work->nonces)); - myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], foundNonces); + // GPU + myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], work->nonces); *hashes_done = pdata[19] - start_nonce + throughput; - if (foundNonces[0] < UINT32_MAX && bench_algo < 0) + if (work->nonces[0] < UINT32_MAX && bench_algo < 0) { uint32_t _ALIGN(64) vhash[8]; - endiandata[19] = swab32(foundNonces[0]); + endiandata[19] = 
swab32(work->nonces[0]); myriadhash(vhash, endiandata); if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - work->nonces[0] = foundNonces[0]; - pdata[19] = foundNonces[0]; - // search for another nonce - if (foundNonces[1] != UINT32_MAX) { - endiandata[19] = swab32(foundNonces[1]); + if (work->nonces[1] != UINT32_MAX) { + endiandata[19] = swab32(work->nonces[1]); myriadhash(vhash, endiandata); - pdata[21] = foundNonces[1]; // to drop - work->nonces[1] = foundNonces[1]; bn_set_target_ratio(work, vhash, 1); - return 2; + work->valid_nonces = 2; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - return 1; - } else if (vhash[7] > ptarget[7]) { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]); + return work->valid_nonces; + } + else if (vhash[7] > ptarget[7]) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp index 93160c3..2a14117 100644 --- a/neoscrypt/neoscrypt.cpp +++ b/neoscrypt/neoscrypt.cpp @@ -62,28 +62,31 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign neoscrypt_setBlockTarget(endiandata,ptarget); do { - uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX }; - neoscrypt_hash_k4(thr_id, throughput, pdata[19], foundNonces, have_stratum); + memset(work->nonces, 0xff, sizeof(work->nonces)); + neoscrypt_hash_k4(thr_id, throughput, pdata[19], work->nonces, have_stratum); *hashes_done = pdata[19] - first_nonce + throughput; - if (foundNonces[0] != UINT32_MAX) + if (work->nonces[0] != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t _ALIGN(64) vhash[8]; if (have_stratum) { - be32enc(&endiandata[19], foundNonces[0]); + be32enc(&endiandata[19], work->nonces[0]); } else { - endiandata[19] = 
foundNonces[0]; + endiandata[19] = work->nonces[0]; } neoscrypt((uchar*)vhash, (uchar*) endiandata, 0x80000620U); - if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - pdata[19] = foundNonces[0]; - return 1; - } else { - gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", foundNonces[0]); + pdata[19] = work->nonces[0] + 1; // cursor + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", work->nonces[0]); } } diff --git a/pentablake.cu b/pentablake.cu index 5a34bc4..37808d5 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -98,21 +98,34 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash[8]; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; - be32enc(&endiandata[19], foundNonce); + be32enc(&endiandata[19], work->nonces[0]); pentablakehash(vhash, endiandata); - if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { - rc = 1; + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - pdata[19] = foundNonce; - return rc; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + pentablakehash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 
1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor + } + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/quark/nist5.cu b/quark/nist5.cu index 2574bbe..9ae26c9 100644 --- a/quark/nist5.cu +++ b/quark/nist5.cu @@ -56,7 +56,6 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - int res = 0; uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*256*16 if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); @@ -101,6 +100,8 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, quark_blake512_cpu_setBlock_80(thr_id, endiandata); cuda_check_cpu_setTarget(ptarget); + work->valid_nonces = 0; + do { int order = 0; @@ -113,31 +114,33 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - nist5hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - nist5hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; 
+ be32enc(&endiandata[19], work->nonces[0]); + nist5hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + nist5hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; goto out; } - else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } @@ -157,7 +160,7 @@ out: cudaStreamDestroy(stream[i]); #endif - return res; + return work->valid_nonces; } // ressources cleanup diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index beea1d5..21fde2f 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -180,7 +180,6 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, do { int order = 0; - uint32_t foundNonce; uint32_t nrm1=0, nrm2=0, nrm3=0; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; @@ -229,8 +228,8 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++); - foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); - + work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); + work->nonces[1] = 0; } else { /* algo 
permutations are made with 2 different buffers */ @@ -263,31 +262,47 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, TRACE("perm3 :"); CUDA_LOG_ERROR(); - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); } *hashes_done = pdata[19] - first_nonce + throughput; - if (foundNonce != UINT32_MAX) + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash[8]; - be32enc(&endiandata[19], foundNonce); + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); quarkhash(vhash, endiandata); if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - pdata[19] = foundNonce; - return 1; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - applog_hash((uchar*) vhash); - applog_hash((uchar*) ptarget); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + quarkhash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor + } + return work->valid_nonces; + } + else if (vhash[7] > ptarget[7]) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } + if ((uint64_t) throughput + pdata[19] >= max_nonce) { + pdata[19] = max_nonce; + break; + } + pdata[19] += throughput; - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + } while (!work_restart[thr_id].restart); return 0; } diff --git a/qubit/deep.cu b/qubit/deep.cu index affaca3..b400757 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -96,30 +96,33 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, 
uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t _ALIGN(64) vhash64[8]; - be32enc(&endiandata[19], foundNonce); - deephash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - deephash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + deephash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + deephash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; + return work->valid_nonces; } - else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } @@ -127,7 +130,6 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = max_nonce; break; } - 
pdata[19] += throughput; } while (!work_restart[thr_id].restart); diff --git a/qubit/luffa.cu b/qubit/luffa.cu index f92011d..a619c25 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -72,27 +72,40 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t _ALIGN(64) vhash64[8]; - be32enc(&endiandata[19], foundNonce); - luffa_hash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - work_set_target_ratio(work, vhash64); - pdata[19] = foundNonce; - return 1; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + luffa_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + luffa_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor + } + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } - if ((uint64_t) throughput + pdata[19] >= max_nonce) { + if ((uint64_t)throughput + pdata[19] >= max_nonce) { pdata[19] = max_nonce; break; } - pdata[19] += throughput; } while (!work_restart[thr_id].restart); 
diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 38802d5..a0a5f03 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -112,31 +112,33 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - qubithash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - qubithash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + qubithash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + qubithash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; + return work->valid_nonces; } - else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on 
CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } @@ -144,7 +146,6 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = max_nonce; break; } - pdata[19] += throughput; } while (!work_restart[thr_id].restart); diff --git a/skein.cu b/skein.cu index b928f5d..066540f 100644 --- a/skein.cu +++ b/skein.cu @@ -342,10 +342,6 @@ extern "C" void skeincoinhash(void *output, const void *input) memcpy(output, hash, 32); } -static __inline uint32_t swab32_if(uint32_t val, bool iftrue) { - return iftrue ? swab32(val) : val; -} - static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) @@ -355,7 +351,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - const int swap = 1; sm5 = (device_sm[device_map[thr_id]] >= 500); bool checkSecnonce = (have_stratum || have_longpoll) && !sm5; @@ -363,7 +358,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no uint32_t throughput = cuda_default_throughput(thr_id, 1U << 20); if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce)); - uint32_t foundNonce, secNonce = 0; uint64_t target64 = 0; if (opt_benchmark) @@ -409,54 +403,45 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no if (sm5) { /* cuda_skeincoin.cu */ - foundNonce = skeincoin_hash_sm5(thr_id, throughput, pdata[19], swap, target64, &secNonce); + work->nonces[0] = skeincoin_hash_sm5(thr_id, throughput, pdata[19], 1, target64, &work->nonces[1]); } else { /* quark/cuda_skein512.cu */ - skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap); + skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); sha2_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); - foundNonce = cuda_check_hash(thr_id, 
throughput, pdata[19], d_hash[thr_id]); + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); } - if (foundNonce != UINT32_MAX) + if (work->nonces[0] != UINT32_MAX) { uint32_t _ALIGN(64) vhash[8]; - endiandata[19] = swab32_if(foundNonce, swap); + endiandata[19] = swab32(work->nonces[0]); skeincoinhash(vhash, endiandata); - if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { - int res = 1; - uint8_t num = res; + work->valid_nonces = 1; work_set_target_ratio(work, vhash); if (checkSecnonce) { - secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); - } - while (secNonce != 0 && res < 2) /* todo: up to 6 */ - { - endiandata[19] = swab32_if(secNonce, swap); - skeincoinhash(vhash, endiandata); - if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { - // todo: use 19 20 21... zr5 pok to adapt... - endiandata[19] = swab32_if(secNonce, swap); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], work->valid_nonces); + if (work->nonces[1] != 0) { + endiandata[19] = swab32(work->nonces[1]); skeincoinhash(vhash, endiandata); - if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash); - pdata[19+res*2] = swab32_if(secNonce, !swap); - res++; + if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces++; + bn_set_target_ratio(work, vhash, 1); + } + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; } - num++; - //if (checkSecnonce) - // secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); - //else - break; // only one secNonce... 
+ } else { + pdata[19] = work->nonces[0] + 1; // cursor for next scan } - if (res > 1 && opt_debug) - applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num); - pdata[19] = swab32_if(foundNonce, !swap); - return res; + return work->valid_nonces; } - else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + else if (vhash[7] > ptarget[7]) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/skein2.cpp b/skein2.cpp index 1987205..ba9bef4 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -35,17 +35,12 @@ void skein2hash(void *output, const void *input) static bool init[MAX_GPUS] = { 0 }; -static __inline uint32_t swab32_if(uint32_t val, bool iftrue) { - return iftrue ? swab32(val) : val; -} - int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) { int dev_id = device_map[thr_id]; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - const int swap = 1; // to toggle nonce endian uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8 if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); @@ -85,35 +80,39 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned int order = 0; // Hash with CUDA - skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap); + skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { uint32_t _ALIGN(64) 
vhash[8]; - endiandata[19] = swab32_if(foundNonce, swap); + endiandata[19] = swab32(work->nonces[0]); skein2hash(vhash, endiandata); if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash); - if (secNonce != 0) { - endiandata[19] = swab32_if(secNonce, swap); + if (work->nonces[1] != 0) { + endiandata[19] = swab32(work->nonces[1]); skein2hash(vhash, endiandata); + work->valid_nonces++; bn_set_target_ratio(work, vhash, 1); - pdata[21] = work->nonces[1] = swab32_if(secNonce, !swap); - gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", swab32(secNonce)); - res++; + gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", endiandata[19]); + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor for next scan } - pdata[19] = work->nonces[0] = swab32_if(foundNonce, !swap); - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > ptarget[7]) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/x11/c11.cu b/x11/c11.cu index 4cfd217..72274a0 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -155,7 +155,6 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u do { int order = 0; - uint32_t foundNonce; // Hash with CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; @@ -181,32 +180,32 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], 
d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - c11hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - // check if there was some other ones... - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - c11hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + c11hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + c11hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - pdata[19] = foundNonce + 1; + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; continue; } } diff --git a/x11/fresh.cu b/x11/fresh.cu index 7e58ba0..d81d6e7 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -108,7 +108,6 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, 
x11_shavite512_setBlock_80((void*)endiandata); cuda_check_cpu_setTarget(ptarget); do { - uint32_t foundNonce; int order = 0; // GPU Hash @@ -126,35 +125,41 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, #endif *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - fresh_hash(vhash64, endiandata); - - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - fresh_hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + fresh_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + fresh_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate 
on CPU!", work->nonces[0]); } } + if ((uint64_t) throughput + pdata[19] >= max_nonce) { + pdata[19] = max_nonce; + break; + } pdata[19] += throughput; - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + } while (!work_restart[thr_id].restart); *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/x11/s3.cu b/x11/s3.cu index 1ecb879..cea6760 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -107,8 +107,6 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un cuda_check_cpu_setTarget(ptarget); do { - const uint32_t Htarg = ptarget[7]; - uint32_t foundNonce; int order = 0; x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); @@ -120,31 +118,33 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - s3hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - s3hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + s3hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); 
+ if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + s3hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/x11/sib.cu b/x11/sib.cu index 8804821..6ccddbb 100644 --- a/x11/sib.cu +++ b/x11/sib.cu @@ -152,7 +152,6 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u do { int order = 0; - uint32_t foundNonce; // Hash with CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; @@ -178,32 +177,34 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE("echo => "); - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - sibhash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + sibhash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + 
work->nonces[1] =cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); *hashes_done = pdata[19] - first_nonce + throughput; - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - sibhash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + sibhash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else if (vhash64[7] > Htarg && !opt_quiet) { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - pdata[19] = foundNonce + 1; + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + if (!opt_quiet) + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; continue; } } diff --git a/x11/veltor.cu b/x11/veltor.cu index 452a346..43450bf 100644 --- a/x11/veltor.cu +++ b/x11/veltor.cu @@ -120,31 +120,34 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce veltorhash(vhash, endiandata); if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { - int res = 1; - work_set_target_ratio(work, vhash); work->nonces[0] = startNounce + h_resNonce[0]; + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); if (h_resNonce[1] != UINT32_MAX) { uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1]; - gpulog(LOG_DEBUG, thr_id, "Found 2nd nonce: %08x", secNonce); be32enc(&endiandata[19], secNonce); veltorhash(vhash, endiandata); work->nonces[1] = secNonce; - if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { work_set_target_ratio(work, vhash); xchg(work->nonces[1], work->nonces[0]); } else { - bn_set_target_ratio(work, 
vhash, res); + bn_set_target_ratio(work, vhash, work->valid_nonces); } - res++; + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; // next scan - return res; + return work->valid_nonces; } - else if (vhash[7] > Htarg && !opt_quiet) { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]); + else if (vhash[7] > Htarg) { + if (!opt_quiet) + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]); cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); + pdata[19] = h_resNonce[0] + 1; + continue; } } if ((uint64_t) throughput + pdata[19] >= max_nonce) { diff --git a/x11/x11.cu b/x11/x11.cu index 1701a4e..81eddc7 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -144,7 +144,6 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u do { int order = 0; - uint32_t foundNonce; // Hash with CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; @@ -168,33 +167,33 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE("echo => "); - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + *hashes_done = pdata[19] - first_nonce + throughput; + + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; - be32enc(&endiandata[19], foundNonce); - x11hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - // check if there was some other ones... 
- uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - *hashes_done = pdata[19] - first_nonce + throughput; - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - x11hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + x11hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + x11hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; + return work->valid_nonces; } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - pdata[19] = foundNonce + 1; + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; continue; } } diff --git a/x11/x11evo.cu b/x11/x11evo.cu index b5713ff..1b16e58 100644 --- a/x11/x11evo.cu +++ b/x11/x11evo.cu @@ -290,7 +290,6 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce do { int order = 1; - uint32_t foundNonce; // Hash with CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); @@ -344,38 +343,38 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce break; case ECHO: x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - TRACE("echo => "); + TRACE("echo :"); break; } } - foundNonce = 
cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + *hashes_done = pdata[19] - first_nonce + throughput; + + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7]; - be32enc(&endiandata[19], foundNonce); - x11evo_hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - // check if there was some other ones... - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - *hashes_done = pdata[19] - first_nonce + throughput; - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - x11evo_hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + x11evo_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + pdata[19] = work->nonces[0] + 1; // cursor + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + x11evo_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + gpulog(LOG_DEBUG, thr_id, "second nonce %08x! 
cursor %08x", work->nonces[1], pdata[19]); + work->valid_nonces++; } - pdata[19] = foundNonce; - return res; - } else if (vhash64[7] > Htarg) { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - pdata[19] = foundNonce + 1; + return work->valid_nonces; + } else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; continue; } } @@ -388,7 +387,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce } while (!work_restart[thr_id].restart); - *hashes_done = pdata[19] - first_nonce; + *hashes_done = pdata[19] - first_nonce + 1; return 0; } diff --git a/x13/x13.cu b/x13/x13.cu index 501e9cb..2fdf5f4 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -165,7 +165,6 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u cuda_check_cpu_setTarget(ptarget); do { - uint32_t foundNonce; int order = 0; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; @@ -185,31 +184,33 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u CUDA_LOG_ERROR(); - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash[8]; - be32enc(&endiandata[19], foundNonce); + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); x13hash(vhash, endiandata); if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash); - pdata[19] = foundNonce; - if (secNonce != 0) 
{ - be32enc(&endiandata[19], secNonce); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); x13hash(vhash, endiandata); - pdata[21] = secNonce; - if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { - work_set_target_ratio(work, vhash); - xchg(pdata[19], pdata[21]); - } - res++; + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 2a490de..f275299 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -101,7 +101,6 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, whirlpool512_setBlock_80((void*)endiandata, ptarget); do { - uint32_t foundNonce; int order = 0; *hashes_done = pdata[19] - first_nonce + throughput; @@ -113,28 +112,24 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE64(" 64 :", d_hash); - foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - if (foundNonce != UINT32_MAX && bench_algo < 0) + work->nonces[0] = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + if (work->nonces[0] != UINT32_MAX && bench_algo < 0) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash[8]; - be32enc(&endiandata[19], foundNonce); + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); wcoinhash(vhash, endiandata); if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { - int 
res = 1; + work->valid_nonces = 1; work_set_target_ratio(work, vhash); - #if 0 - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - if (secNonce != 0) { - pdata[21] = secNonce; - res++; - } - #endif - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + pdata[19] = work->nonces[0] + 1; // cursor + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } if ((uint64_t) throughput + pdata[19] >= max_nonce) { diff --git a/x15/x14.cu b/x15/x14.cu index 2094c1a..3a43429 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -198,32 +198,35 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; + uint32_t _ALIGN(64) vhash[8]; /* check now with the CPU to confirm */ - be32enc(&endiandata[19], foundNonce); - x14hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - x14hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + be32enc(&endiandata[19], work->nonces[0]); + x14hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + 
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + x14hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/x15/x15.cu b/x15/x15.cu index 5abc577..c5362ce 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -205,31 +205,34 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t vhash64[8]; + uint32_t _ALIGN(64) vhash[8]; /* check now with the CPU to confirm */ - be32enc(&endiandata[19], foundNonce); - x15hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - x15hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + be32enc(&endiandata[19], work->nonces[0]); + x15hash(vhash, endiandata); + + if (vhash[7] <= 
Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + x15hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + pdata[19] = work->nonces[0] + 1; + continue; } } diff --git a/x17/x17.cu b/x17/x17.cu index 59002b1..1dfc125 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -230,34 +230,37 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + throughput; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; - uint32_t _ALIGN(64) vhash64[8]; - be32enc(&endiandata[19], foundNonce); - x17hash(vhash64, endiandata); - - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - work_set_target_ratio(work, vhash64); - if (secNonce != 0) { - be32enc(&endiandata[19], secNonce); - x17hash(vhash64, endiandata); - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - res++; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + 
x17hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + x17hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor } - pdata[19] = foundNonce; - return res; - } else { + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { // x11+ coins could do some random error, but not on retry if (!warn) { - warn++; continue; + warn++; + pdata[19] = work->nonces[0] + 1; + continue; } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); warn = 0; } } diff --git a/zr5.cu b/zr5.cu index c9a34fc..cbca488 100644 --- a/zr5.cu +++ b/zr5.cu @@ -431,12 +431,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, if (work_restart[thr_id].restart) return -1; - uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) { - uint32_t vhash64[8]; + uint32_t _ALIGN(64) vhash[8]; uint32_t oldp19 = pdata[19]; - uint32_t offset = foundNonce - pdata[19]; + uint32_t offset = work->nonces[0] - pdata[19]; uint32_t pok = 0; uint16_t h_pok; @@ -444,32 +444,31 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost); pok = version | (0x10000UL * h_pok); - pdata[0] = pok; pdata[19] = foundNonce; - zr5hash(vhash64, pdata); - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - 
int res = 1; - work_set_target_ratio(work, vhash64); - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1); - if (secNonce != 0) { - offset = secNonce - oldp19; + pdata[0] = pok; pdata[19] = work->nonces[0]; + zr5hash(vhash, pdata); + if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + work->valid_nonces = 1; + work_set_target_ratio(work, vhash); + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1); + if (work->nonces[1] != 0) { + offset = work->nonces[1] - oldp19; cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost); pok = version | (0x10000UL * h_pok); memcpy(tmpdata, pdata, 80); - tmpdata[0] = pok; tmpdata[19] = secNonce; - zr5hash(vhash64, tmpdata); - if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) - work_set_target_ratio(work, vhash64); - pdata[21] = secNonce; - pdata[22] = pok; - res++; + tmpdata[0] = pok; tmpdata[19] = work->nonces[1]; + zr5hash(vhash, tmpdata); + if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { + bn_set_target_ratio(work, vhash, 1); + pdata[19] = max(pdata[19], work->nonces[1]); // cursor + pdata[20] = pok; // second nonce "pok" + work->valid_nonces++; } + pdata[19]++; } - return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - - pdata[19]++; + return work->valid_nonces; + } + else if (vhash[7] > ptarget[7]) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); pdata[0] = oldp0; } } else