From c3bdb623e82ed4bc6906ff06d616a2b13494b424 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Fri, 5 Dec 2014 14:59:32 +0100 Subject: [PATCH] Check and submit multiple nonces in one loop Added to most algos: the checkhash function scans a big range and can find multiple nonces at once when the difficulty is low. Stop ignoring them and submit the second one if found. Also clean up the draft rc=2 code implemented for blake and pentablake, and fix the reduced displayed hashrate when a nonce is found. Signed-off-by: Tanguy Pruvot --- JHA/jackpotcoin.cu | 17 +++++++----- blake32.cu | 30 +++++---------------- ccminer.cpp | 56 +++++++++++++++++--------------------- cuda_helper.h | 2 +- cuda_nist5.cu | 20 +++++++++----- keccak/keccak256.cu | 19 ++++++++----- pentablake.cu | 60 ++++++++++------------------------------- quark/animecoin.cu | 17 +++++++----- qubit/deep.cu | 20 +++++++++----- qubit/doom.cu | 15 ++++++----- qubit/qubit.cu | 20 +++++++++----- qubit/qubit_luffa512.cu | 10 ++++--- x11/fresh.cu | 12 ++++++--- x11/s3.cu | 13 ++++++--- x11/x11.cu | 17 +++++++----- x13/x13.cu | 14 +++++++--- x15/whirlpool.cu | 17 +++++++----- x15/x14.cu | 17 ++++++++---- x15/x15.cu | 20 ++++++++------ x17/x17.cu | 17 +++++++----- 20 files changed, 217 insertions(+), 196 deletions(-) diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index d66751a..29059a3 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -218,13 +218,18 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, // diese jackpothash Funktion gibt die Zahl der Runden zurück rounds = jackpothash(vhash64, endiandata); - if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) { - + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - 
first_nonce + 1; - //applog(LOG_INFO, "GPU #%d: result for nonce $%08X does validate on CPU (%d rounds)!", thr_id, foundNonce, rounds); - return 1; - } else { + return res; + } + else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU (%d rounds)!", thr_id, foundNonce, rounds); } } diff --git a/blake32.cu b/blake32.cu index b37f50c..ad426e3 100644 --- a/blake32.cu +++ b/blake32.cu @@ -398,19 +398,6 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt int rc = 0; -#if NBN > 1 - if (extra_results[0] != UINT32_MAX) { - // possible extra result found in previous call - if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) { - pdata[19] = extra_results[0]; - *hashes_done = pdata[19] - first_nonce + 1; - extra_results[0] = UINT32_MAX; - rc = 1; - goto exit_scan; - } - } -#endif - if (opt_benchmark) { targetHigh = 0x1ULL << 32; ((uint32_t*)ptarget)[6] = swab32(0xff); @@ -464,25 +451,23 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt //applog(LOG_BLUE, "%08x %16llx", vhashcpu[6], targetHigh); if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { - pdata[19] = foundNonce; rc = 1; - + *hashes_done = pdata[19] - first_nonce + throughput; + pdata[19] = foundNonce; +#if NBN > 1 if (extra_results[0] != UINT32_MAX) { - // Rare but possible if the throughput is big be32enc(&endiandata[19], extra_results[0]); - blake256hash(vhashcpu, endiandata, blakerounds); if (vhashcpu[6] <= Htarg /* && fulltest(vhashcpu, ptarget) */) { - applog(LOG_NOTICE, "GPU found more than one result " CL_GRN "yippee!"); + pdata[21] = extra_results[0]; rc = 2; - } else { - extra_results[0] = UINT32_MAX; } + extra_results[0] = UINT32_MAX; } - +#endif //applog_hash((uint8_t*)ptarget); //applog_compare_hash((uint8_t*)vhashcpu,(uint8_t*)ptarget); - goto exit_scan; + return rc; } else if (opt_debug) { applog_hash((uchar*)ptarget); @@ -500,7 +485,6 @@ extern "C" int scanhash_blake256(int thr_id, 
uint32_t *pdata, const uint32_t *pt } while (!work_restart[thr_id].restart); -exit_scan: *hashes_done = pdata[19] - first_nonce + 1; // (+1 to prevent locks) return rc; } diff --git a/ccminer.cpp b/ccminer.cpp index 820a3d4..147c739 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -570,7 +570,6 @@ static bool submit_upstream_work(CURL *curl, struct work *work) le32enc(&ntime, work->data[17]); le32enc(&nonce, work->data[19]); - be16enc(&nvote, *((uint16_t*)&work->data[20])); noncestr = bin2hex((const uchar*)(&nonce), 4); @@ -593,6 +592,7 @@ static bool submit_upstream_work(CURL *curl, struct work *work) xnonce2str = bin2hex(work->xnonce2, work->xnonce2_len); if (opt_algo == ALGO_HEAVY) { + be16enc(&nvote, *((uint16_t*)&work->data[20])); nvotestr = bin2hex((const uchar*)(&nvote), 2); sprintf(s, "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", @@ -1027,7 +1027,7 @@ static void *miner_thread(void *userdata) nonceptr = (uint32_t*) (((char*)work.data) + wcmplen); pthread_mutex_lock(&g_work_lock); extrajob |= work_done; - if ((*nonceptr) >= end_nonce || extrajob) { + if (nonceptr[0] >= end_nonce || extrajob) { work_done = false; extrajob = false; stratum_gen_work(&stratum, &g_work); @@ -1036,8 +1036,7 @@ static void *miner_thread(void *userdata) int min_scantime = scan_time; /* obtain new work from internal workio thread */ pthread_mutex_lock(&g_work_lock); - if (time(NULL) - g_work_time >= min_scantime || - (*nonceptr) >= end_nonce) { + if (time(NULL) - g_work_time >= min_scantime || nonceptr[0] >= end_nonce) { if (unlikely(!get_work(mythr, &g_work))) { applog(LOG_ERR, "work retrieval failed, exiting " "mining thread %d", mythr->id); @@ -1047,21 +1046,6 @@ static void *miner_thread(void *userdata) g_work_time = time(NULL); } } -#if 0 - if (!opt_benchmark && g_work.job_id[0] == '\0') { - applog(LOG_ERR, "work data not read yet"); - extrajob = true; - work_done = true; - sleep(1); - //continue; - } -#endif - if (rc > 
1) { - /* if we found more than one on last loop */ - /* todo: handle an array to get them directly */ - pthread_mutex_unlock(&g_work_lock); - goto continue_scan; - } if (!opt_benchmark && memcmp(work.target, g_work.target, sizeof(work.target))) { calc_diff(&g_work, 0); @@ -1071,7 +1055,7 @@ static void *miner_thread(void *userdata) } memcpy(work.target, g_work.target, sizeof(work.target)); work.difficulty = g_work.difficulty; - (*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr + nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr /* on new target, ignoring nonce, clear sent data (hashlog) */ if (memcmp(work.target, g_work.target, sizeof(work.target))) { hashlog_purge_job(work.job_id); @@ -1090,9 +1074,9 @@ static void *miner_thread(void *userdata) } #endif memcpy(&work, &g_work, sizeof(struct work)); - (*nonceptr) = (0xffffffffUL / opt_n_threads) * thr_id; // 0 if single thr + nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr } else - (*nonceptr)++; //?? + nonceptr[0]++; //?? work_restart[thr_id].restart = 0; pthread_mutex_unlock(&g_work_lock); @@ -1131,7 +1115,7 @@ static void *miner_thread(void *userdata) // we can't scan more than uint capacity max64 = min(UINT32_MAX, max64); - start_nonce = *nonceptr; + start_nonce = nonceptr[0]; #if 0 /* do not recompute something already scanned (hashharder workaround) */ if (opt_algo == ALGO_BLAKE && opt_n_threads == 1) { @@ -1163,7 +1147,7 @@ static void *miner_thread(void *userdata) stats_purge_old(); // wait a bit for a new job... usleep(500*1000); - (*nonceptr) = end_nonce + 1; + nonceptr[0] = end_nonce + 1; work_done = true; continue; } @@ -1182,14 +1166,13 @@ static void *miner_thread(void *userdata) // todo: keep it rounded for gpu threads ? 
work.scanned_from = start_nonce; - (*nonceptr) = start_nonce; + nonceptr[0] = start_nonce; if (opt_debug) applog(LOG_DEBUG, "GPU #%d: start=%08x end=%08x range=%08x", device_map[thr_id], start_nonce, max_nonce, (max_nonce-start_nonce)); hashes_done = 0; -continue_scan: gettimeofday(&tv_start, NULL); /* scan nonces for a proof-of-work hash */ @@ -1326,7 +1309,9 @@ continue_scan: gettimeofday(&tv_end, NULL); if (rc && opt_debug) - applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", *nonceptr, swab32(*nonceptr)); + applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", nonceptr[0], swab32(nonceptr[0])); // data[19] + if (rc > 1 && opt_debug) + applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", nonceptr[2], swab32(nonceptr[2])); // data[21] timeval_subtract(&diff, &tv_end, &tv_start); @@ -1346,22 +1331,22 @@ continue_scan: pthread_mutex_lock(&stats_lock); if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) { thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); - if (rc > 1) - thr_hashrates[thr_id] = (rc * hashes_done) / (diff.tv_sec + 1e-6 * diff.tv_usec); thr_hashrates[thr_id] *= rate_factor; stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height); } pthread_mutex_unlock(&stats_lock); } - if (rc) - work.scanned_to = *nonceptr; + if (rc > 1) + work.scanned_to = nonceptr[2]; + else if (rc) + work.scanned_to = nonceptr[0]; else { work.scanned_to = max_nonce; if (opt_debug && opt_benchmark) { // to debug nonce ranges applog(LOG_DEBUG, "GPU #%d: ends=%08x range=%llx", device_map[thr_id], - *nonceptr, ((*nonceptr) - start_nonce)); + nonceptr[0], (nonceptr[0] - start_nonce)); } } @@ -1393,6 +1378,13 @@ continue_scan: if (rc && !opt_benchmark) { if (!submit_work(mythr, &work)) break; + // second nonce found, submit too + if (rc > 1 && work.data[21]) { + work.data[19] = work.data[21]; + work.data[21] = 0; + if (!submit_work(mythr, &work)) + break; + } } loopcnt++; diff --git a/cuda_helper.h b/cuda_helper.h 
index 4714103..5d60288 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -23,8 +23,8 @@ extern uint32_t cuda_check_hash(int thr_id, int threads, uint32_t startNounce, u extern uint32_t cuda_check_hash_suppl(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce); extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); extern void cudaReportHardwareFailure(int thr_id, cudaError_t error, const char* func); - extern __device__ __device_builtin__ void __syncthreads(void); +extern __device__ __device_builtin__ void __threadfence(void); #ifndef __CUDA_ARCH__ // define blockDim and threadIdx for host diff --git a/cuda_nist5.cu b/cuda_nist5.cu index ad4a21f..e88017f 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -113,19 +113,25 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; - uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); nist5hash(vhash64, endiandata); - if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) { - + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; - } else { + return res; + } + else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); } } diff --git a/keccak/keccak256.cu b/keccak/keccak256.cu index f09bfe1..f1f1200 100644 --- a/keccak/keccak256.cu +++ b/keccak/keccak256.cu @@ -67,20 +67,25 @@ extern "C" 
int scanhash_keccak256(int thr_id, uint32_t *pdata, int order = 0; uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { - uint32_t vhash64[8]; uint32_t Htarg = ptarget[7]; + uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); keccak256_hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; - - } else { + return res; + } + else { applog(LOG_DEBUG, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce); } } diff --git a/pentablake.cu b/pentablake.cu index 8616772..49f49e3 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -43,8 +43,6 @@ extern "C" void pentablakehash(void *output, const void *input) #include "cuda_helper.h" -#define MAXU 0xffffffffU - __constant__ static uint32_t __align__(32) c_Target[8]; @@ -54,7 +52,7 @@ static uint64_t __align__(32) c_data[32]; static uint32_t *d_hash[8]; static uint32_t *d_resNounce[8]; static uint32_t *h_resNounce[8]; -static uint32_t extra_results[2] = { MAXU, MAXU }; +static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX }; /* prefer uint32_t to prevent size conversions = speed +5/10 % */ __constant__ @@ -387,7 +385,7 @@ __host__ uint32_t pentablake_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce) { const int threadsperblock = TPB; - uint32_t result = MAXU; + uint32_t result = UINT32_MAX; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -446,7 +444,7 @@ __host__ static uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, int order) { 
const int threadsperblock = TPB; - uint32_t result = MAXU; + uint32_t result = UINT32_MAX; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -497,17 +495,6 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * int throughput = opt_work_size ? opt_work_size : (128 * 2560); // 18.5 throughput = min(throughput, (int)(max_nonce - first_nonce)); - if (extra_results[0] != MAXU) { - // possible extra result found in previous call - if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) { - pdata[19] = extra_results[0]; - *hashes_done = pdata[19] - first_nonce + 1; - extra_results[0] = MAXU; - rc = 1; - goto exit_scan; - } - } - if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000F; @@ -539,39 +526,30 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * pentablake_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], order++); uint32_t foundNonce = pentablake_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - - if (foundNonce != MAXU) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhashcpu[8]; - uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); - pentablakehash(vhashcpu, endiandata); - if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) - { - pdata[19] = foundNonce; + if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) { rc = 1; - - // Rare but possible if the throughput is big - be32enc(&endiandata[19], extra_results[0]); - pentablakehash(vhashcpu, endiandata); - if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) { + *hashes_done = pdata[19] - first_nonce + throughput; + if (extra_results[0] != UINT32_MAX) { + // Rare but possible if the throughput is big applog(LOG_NOTICE, "GPU found more than one result yippee!"); - rc = 2; - } else { - extra_results[0] = MAXU; + pdata[21] = extra_results[0]; + extra_results[0] = UINT32_MAX; + rc++; } - - goto exit_scan; + pdata[19] = 
foundNonce; + return rc; } else if (vhashcpu[7] > Htarg) { applog(LOG_WARNING, "GPU #%d: result for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[7], Htarg); } - else if (vhashcpu[6] > ptarget[6]) { - applog(LOG_WARNING, "GPU #%d: hash[6] for nounce %08x is not in range: %x > %x", thr_id, foundNonce, vhashcpu[6], ptarget[6]); - } else { applog(LOG_WARNING, "GPU #%d: result for nounce %08x does not validate on CPU!", thr_id, foundNonce); } @@ -581,17 +559,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); -exit_scan: *hashes_done = pdata[19] - first_nonce + 1; -#if 0 - /* reset the device to allow multiple instances - * could be made in cpu-miner... check later if required */ - if (opt_n_threads == 1) { - CUDA_SAFE_CALL(cudaDeviceReset()); - init[thr_id] = false; - } -#endif - cudaDeviceSynchronize(); return rc; } diff --git a/quark/animecoin.cu b/quark/animecoin.cu index e3205ad..a6d311a 100644 --- a/quark/animecoin.cu +++ b/quark/animecoin.cu @@ -258,18 +258,23 @@ extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, // Scan nach Gewinner Hashes auf der GPU uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { - uint32_t vhash64[8]; const uint32_t Htarg = ptarget[7]; + uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); animehash(vhash64, endiandata); - if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) { - + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else { 
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); } diff --git a/qubit/deep.cu b/qubit/deep.cu index 407ce25..f2c5957 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -87,7 +87,6 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, cuda_check_cpu_setTarget(ptarget); do { - const uint32_t Htarg = ptarget[7]; int order = 0; qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); @@ -95,18 +94,25 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); deephash(vhash64, endiandata); - if (vhash64[7]<=Htarg && fulltest(vhash64, ptarget) ) - { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; - } else { + return res; + } + else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); } } diff --git a/qubit/doom.cu b/qubit/doom.cu index 9813c2f..e390a7f 100644 --- a/qubit/doom.cu +++ b/qubit/doom.cu @@ -63,22 +63,23 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget); do { - const uint32_t Htarg = ptarget[7]; int order = 0; uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = 
ptarget[7]; uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); doomhash(vhash64, endiandata); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) ) - { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + *hashes_done = pdata[19] - first_nonce + throughput; pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; - } else { + return res; + } + else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); } } diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 92375bf..f9c3461 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -110,7 +110,6 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, cuda_check_cpu_setTarget(ptarget); do { - const uint32_t Htarg = ptarget[7]; int order = 0; // Hash with CUDA @@ -121,18 +120,25 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); qubithash(vhash64, endiandata); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) ) - { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; - } else { + return res; + } + else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); } } diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu index 23d95bb..a4a0ef2 100644 --- a/qubit/qubit_luffa512.cu +++ b/qubit/qubit_luffa512.cu @@ 
-23,7 +23,9 @@ #include "cuda_helper.h" -#define MAXU 0xffffffffU +#ifndef UINT32_MAX +#define UINT32_MAX UINT_MAX +#endif typedef unsigned char BitSequence; @@ -35,7 +37,7 @@ static uint32_t *d_resNounce[8]; #define NBN 1 /* max results, could be 2, see blake32.cu */ #if NBN > 1 -static uint32_t extra_results[2] = { MAXU, MAXU }; +static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX }; #endif typedef struct { @@ -454,7 +456,7 @@ void qubit_luffa512_cpu_init(int thr_id, int threads) __host__ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) { - uint32_t result = MAXU; + uint32_t result = UINT32_MAX; cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t)); const int threadsperblock = 256; @@ -517,4 +519,4 @@ void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget) CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice)); -} \ No newline at end of file +} diff --git a/x11/fresh.cu b/x11/fresh.cu index 40c2455..408aa36 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -124,16 +124,22 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, #endif foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); fresh_hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU 
#%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x11/s3.cu b/x11/s3.cu index 4997f2d..0b495ac 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -101,17 +101,22 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); s3hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else { applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); diff --git a/x11/x11.cu b/x11/x11.cu index 6a838e4..5a561ee 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -171,8 +171,6 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, cuda_check_cpu_setTarget(ptarget); do { - const uint32_t Htarg = ptarget[7]; - int order = 0; uint32_t foundNonce; @@ -189,19 +187,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); x11hash(vhash64, endiandata); - /* uint32_t secNonce = */ cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - // just check if there was some other ones... + int res = 1; + // check if there was some other ones... 
+ uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x13/x13.cu b/x13/x13.cu index fb204f1..1a0c3e9 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -209,17 +209,23 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); x13hash(vhash64, endiandata); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) ) { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 4365c9d..a18ce69 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -87,18 +87,23 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t 
Htarg = ptarget[7]; uint32_t vhash64[8]; - uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); wcoinhash(vhash64, endiandata); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) - { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x15/x14.cu b/x15/x14.cu index 39c6004..d14151c 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -216,17 +216,24 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { - /* check now with the CPU to confirm */ + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; + /* check now with the CPU to confirm */ be32enc(&endiandata[19], foundNonce); x14hash(vhash64, endiandata); - uint32_t Htarg = ptarget[7]; + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x15/x15.cu b/x15/x15.cu index c9bff02..c714bd3 100644 --- a/x15/x15.cu 
+++ b/x15/x15.cu @@ -228,21 +228,25 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - /* Scan with GPU */ uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { - /* check now with the CPU to confirm */ + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; - uint32_t Htarg = ptarget[7]; + /* check now with the CPU to confirm */ be32enc(&endiandata[19], foundNonce); x15hash(vhash64, endiandata); + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - x15_whirlpool_cpu_free(thr_id); - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg); diff --git a/x17/x17.cu b/x17/x17.cu index 1809a70..32b6dc7 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -254,18 +254,23 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, x17_haval256_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != 0xffffffff) + if (foundNonce != UINT32_MAX) { + const uint32_t Htarg = ptarget[7]; uint32_t vhash64[8]; - uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); x17hash(vhash64, endiandata); - if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) - { + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + uint32_t secNonce = 
cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } pdata[19] = foundNonce; - *hashes_done = foundNonce - first_nonce + 1; - return 1; + return res; } else if (vhash64[7] > Htarg) { applog(LOG_INFO, "GPU #%d: result for %08x is not in range: %x > %x", thr_id, foundNonce, vhash64[7], Htarg);