From 65909ec3b778fdba97c97146ff8900795b972526 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sat, 6 Sep 2014 10:55:44 +0200 Subject: [PATCH] blake: handle case when 2 hashes are found in a call --- blake32.cu | 50 +++++++++++++++++++++++++++++++++++++---------- cpu-miner.c | 2 +- cpuminer-config.h | 6 +++--- util.c | 6 ++++-- 4 files changed, 48 insertions(+), 16 deletions(-) diff --git a/blake32.cu b/blake32.cu index 2ce2acd..5013de7 100644 --- a/blake32.cu +++ b/blake32.cu @@ -52,6 +52,8 @@ static uint32_t __align__(32) c_data[20]; static uint32_t *d_resNounce[8]; static uint32_t *h_resNounce[8]; +static uint32_t extra_results[2] = { MAXU, MAXU }; + /* prefer uint32_t to prevent size conversions = speed +5/10 % */ __constant__ static uint32_t __align__(32) c_sigma[16][16]; @@ -225,9 +227,13 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN } } - /* keep the smallest nounce, hmm... */ - if(resNounce[0] > nounce) + /* keep the smallest nounce, + extra one if found */ + if (resNounce[0] > nounce) { + resNounce[1] = resNounce[0]; resNounce[0] = nounce; + } + else + resNounce[1] = nounce; } } @@ -242,14 +248,15 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce size_t shared_size = 0; /* Check error on Ctrl+C or kill to prevent segfaults on exit */ - if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess) + if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess) return result; blake256_gpu_hash_80<<>>(threads, startNounce, d_resNounce[thr_id], blakerounds); cudaDeviceSynchronize(); - if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) { + if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) { cudaThreadSynchronize(); - result = *h_resNounce[thr_id]; + result = h_resNounce[thr_id][0]; + extra_results[0] = h_resNounce[thr_id][1]; } return result; } @@ -269,9 +276,20 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt { const uint32_t first_nonce = pdata[19]; static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - uint32_t throughput = min(TPB * 2048, max_nonce - first_nonce); + uint32_t throughput = min(TPB * 4096, max_nonce - first_nonce); int rc = 0; + if (extra_results[0] != MAXU) { + // possible extra result found in previous call + if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) { + pdata[19] = extra_results[0]; + *hashes_done = pdata[19] - first_nonce + 1; + extra_results[0] = MAXU; + rc = 1; + goto exit_scan; + } + } + if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00000f; @@ -279,13 +297,13 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt if (opt_n_threads > 1) { CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); } - CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], sizeof(uint32_t))); - CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], sizeof(uint32_t))); + CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t))); + CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t))); init[thr_id] = true; } - if (throughput < (TPB * 2048)) - applog(LOG_WARNING, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce); + if (opt_debug && throughput < (TPB * 4096)) + applog(LOG_DEBUG, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce); blake256_cpu_setBlock_80(pdata, ptarget); @@ -309,6 +327,18 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt { pdata[19] = foundNonce; rc = 1; + + if (extra_results[0] != MAXU) { + // Rare but possible if the throughput is big + be32enc(&endiandata[19], extra_results[0]); + blake256hash(vhashcpu, endiandata, blakerounds); + if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) { + applog(LOG_NOTICE, "GPU found more than one result yippee!"); + } else { + extra_results[0] = MAXU; + } + } + goto exit_scan; } else if (vhashcpu[7] > Htarg) { diff --git a/cpu-miner.c b/cpu-miner.c index b3a6ba7..7f70a6e 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -1005,7 +1005,7 @@ static void *miner_thread(void *userdata) work_restart[thr_id].restart = 1; hashlog_purge_old(); // wait a bit for a new job... - usleep(1500*1000); + sleep(1); (*nonceptr) = end_nonce + 1; work_done = true; continue; diff --git a/cpuminer-config.h b/cpuminer-config.h index 0fafa85..11edf82 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -156,7 +156,7 @@ #define PACKAGE_NAME "ccminer" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ccminer 2014.09.01" +#define PACKAGE_STRING "ccminer 2014.09.06" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "ccminer" @@ -165,7 +165,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "2014.09.01" +#define PACKAGE_VERSION "2014.09.06" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -188,7 +188,7 @@ #define USE_XOP 1 /* Version number of package */ -#define VERSION "2014.09.01" +#define VERSION "2014.09.06" /* Define curl_free() as free() if our version of curl lacks curl_free. */ /* #undef curl_free */ diff --git a/util.c b/util.c index fe9168b..b2c0b0f 100644 --- a/util.c +++ b/util.c @@ -557,6 +557,9 @@ bool fulltest(const uint32_t *hash, const uint32_t *target) rc = true; break; } + if (hash[0] == target[0]) { + applog(LOG_NOTICE, "We found an exact match!"); + } } if (!rc && opt_debug) { @@ -1122,8 +1125,7 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params) sctx->next_diff = diff; pthread_mutex_unlock(&sctx->work_lock); - if (opt_debug) - applog(LOG_DEBUG, "Stratum difficulty set to %g", diff); + applog(LOG_INFO, "Stratum difficulty set to %g", diff); return true; }