diff --git a/blake32.cu b/blake32.cu index e7aae76..65df026 100644 --- a/blake32.cu +++ b/blake32.cu @@ -39,8 +39,6 @@ extern "C" void blake256hash(void *output, const void *input, int8_t rounds = 14 #include "cuda_helper.h" -#define MAXU 0xffffffffU - #if PRECALC64 __constant__ uint32_t _ALIGN(32) d_data[12]; #else @@ -58,7 +56,7 @@ static uint32_t *h_resNonce[8]; /* max count of found nonces in one call */ #define NBN 2 -static uint32_t extra_results[NBN] = { MAXU }; +static uint32_t extra_results[NBN] = { UINT32_MAX }; /* prefer uint32_t to prevent size conversions = speed +5/10 % */ __constant__ @@ -250,7 +248,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui const uint32_t crcsum, const int8_t rounds) { const int threadsperblock = TPB; - uint32_t result = MAXU; + uint32_t result = UINT32_MAX; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -339,7 +337,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c const int8_t rounds) { const int threadsperblock = TPB; - uint32_t result = MAXU; + uint32_t result = UINT32_MAX; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -404,12 +402,12 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt int rc = 0; #if NBN > 1 - if (extra_results[0] != MAXU) { + if (extra_results[0] != UINT32_MAX) { // possible extra result found in previous call if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) { pdata[19] = extra_results[0]; *hashes_done = pdata[19] - first_nonce + 1; - extra_results[0] = MAXU; + extra_results[0] = UINT32_MAX; rc = 1; goto exit_scan; } @@ -455,7 +453,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt // GPU FULL HASH blake256_cpu_hash_80(thr_id, throughput, pdata[19], targetHigh, crcsum, blakerounds); #endif - if (foundNonce != MAXU) + if (foundNonce != UINT32_MAX) { uint32_t vhashcpu[8]; uint32_t Htarg = (uint32_t)targetHigh; @@ -472,7 +470,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt pdata[19] = foundNonce; rc = 1; - if (extra_results[0] != MAXU) { + if (extra_results[0] != UINT32_MAX) { // Rare but possible if the throughput is big be32enc(&endiandata[19], extra_results[0]); @@ -481,7 +479,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt applog(LOG_NOTICE, "GPU found more than one result " CL_GRN "yippee!"); rc = 2; } else { - extra_results[0] = MAXU; + extra_results[0] = UINT32_MAX; } } diff --git a/ccminer.cpp b/ccminer.cpp index cfd72eb..e24fc97 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -1122,8 +1122,12 @@ static void *miner_thread(void *userdata) max64 = max(minmax-1, max64); } - if (opt_debug) - applog(LOG_DEBUG, "GPU #%d: start=%08x range=%llx", device_map[thr_id], *nonceptr, max64); + // we can't scan more than uint capacity + max64 = min(UINT32_MAX, max64); + + //if (opt_debug) + // applog(LOG_DEBUG, "GPU #%d: start=%08x end=%08x max64=%llx", + // device_map[thr_id], *nonceptr, end_nonce, max64); start_nonce = *nonceptr; @@ -1137,7 +1141,7 @@ static void *miner_thread(void *userdata) range.data = hashlog_get_scan_range(work.job_id); if (range.data && !opt_benchmark) { bool stall = false; - if (range.scanned[0] == 1 && range.scanned[1] == 0xFFFFFFFFUL) { + if (range.scanned[0] == 1 && range.scanned[1] == UINT32_MAX) { applog(LOG_WARNING, "detected a rescan of fully scanned job!"); } else if (range.scanned[0] > 0 && range.scanned[1] > 0 && range.scanned[1] < 0xFFFFFFF0UL) { /* continue scan the end */ @@ -1163,18 +1167,24 @@ static void *miner_thread(void *userdata) } } + /* never let small ranges at end */ + if (end_nonce >= UINT32_MAX - 256) + end_nonce = UINT32_MAX; + if ((max64 + start_nonce) >= end_nonce) max_nonce = end_nonce; else max_nonce = (uint32_t) (max64 + start_nonce); - /* never let small ranges at end */ - if (max_nonce >= UINT32_MAX - 256) - max_nonce = UINT32_MAX; + // todo: keep it rounded for gpu threads ? work.scanned_from = start_nonce; (*nonceptr) = start_nonce; + if (opt_debug) + applog(LOG_DEBUG, "GPU #%d: start=%08x end=%08x range=%08x", + device_map[thr_id], start_nonce, max_nonce, (max_nonce-start_nonce)); + hashes_done = 0; continue_scan: gettimeofday(&tv_start, NULL); diff --git a/stats.cpp b/stats.cpp index 6204006..ad60831 100644 --- a/stats.cpp +++ b/stats.cpp @@ -28,7 +28,7 @@ extern int device_map[8]; void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found) { uint64_t gpu = device_map[thr_id]; - uint64_t key = (gpu << 56) + (uid++ % UINT_MAX); + uint64_t key = (gpu << 56) + (uid++ % UINT32_MAX); stats_data data; // to enough hashes to give right stats if (hashcount < 1000 || hashrate < 0.01)