Browse Source

blake: prevent empty scan ranges with multiple GPUs

In some cases, an empty scan range was possible during benchmarking.
master
Tanguy Pruvot 9 years ago
parent
commit
113e22de2e
  1. 10
      Algo256/blake256.cu
  2. 3
      ccminer.cpp

10
Algo256/blake256.cu

@ -281,7 +281,7 @@ __global__
void blake256_gpu_hash_16(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonce, void blake256_gpu_hash_16(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonce,
const uint64_t highTarget, const int rounds, const bool trace) const uint64_t highTarget, const int rounds, const bool trace)
{ {
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
{ {
const uint32_t nonce = startNonce + thread; const uint32_t nonce = startNonce + thread;
@ -336,6 +336,8 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock); dim3 block(threadsperblock);
cudaGetLastError();
/* Check error on Ctrl+C or kill to prevent segfaults on exit */ /* Check error on Ctrl+C or kill to prevent segfaults on exit */
if (cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)) != cudaSuccess) if (cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)) != cudaSuccess)
return result; return result;
@ -347,6 +349,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
for (int n=0; n < (NBN-1); n++) for (int n=0; n < (NBN-1); n++)
extra_results[n] = h_resNonce[thr_id][n+1]; extra_results[n] = h_resNonce[thr_id][n+1];
} }
CUDA_LOG_ERROR();
return result; return result;
} }
@ -429,6 +432,9 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
#endif /* PRECALC64 */ #endif /* PRECALC64 */
do { do {
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = uint32_t foundNonce =
#if PRECALC64 #if PRECALC64
// GPU HASH (second block only, first is midstate) // GPU HASH (second block only, first is midstate)
@ -437,8 +443,6 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
// GPU FULL HASH // GPU FULL HASH
blake256_cpu_hash_80(thr_id, throughput, pdata[19], targetHigh, crcsum, blakerounds); blake256_cpu_hash_80(thr_id, throughput, pdata[19], targetHigh, crcsum, blakerounds);
#endif #endif
*hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX && bench_algo == -1) if (foundNonce != UINT32_MAX && bench_algo == -1)
{ {
uint32_t vhashcpu[8]; uint32_t vhashcpu[8];

3
ccminer.cpp

@ -1953,6 +1953,9 @@ static void *miner_thread(void *userdata)
// to debug nonce ranges // to debug nonce ranges
gpulog(LOG_DEBUG, thr_id, "ends=%08x range=%08x", nonceptr[0], (nonceptr[0] - start_nonce)); gpulog(LOG_DEBUG, thr_id, "ends=%08x range=%08x", nonceptr[0], (nonceptr[0] - start_nonce));
} }
// prevent low scan ranges on next loop on fast algos (blake)
if (nonceptr[0] > UINT32_MAX - 64)
nonceptr[0] = UINT32_MAX;
} }
if (check_dups) if (check_dups)

Loading…
Cancel
Save