Browse Source

skein: restore secNonce support on sm 5+

Add a sanity check to the common checkhash functions to prevent the problem from happening again...
master
Tanguy Pruvot 10 years ago
parent
commit
fcd3e4336b
  1. 18
      cuda_checkhash.cu
  2. 7
      skein.cu

18
cuda_checkhash.cu

@ -13,12 +13,14 @@ __constant__ uint32_t pTarget[8]; // 32 bytes
// store MAX_GPUS device arrays of 8 nonces // store MAX_GPUS device arrays of 8 nonces
static uint32_t* h_resNonces[MAX_GPUS]; static uint32_t* h_resNonces[MAX_GPUS];
static uint32_t* d_resNonces[MAX_GPUS]; static uint32_t* d_resNonces[MAX_GPUS];
static bool init_done = false;
__host__ __host__
void cuda_check_cpu_init(int thr_id, uint32_t threads) void cuda_check_cpu_init(int thr_id, uint32_t threads)
{ {
CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32)); CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32));
CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32)); CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32));
init_done = true;
} }
// Target Difficulty // Target Difficulty
@ -97,6 +99,11 @@ uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uin
dim3 grid((threads + threadsperblock - 1) / threadsperblock); dim3 grid((threads + threadsperblock - 1) / threadsperblock);
dim3 block(threadsperblock); dim3 block(threadsperblock);
if (!init_done) {
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
return UINT32_MAX;
}
cuda_checkhash_64 <<<grid, block>>> (threads, startNounce, d_inputHash, d_resNonces[thr_id]); cuda_checkhash_64 <<<grid, block>>> (threads, startNounce, d_inputHash, d_resNonces[thr_id]);
cudaThreadSynchronize(); cudaThreadSynchronize();
@ -130,6 +137,11 @@ uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounc
dim3 grid((threads + threadsperblock - 1) / threadsperblock); dim3 grid((threads + threadsperblock - 1) / threadsperblock);
dim3 block(threadsperblock); dim3 block(threadsperblock);
if (!init_done) {
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
return 0;
}
// first element stores the count of found nonces // first element stores the count of found nonces
cudaMemset(d_resNonces[thr_id], 0, sizeof(uint32_t)); cudaMemset(d_resNonces[thr_id], 0, sizeof(uint32_t));
@ -180,6 +192,12 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun
const uint32_t threadsperblock = 256; const uint32_t threadsperblock = 256;
uint32_t result = UINT32_MAX; uint32_t result = UINT32_MAX;
if (!init_done) {
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
return UINT32_MAX;
}
cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t)); cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 grid((threads + threadsperblock-1)/threadsperblock);

7
skein.cu

@ -420,7 +420,7 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
if (checkSecnonce) { if (checkSecnonce) {
secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
} }
while (checkSecnonce && secNonce != 0 && res < 2) /* todo: up to 6 */ while (secNonce != 0 && res < 2) /* todo: up to 6 */
{ {
endiandata[19] = swab32_if(secNonce, swap); endiandata[19] = swab32_if(secNonce, swap);
skeincoinhash(vhash64, endiandata); skeincoinhash(vhash64, endiandata);
@ -430,7 +430,10 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
res++; res++;
} }
num++; num++;
secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); //if (checkSecnonce)
// secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
//else
break; // only one secNonce...
} }
if (res > 1 && opt_debug) if (res > 1 && opt_debug)
applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num); applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num);

Loading…
Cancel
Save