skein: restore secNonce support on sm 5+

Add a sanity check to the common checkhash functions to prevent this problem from recurring.
10 years ago · fcd3e4336b
2 changed files with 23 additions and 2 deletions
--- a/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@ -13,12 +13,14 @@ __constant__ uint32_t pTarget[8]; // 32 bytes
 // store MAX_GPUS device arrays of 8 nonces
 static uint32_t* h_resNonces[MAX_GPUS];
 static uint32_t* d_resNonces[MAX_GPUS];
 static bool init_done = false;
 // Allocate the result-nonce buffers for slot thr_id and mark the checkhash
 // helpers as initialized.
 //
 // thr_id  : index into h_resNonces[] / d_resNonces[] (arrays of MAX_GPUS entries).
 // threads : not used by this function.
 //
 // The cuda_check_hash* functions test init_done before touching
 // d_resNonces[thr_id] and log "missing call to cuda_check_cpu_init" when it
 // is still false.
 //
 // NOTE(review): init_done is a single file-level flag, not one per thr_id, so
 // it becomes true as soon as any slot has been initialized -- confirm that
 // every thread calls this function before using the check helpers.
 __host__
 void cuda_check_cpu_init(int thr_id, uint32_t threads)
 {
    // 32 bytes = 8 x uint32_t, matching the "8 nonces" per array noted at the
    // declarations. cudaMallocHost returns page-locked (pinned) host memory.
    // CUDA_CALL_OR_RET is defined elsewhere; presumably it returns early on a
    // CUDA error, leaving init_done unset -- TODO confirm.
    CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32));
    // Device-side counterpart of the buffer above, same size.
    CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32));
    // Reached only after both allocation statements have completed.
    init_done = true;
 }
 // Target Difficulty
@ -97,6 +99,11 @@ uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uin
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
 	if (!init_done) {
 		applog(LOG_ERR, "missing call to cuda_check_cpu_init");
 		return UINT32_MAX;
 	}
 	cuda_checkhash_64 <<<grid, block>>> (threads, startNounce, d_inputHash, d_resNonces[thr_id]);
 	cudaThreadSynchronize();
@ -130,6 +137,11 @@ uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounc
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
 	if (!init_done) {
 		applog(LOG_ERR, "missing call to cuda_check_cpu_init");
 		return 0;
 	}
 	// first element stores the count of found nonces
 	cudaMemset(d_resNonces[thr_id], 0, sizeof(uint32_t));
@ -180,6 +192,12 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun
 	const uint32_t threadsperblock = 256;
 	uint32_t result = UINT32_MAX;
 	if (!init_done) {
 		applog(LOG_ERR, "missing call to cuda_check_cpu_init");
 		return UINT32_MAX;
 	}
 	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
--- a/skein.cu
+++ b/skein.cu
@ -420,7 +420,7 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
 				if (checkSecnonce) {
 					secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
 				}
-				while (checkSecnonce && secNonce != 0 && res < 2) /* todo: up to 6 */
+				while (secNonce != 0 && res < 2) /* todo: up to 6 */
 				{
 					endiandata[19] = swab32_if(secNonce, swap);
 					skeincoinhash(vhash64, endiandata);
@ -430,7 +430,10 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p
 						res++;
 					}
 					num++;
-					secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
+					//if (checkSecnonce)
 					//	secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
 					//else
 						break; // only one secNonce...
 				}
 				if (res > 1 && opt_debug)
 					applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num);