diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu index a40f75e..c0a3c16 100644 --- a/cuda_checkhash.cu +++ b/cuda_checkhash.cu @@ -13,12 +13,14 @@ __constant__ uint32_t pTarget[8]; // 32 bytes // store MAX_GPUS device arrays of 8 nonces static uint32_t* h_resNonces[MAX_GPUS]; static uint32_t* d_resNonces[MAX_GPUS]; +static bool init_done = false; __host__ void cuda_check_cpu_init(int thr_id, uint32_t threads) { CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32)); CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32)); + init_done = true; } // Target Difficulty @@ -97,6 +99,11 @@ uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uin dim3 grid((threads + threadsperblock - 1) / threadsperblock); dim3 block(threadsperblock); + if (!init_done) { + applog(LOG_ERR, "missing call to cuda_check_cpu_init"); + return UINT32_MAX; + } + cuda_checkhash_64 <<<grid, block>>> (threads, startNounce, d_inputHash, d_resNonces[thr_id]); cudaThreadSynchronize(); @@ -130,6 +137,11 @@ uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounc dim3 grid((threads + threadsperblock - 1) / threadsperblock); dim3 block(threadsperblock); + if (!init_done) { + applog(LOG_ERR, "missing call to cuda_check_cpu_init"); + return 0; + } + // first element stores the count of found nonces cudaMemset(d_resNonces[thr_id], 0, sizeof(uint32_t)); @@ -180,6 +192,12 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun const uint32_t threadsperblock = 256; uint32_t result = UINT32_MAX; + + if (!init_done) { + applog(LOG_ERR, "missing call to cuda_check_cpu_init"); + return UINT32_MAX; + } + cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t)); dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/skein.cu b/skein.cu index 08ee28c..3290e4a 100644 --- a/skein.cu +++ b/skein.cu @@ -420,7 +420,7 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p if (checkSecnonce) { secNonce = 
cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); } - while (checkSecnonce && secNonce != 0 && res < 2) /* todo: up to 6 */ + while (secNonce != 0 && res < 2) /* todo: up to 6 */ { endiandata[19] = swab32_if(secNonce, swap); skeincoinhash(vhash64, endiandata); @@ -430,7 +430,10 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p res++; } num++; - secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); + //if (checkSecnonce) + // secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); + //else + break; // only one secNonce... } if (res > 1 && opt_debug) applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num);