Reduce lyra2, blake and pentablake CPU load
This commit is contained in:
parent
010eba1760
commit
a66d78e692
@ -257,9 +257,8 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui
|
||||
return result;
|
||||
|
||||
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
|
||||
cudaDeviceSynchronize();
|
||||
MyStreamSynchronize(NULL, 0, thr_id);
|
||||
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
//cudaThreadSynchronize(); /* seems no more required */
|
||||
result = h_resNonce[thr_id][0];
|
||||
for (int n=0; n < (NBN-1); n++)
|
||||
extra_results[n] = h_resNonce[thr_id][n+1];
|
||||
@ -343,9 +342,8 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
|
||||
return result;
|
||||
|
||||
blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
|
||||
cudaDeviceSynchronize();
|
||||
MyStreamSynchronize(NULL, 0, thr_id);
|
||||
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
//cudaThreadSynchronize(); /* seems no more required */
|
||||
result = h_resNonce[thr_id][0];
|
||||
for (int n=0; n < (NBN-1); n++)
|
||||
extra_results[n] = h_resNonce[thr_id][n+1];
|
||||
|
@ -456,7 +456,7 @@ void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *
|
||||
lyra2_gpu_hash_32_v30 <<<grid, block >>> (threads, startNounce, d_outputHash);
|
||||
}
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
MyStreamSynchronize(NULL, order, thr_id);
|
||||
//cudaThreadSynchronize();
|
||||
}
|
||||
|
||||
|
@ -316,8 +316,7 @@ void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce,
|
||||
|
||||
pentablake_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
|
||||
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
cudaDeviceSynchronize();
|
||||
MyStreamSynchronize(NULL, order, thr_id);
|
||||
}
|
||||
|
||||
|
||||
@ -375,8 +374,7 @@ void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint3
|
||||
|
||||
pentablake_gpu_hash_64 <<<grid, block, shared_size>>> (threads, startNounce, (uint64_t*)d_outputHash);
|
||||
|
||||
//MyStreamSynchronize(NULL, order, thr_id);
|
||||
cudaDeviceSynchronize();
|
||||
MyStreamSynchronize(NULL, order, thr_id);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -456,7 +454,7 @@ uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounc
|
||||
|
||||
pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);
|
||||
|
||||
CUDA_SAFE_CALL(cudaDeviceSynchronize());
|
||||
CUDA_SAFE_CALL(cudaThreadSynchronize());
|
||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
cudaThreadSynchronize();
|
||||
result = h_resNounce[thr_id][0];
|
||||
@ -560,6 +558,5 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
cudaDeviceSynchronize();
|
||||
return rc;
|
||||
}
|
||||
|
@ -466,7 +466,7 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
|
||||
size_t shared_size = 0;
|
||||
|
||||
qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
|
||||
cudaDeviceSynchronize();
|
||||
cudaThreadSynchronize();
|
||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
//cudaThreadSynchronize();
|
||||
result = h_resNounce[thr_id][0];
|
||||
|
Loading…
x
Reference in New Issue
Block a user