Reduce lyra2, blake, and pentablake CPU load
This commit is contained in:
parent
010eba1760
commit
a66d78e692
@ -257,9 +257,8 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui
|
|||||||
return result;
|
return result;
|
||||||
|
|
||||||
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
|
blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds);
|
||||||
cudaDeviceSynchronize();
|
MyStreamSynchronize(NULL, 0, thr_id);
|
||||||
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||||
//cudaThreadSynchronize(); /* seems no more required */
|
|
||||||
result = h_resNonce[thr_id][0];
|
result = h_resNonce[thr_id][0];
|
||||||
for (int n=0; n < (NBN-1); n++)
|
for (int n=0; n < (NBN-1); n++)
|
||||||
extra_results[n] = h_resNonce[thr_id][n+1];
|
extra_results[n] = h_resNonce[thr_id][n+1];
|
||||||
@ -343,9 +342,8 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
|
|||||||
return result;
|
return result;
|
||||||
|
|
||||||
blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
|
blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu);
|
||||||
cudaDeviceSynchronize();
|
MyStreamSynchronize(NULL, 0, thr_id);
|
||||||
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||||
//cudaThreadSynchronize(); /* seems no more required */
|
|
||||||
result = h_resNonce[thr_id][0];
|
result = h_resNonce[thr_id][0];
|
||||||
for (int n=0; n < (NBN-1); n++)
|
for (int n=0; n < (NBN-1); n++)
|
||||||
extra_results[n] = h_resNonce[thr_id][n+1];
|
extra_results[n] = h_resNonce[thr_id][n+1];
|
||||||
|
@ -456,7 +456,7 @@ void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *
|
|||||||
lyra2_gpu_hash_32_v30 <<<grid, block >>> (threads, startNounce, d_outputHash);
|
lyra2_gpu_hash_32_v30 <<<grid, block >>> (threads, startNounce, d_outputHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
MyStreamSynchronize(NULL, order, thr_id);
|
||||||
//MyStreamSynchronize(NULL, order, thr_id);
|
//cudaThreadSynchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,8 +316,7 @@ void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce,
|
|||||||
|
|
||||||
pentablake_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
|
pentablake_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
|
||||||
|
|
||||||
//MyStreamSynchronize(NULL, order, thr_id);
|
MyStreamSynchronize(NULL, order, thr_id);
|
||||||
cudaDeviceSynchronize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -375,8 +374,7 @@ void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint3
|
|||||||
|
|
||||||
pentablake_gpu_hash_64 <<<grid, block, shared_size>>> (threads, startNounce, (uint64_t*)d_outputHash);
|
pentablake_gpu_hash_64 <<<grid, block, shared_size>>> (threads, startNounce, (uint64_t*)d_outputHash);
|
||||||
|
|
||||||
//MyStreamSynchronize(NULL, order, thr_id);
|
MyStreamSynchronize(NULL, order, thr_id);
|
||||||
cudaDeviceSynchronize();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@ -456,7 +454,7 @@ uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounc
|
|||||||
|
|
||||||
pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);
|
pentablake_gpu_check_hash <<<grid, block, shared_size>>> (threads, startNounce, d_inputHash, d_resNounce[thr_id]);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaDeviceSynchronize());
|
CUDA_SAFE_CALL(cudaThreadSynchronize());
|
||||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||||
cudaThreadSynchronize();
|
cudaThreadSynchronize();
|
||||||
result = h_resNounce[thr_id][0];
|
result = h_resNounce[thr_id][0];
|
||||||
@ -560,6 +558,5 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
|
|||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
cudaDeviceSynchronize();
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -466,7 +466,7 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
|
|||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
|
qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
|
||||||
cudaDeviceSynchronize();
|
cudaThreadSynchronize();
|
||||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||||
//cudaThreadSynchronize();
|
//cudaThreadSynchronize();
|
||||||
result = h_resNounce[thr_id][0];
|
result = h_resNounce[thr_id][0];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user