@ -153,14 +153,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
int rc = 0;
int rc = 0;
uint32_t *hash = NULL;
uint32_t *hash = NULL;
uint32_t *cpu_nonceVector = NULL;
uint32_t *cpu_nonceVector = NULL;
CUDA_SAFE_CALL(cudaMallocHost(&hash, throughput*8*sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMallocHost(&cpu_nonceVector, throughput*sizeof(uint32_t)));
int nrmCalls[6];
int nrmCalls[6];
memset(nrmCalls, 0, sizeof(int) * 6);
memset(nrmCalls, 0, sizeof(int) * 6);
if (opt_benchmark)
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x00f f;
ptarget[7] = 0x000 f;
// create an individual target for each hash, based
// create an individual target for each hash, based
// on the highest bit that is set in ptarget.
// on the highest bit that is set in ptarget.
@ -173,6 +171,8 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
if (!init[thr_id])
if (!init[thr_id])
{
{
cudaSetDevice(device_map[thr_id]);
hefty_cpu_init(thr_id, throughput);
hefty_cpu_init(thr_id, throughput);
sha256_cpu_init(thr_id, throughput);
sha256_cpu_init(thr_id, throughput);
keccak512_cpu_init(thr_id, throughput);
keccak512_cpu_init(thr_id, throughput);
@ -185,13 +185,16 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
init[thr_id] = true;
init[thr_id] = true;
}
}
// odd, but the host allocations below require at least one prior cudaSetDevice() call
CUDA_SAFE_CALL(cudaMallocHost(&hash, (size_t) 32 * throughput));
CUDA_SAFE_CALL(cudaMallocHost(&cpu_nonceVector, sizeof(uint32_t) * throughput));
if (blocklen == HEAVYCOIN_BLKHDR_SZ)
if (blocklen == HEAVYCOIN_BLKHDR_SZ)
{
{
uint16_t *ext = (uint16_t*) &pdata[20];
uint16_t *ext = (uint16_t*) &pdata[20];
if (opt_vote > maxvote && !opt_benchmark) {
if (opt_vote > maxvote && !opt_benchmark) {
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds "
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds the maxvote reported by the pool (%hu).",
"the maxvote reported by the pool (%hu).",
opt_vote, maxvote);
opt_vote, maxvote);
}
}
@ -257,7 +260,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*t, d_hash4output[thr_id], 16, pdata[19]));
devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*t, d_hash4output[thr_id], 16, pdata[19]));
actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr);
actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr);
#else
#else
// todo
// todo (nvlabs cub ?)
actualNumberOfValuesInNonceVectorGPU = 0;
actualNumberOfValuesInNonceVectorGPU = 0;
#endif
#endif
if(actualNumberOfValuesInNonceVectorGPU == 0)
if(actualNumberOfValuesInNonceVectorGPU == 0)
@ -275,8 +278,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
if(actualNumberOfValuesInNonceVectorGPU > 0)
if(actualNumberOfValuesInNonceVectorGPU > 0)
{
{
size_t size = sizeof(uint32_t) * actualNumberOfValuesInNonceVectorGPU;
size_t size = sizeof(uint32_t) * actualNumberOfValuesInNonceVectorGPU;
CUDA_SAFE_CALL(cudaMemcpy(cpu_nonceVector, heavy_nonceVector[thr_id], size, cudaMemcpyDeviceToHost));
cudaMemcpy(cpu_nonceVector, heavy_nonceVector[thr_id], size, cudaMemcpyDeviceToHost);
cudaThreadSynchronize();
for (uint32_t i=0; i < actualNumberOfValuesInNonceVectorGPU; i++)
for (uint32_t i=0; i < actualNumberOfValuesInNonceVectorGPU; i++)
{
{
@ -289,7 +291,6 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
if (memcmp(vhash, foundhash, 32)) {
if (memcmp(vhash, foundhash, 32)) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", nonce);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", nonce);
} else {
} else {
*hashes_done = pdata[19] - first_nonce;
work_set_target_ratio(work, vhash);
work_set_target_ratio(work, vhash);
rc = 1;
rc = 1;
goto exit;
goto exit;
@ -299,15 +300,21 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
}
}
emptyNonceVector:
emptyNonceVector:
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce;
exit:
exit:
*hashes_done = pdata[19] - first_nonce;
cudaFreeHost(cpu_nonceVector);
cudaFreeHost(cpu_nonceVector);
cudaFreeHost(hash);
cudaFreeHost(hash);
CUDA_LOG_ERROR();
return rc;
return rc;
}
}