mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-08 22:07:56 +00:00
never interrupt global benchmark with found nonces
Fix weird hashrates on some algos (like blake) and reset the device between algos for better accuracy, but this reset doesn't seem to be enough to bench all algos correctly... To be tested on Linux; it could be a driver issue... heavy: fix first alloc and indent with tabs...
This commit is contained in:
parent
2308f555c3
commit
61ff92b5b4
@ -439,7 +439,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
|
|||||||
#endif
|
#endif
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX && bench_algo == -1)
|
||||||
{
|
{
|
||||||
uint32_t vhashcpu[8];
|
uint32_t vhashcpu[8];
|
||||||
uint32_t Htarg = (uint32_t)targetHigh;
|
uint32_t Htarg = (uint32_t)targetHigh;
|
||||||
@ -478,7 +478,8 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) throughput + pdata[19] > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x00ff;
|
ptarget[7] = 0x000f;
|
||||||
|
|
||||||
if (!init[thr_id]) {
|
if (!init[thr_id]) {
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
@ -68,7 +68,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) vhash64[8];
|
uint32_t _ALIGN(64) vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
@ -84,7 +84,8 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,6 +93,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -238,16 +238,17 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
return 0;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -81,7 +81,7 @@ ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @PCIFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(P
|
|||||||
|
|
||||||
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
||||||
|
|
||||||
#nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
|
nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
|
||||||
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
|
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
|
||||||
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
|
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
|
||||||
#nvcc_ARCH += -gencode=arch=compute_20,code=\"sm_21,compute_20\"
|
#nvcc_ARCH += -gencode=arch=compute_20,code=\"sm_21,compute_20\"
|
||||||
|
10
bench.cpp
10
bench.cpp
@ -89,6 +89,9 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
int prev_algo = algo;
|
int prev_algo = algo;
|
||||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||||
int mfree, mused;
|
int mfree, mused;
|
||||||
|
// doesnt seems enough to prevent device slow down
|
||||||
|
// after some algo switchs
|
||||||
|
bool need_reset = (gpu_threads == 1);
|
||||||
|
|
||||||
algo++;
|
algo++;
|
||||||
|
|
||||||
@ -143,6 +146,7 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
gpulog(LOG_WARNING, thr_id, "possible %d MB memory leak in %s! %d MB free",
|
gpulog(LOG_WARNING, thr_id, "possible %d MB memory leak in %s! %d MB free",
|
||||||
(device_mem_free[thr_id] - mfree), algo_names[prev_algo], mfree);
|
(device_mem_free[thr_id] - mfree), algo_names[prev_algo], mfree);
|
||||||
cuda_reset_device(thr_id, NULL); // force to free the leak
|
cuda_reset_device(thr_id, NULL); // force to free the leak
|
||||||
|
need_reset = false;
|
||||||
mfree = cuda_available_memory(thr_id);
|
mfree = cuda_available_memory(thr_id);
|
||||||
}
|
}
|
||||||
// store used memory per algo
|
// store used memory per algo
|
||||||
@ -152,14 +156,13 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
// store to dump a table per gpu later
|
// store to dump a table per gpu later
|
||||||
algo_hashrates[thr_id][prev_algo] = hashrate;
|
algo_hashrates[thr_id][prev_algo] = hashrate;
|
||||||
|
|
||||||
|
|
||||||
// wait the other threads to display logs correctly
|
// wait the other threads to display logs correctly
|
||||||
if (opt_n_threads > 1) {
|
if (opt_n_threads > 1) {
|
||||||
pthread_barrier_wait(&algo_barr);
|
pthread_barrier_wait(&algo_barr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (algo == ALGO_AUTO)
|
if (algo == ALGO_AUTO)
|
||||||
return false;
|
return false; // all algos done
|
||||||
|
|
||||||
// mutex primary used for the stats purge
|
// mutex primary used for the stats purge
|
||||||
pthread_mutex_lock(&bench_lock);
|
pthread_mutex_lock(&bench_lock);
|
||||||
@ -170,6 +173,9 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
thr_hashrates[thr_id] = 0; // reset for minmax64
|
thr_hashrates[thr_id] = 0; // reset for minmax64
|
||||||
pthread_mutex_unlock(&bench_lock);
|
pthread_mutex_unlock(&bench_lock);
|
||||||
|
|
||||||
|
if (need_reset)
|
||||||
|
cuda_reset_device(thr_id, NULL);
|
||||||
|
|
||||||
if (thr_id == 0)
|
if (thr_id == 0)
|
||||||
applog(LOG_BLUE, "Benchmark algo %s...", algo_names[algo]);
|
applog(LOG_BLUE, "Benchmark algo %s...", algo_names[algo]);
|
||||||
|
|
||||||
|
15
ccminer.cpp
15
ccminer.cpp
@ -479,9 +479,11 @@ void proper_exit(int reason)
|
|||||||
reason = app_exit_code;
|
reason = app_exit_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&stats_lock);
|
||||||
if (check_dups)
|
if (check_dups)
|
||||||
hashlog_purge_all();
|
hashlog_purge_all();
|
||||||
stats_purge_all();
|
stats_purge_all();
|
||||||
|
pthread_mutex_unlock(&stats_lock);
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
timeEndPeriod(1); // else never executed
|
timeEndPeriod(1); // else never executed
|
||||||
@ -496,7 +498,7 @@ void proper_exit(int reason)
|
|||||||
#endif
|
#endif
|
||||||
free(opt_syslog_pfx);
|
free(opt_syslog_pfx);
|
||||||
free(opt_api_allow);
|
free(opt_api_allow);
|
||||||
free(work_restart);
|
//free(work_restart);
|
||||||
//free(thr_info);
|
//free(thr_info);
|
||||||
exit(reason);
|
exit(reason);
|
||||||
}
|
}
|
||||||
@ -1709,18 +1711,22 @@ static void *miner_thread(void *userdata)
|
|||||||
if (max64 < minmax) {
|
if (max64 < minmax) {
|
||||||
switch (opt_algo) {
|
switch (opt_algo) {
|
||||||
case ALGO_BLAKECOIN:
|
case ALGO_BLAKECOIN:
|
||||||
case ALGO_BLAKE:
|
|
||||||
case ALGO_WHIRLPOOLX:
|
|
||||||
minmax = 0x80000000U;
|
minmax = 0x80000000U;
|
||||||
break;
|
break;
|
||||||
|
case ALGO_BLAKE:
|
||||||
case ALGO_BMW:
|
case ALGO_BMW:
|
||||||
|
case ALGO_WHIRLPOOLX:
|
||||||
minmax = 0x40000000U;
|
minmax = 0x40000000U;
|
||||||
break;
|
break;
|
||||||
|
case ALGO_KECCAK:
|
||||||
case ALGO_LUFFA:
|
case ALGO_LUFFA:
|
||||||
minmax = 0x2000000;
|
case ALGO_SKEIN:
|
||||||
|
case ALGO_SKEIN2:
|
||||||
|
minmax = 0x1000000;
|
||||||
break;
|
break;
|
||||||
case ALGO_C11:
|
case ALGO_C11:
|
||||||
case ALGO_DEEP:
|
case ALGO_DEEP:
|
||||||
|
case ALGO_HEAVY:
|
||||||
case ALGO_LYRA2v2:
|
case ALGO_LYRA2v2:
|
||||||
case ALGO_S3:
|
case ALGO_S3:
|
||||||
case ALGO_X11:
|
case ALGO_X11:
|
||||||
@ -1729,7 +1735,6 @@ static void *miner_thread(void *userdata)
|
|||||||
case ALGO_WHIRLPOOL:
|
case ALGO_WHIRLPOOL:
|
||||||
minmax = 0x400000;
|
minmax = 0x400000;
|
||||||
break;
|
break;
|
||||||
case ALGO_KECCAK:
|
|
||||||
case ALGO_JACKPOT:
|
case ALGO_JACKPOT:
|
||||||
case ALGO_X14:
|
case ALGO_X14:
|
||||||
case ALGO_X15:
|
case ALGO_X15:
|
||||||
|
4
cuda.cpp
4
cuda.cpp
@ -176,9 +176,11 @@ void cuda_reset_device(int thr_id, bool *init)
|
|||||||
}
|
}
|
||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
if (opt_cudaschedule >= 0) {
|
if (opt_cudaschedule >= 0) {
|
||||||
cudaSetDevice(dev_id);
|
|
||||||
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
|
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
|
||||||
|
} else {
|
||||||
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||||
}
|
}
|
||||||
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
// return free memory in megabytes
|
// return free memory in megabytes
|
||||||
|
@ -121,6 +121,9 @@ uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uin
|
|||||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
|
if (bench_algo >= 0) // dont interrupt the global benchmark
|
||||||
|
return UINT32_MAX;
|
||||||
|
|
||||||
if (!init_done) {
|
if (!init_done) {
|
||||||
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
||||||
return UINT32_MAX;
|
return UINT32_MAX;
|
||||||
@ -143,6 +146,9 @@ uint32_t cuda_check_hash_32(int thr_id, uint32_t threads, uint32_t startNounce,
|
|||||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
|
if (bench_algo >= 0) // dont interrupt the global benchmark
|
||||||
|
return UINT32_MAX;
|
||||||
|
|
||||||
if (!init_done) {
|
if (!init_done) {
|
||||||
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
||||||
return UINT32_MAX;
|
return UINT32_MAX;
|
||||||
@ -237,9 +243,12 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun
|
|||||||
|
|
||||||
uint32_t result = UINT32_MAX;
|
uint32_t result = UINT32_MAX;
|
||||||
|
|
||||||
|
if (bench_algo >= 0) // dont interrupt the global benchmark
|
||||||
|
return result;
|
||||||
|
|
||||||
if (!init_done) {
|
if (!init_done) {
|
||||||
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
applog(LOG_ERR, "missing call to cuda_check_cpu_init");
|
||||||
return UINT32_MAX;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
|
cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
|
||||||
|
@ -135,11 +135,17 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
#ifdef USE_STREAMS
|
#ifdef USE_STREAMS
|
||||||
for (int i = 0; i < 5; i++)
|
for (int i = 0; i < 5; i++)
|
||||||
cudaStreamDestroy(stream[i]);
|
cudaStreamDestroy(stream[i]);
|
||||||
|
@ -65,7 +65,9 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne
|
|||||||
uint32_t foundNounce = UINT32_MAX;
|
uint32_t foundNounce = UINT32_MAX;
|
||||||
fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);
|
fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);
|
||||||
|
|
||||||
if (foundNounce < UINT32_MAX)
|
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||||
|
|
||||||
|
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
uint32_t vhash[8];
|
uint32_t vhash[8];
|
||||||
sph_fugue256_context ctx_fugue;
|
sph_fugue256_context ctx_fugue;
|
||||||
@ -79,14 +81,13 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne
|
|||||||
{
|
{
|
||||||
work_set_target_ratio(work, vhash);
|
work_set_target_ratio(work, vhash);
|
||||||
pdata[19] = foundNounce;
|
pdata[19] = foundNounce;
|
||||||
*hashes_done = foundNounce - start_nonce + 1;
|
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -95,7 +96,7 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - start_nonce + 1;
|
*hashes_done = pdata[19] - start_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
uint32_t *outputHash = (uint32_t*)malloc((size_t) 64* throughput);
|
uint32_t *outputHash = (uint32_t*)malloc((size_t) 64* throughput);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x000ff;
|
ptarget[7] = 0x001f;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
@ -62,7 +62,7 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
// GPU hash
|
// GPU hash
|
||||||
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
||||||
|
|
||||||
if (foundNounce < UINT32_MAX)
|
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) vhash[8];
|
uint32_t _ALIGN(64) vhash[8];
|
||||||
endiandata[19] = swab32(foundNounce);
|
endiandata[19] = swab32(foundNounce);
|
||||||
@ -78,14 +78,15 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
*hashes_done = pdata[19] - start_nonce + 1;
|
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - start_nonce;
|
||||||
|
|
||||||
free(outputHash);
|
free(outputHash);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -153,14 +153,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
int rc = 0;
|
int rc = 0;
|
||||||
uint32_t *hash = NULL;
|
uint32_t *hash = NULL;
|
||||||
uint32_t *cpu_nonceVector = NULL;
|
uint32_t *cpu_nonceVector = NULL;
|
||||||
CUDA_SAFE_CALL(cudaMallocHost(&hash, throughput*8*sizeof(uint32_t)));
|
|
||||||
CUDA_SAFE_CALL(cudaMallocHost(&cpu_nonceVector, throughput*sizeof(uint32_t)));
|
|
||||||
|
|
||||||
int nrmCalls[6];
|
int nrmCalls[6];
|
||||||
memset(nrmCalls, 0, sizeof(int) * 6);
|
memset(nrmCalls, 0, sizeof(int) * 6);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x00ff;
|
ptarget[7] = 0x000f;
|
||||||
|
|
||||||
// für jeden Hash ein individuelles Target erstellen basierend
|
// für jeden Hash ein individuelles Target erstellen basierend
|
||||||
// auf dem höchsten Bit, das in ptarget gesetzt ist.
|
// auf dem höchsten Bit, das in ptarget gesetzt ist.
|
||||||
@ -173,6 +171,8 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
hefty_cpu_init(thr_id, throughput);
|
hefty_cpu_init(thr_id, throughput);
|
||||||
sha256_cpu_init(thr_id, throughput);
|
sha256_cpu_init(thr_id, throughput);
|
||||||
keccak512_cpu_init(thr_id, throughput);
|
keccak512_cpu_init(thr_id, throughput);
|
||||||
@ -185,13 +185,16 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// weird but require at least one cudaSetDevice first
|
||||||
|
CUDA_SAFE_CALL(cudaMallocHost(&hash, (size_t) 32 * throughput));
|
||||||
|
CUDA_SAFE_CALL(cudaMallocHost(&cpu_nonceVector, sizeof(uint32_t) * throughput));
|
||||||
|
|
||||||
if (blocklen == HEAVYCOIN_BLKHDR_SZ)
|
if (blocklen == HEAVYCOIN_BLKHDR_SZ)
|
||||||
{
|
{
|
||||||
uint16_t *ext = (uint16_t*) &pdata[20];
|
uint16_t *ext = (uint16_t*) &pdata[20];
|
||||||
|
|
||||||
if (opt_vote > maxvote && !opt_benchmark) {
|
if (opt_vote > maxvote && !opt_benchmark) {
|
||||||
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds "
|
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds the maxvote reported by the pool (%hu).",
|
||||||
"the maxvote reported by the pool (%hu).",
|
|
||||||
opt_vote, maxvote);
|
opt_vote, maxvote);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -257,7 +260,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*t, d_hash4output[thr_id], 16, pdata[19]));
|
devNoncePtrEnd = thrust::remove_if(devNoncePtr, devNoncePtrEnd, check_nonce_for_remove(*t, d_hash4output[thr_id], 16, pdata[19]));
|
||||||
actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr);
|
actualNumberOfValuesInNonceVectorGPU = (uint32_t)(devNoncePtrEnd - devNoncePtr);
|
||||||
#else
|
#else
|
||||||
// todo
|
// todo (nvlabs cub ?)
|
||||||
actualNumberOfValuesInNonceVectorGPU = 0;
|
actualNumberOfValuesInNonceVectorGPU = 0;
|
||||||
#endif
|
#endif
|
||||||
if(actualNumberOfValuesInNonceVectorGPU == 0)
|
if(actualNumberOfValuesInNonceVectorGPU == 0)
|
||||||
@ -275,8 +278,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
if(actualNumberOfValuesInNonceVectorGPU > 0)
|
if(actualNumberOfValuesInNonceVectorGPU > 0)
|
||||||
{
|
{
|
||||||
size_t size = sizeof(uint32_t) * actualNumberOfValuesInNonceVectorGPU;
|
size_t size = sizeof(uint32_t) * actualNumberOfValuesInNonceVectorGPU;
|
||||||
CUDA_SAFE_CALL(cudaMemcpy(cpu_nonceVector, heavy_nonceVector[thr_id], size, cudaMemcpyDeviceToHost));
|
cudaMemcpy(cpu_nonceVector, heavy_nonceVector[thr_id], size, cudaMemcpyDeviceToHost);
|
||||||
cudaThreadSynchronize();
|
|
||||||
|
|
||||||
for (uint32_t i=0; i < actualNumberOfValuesInNonceVectorGPU; i++)
|
for (uint32_t i=0; i < actualNumberOfValuesInNonceVectorGPU; i++)
|
||||||
{
|
{
|
||||||
@ -289,7 +291,6 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
if (memcmp(vhash, foundhash, 32)) {
|
if (memcmp(vhash, foundhash, 32)) {
|
||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", nonce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", nonce);
|
||||||
} else {
|
} else {
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
work_set_target_ratio(work, vhash);
|
work_set_target_ratio(work, vhash);
|
||||||
rc = 1;
|
rc = 1;
|
||||||
goto exit;
|
goto exit;
|
||||||
@ -299,15 +300,21 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
}
|
}
|
||||||
|
|
||||||
emptyNonceVector:
|
emptyNonceVector:
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
cudaFreeHost(cpu_nonceVector);
|
cudaFreeHost(cpu_nonceVector);
|
||||||
cudaFreeHost(hash);
|
cudaFreeHost(hash);
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,14 +122,14 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
int order = 0;
|
int order = 0;
|
||||||
uint32_t foundNonce;
|
uint32_t foundNonce;
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
||||||
|
|
||||||
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
TRACE("S")
|
TRACE("S")
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
@ -162,10 +162,15 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,14 +96,13 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
// reduce cpu usage
|
// reduce cpu usage
|
||||||
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||||
}
|
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
|
}
|
||||||
|
|
||||||
blake256_cpu_init(thr_id, throughput);
|
blake256_cpu_init(thr_id, throughput);
|
||||||
keccak256_cpu_init(thr_id,throughput);
|
keccak256_cpu_init(thr_id,throughput);
|
||||||
skein256_cpu_init(thr_id, throughput);
|
skein256_cpu_init(thr_id, throughput);
|
||||||
bmw256_cpu_init(thr_id, throughput);
|
bmw256_cpu_init(thr_id, throughput);
|
||||||
CUDA_LOG_ERROR();
|
|
||||||
|
|
||||||
// SM 3 implentation requires a bit more memory
|
// SM 3 implentation requires a bit more memory
|
||||||
if (device_sm[dev_id] < 500 || cuda_arch[dev_id] < 500)
|
if (device_sm[dev_id] < 500 || cuda_arch[dev_id] < 500)
|
||||||
@ -175,11 +174,15 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (!work_restart[thr_id].restart && (max_nonce > ((uint64_t)(pdata[19]) + throughput)));
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
|
uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
ptarget[7] = 0x0000ff;
|
||||||
|
|
||||||
// init
|
// init
|
||||||
if(!init[thr_id])
|
if(!init[thr_id])
|
||||||
@ -63,11 +63,11 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
// GPU
|
// GPU
|
||||||
uint32_t foundNounce = UINT32_MAX;
|
uint32_t foundNounce = UINT32_MAX;
|
||||||
|
|
||||||
*hashes_done = pdata[19] - start_nonce + throughput;
|
|
||||||
|
|
||||||
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
||||||
|
|
||||||
if (foundNounce < UINT32_MAX)
|
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||||
|
|
||||||
|
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) vhash[8];
|
uint32_t _ALIGN(64) vhash[8];
|
||||||
endiandata[19] = swab32(foundNounce);
|
endiandata[19] = swab32(foundNounce);
|
||||||
@ -82,8 +82,7 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
*hashes_done = pdata[19] - start_nonce;
|
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -91,6 +90,8 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = max_nonce - start_nonce;
|
||||||
|
|
||||||
free(outputHash);
|
free(outputHash);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -79,11 +79,16 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (!work_restart[thr_id].restart && (max_nonce > ((uint64_t)(pdata[19]) + throughput)));
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,9 +110,14 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -117,9 +117,14 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -36,19 +36,21 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 22); // 256*256*8*8
|
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21);
|
||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000f;
|
ptarget[7] = 0x0000f;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
if (opt_cudaschedule == -1 && gpu_threads == 1) {
|
||||||
|
cudaDeviceReset();
|
||||||
|
// reduce cpu usage
|
||||||
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
//if (opt_cudaschedule == -1) // to reduce cpu usage...
|
}
|
||||||
// cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
|
||||||
//CUDA_LOG_ERROR();
|
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
||||||
|
|
||||||
@ -65,10 +67,9 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
cuda_check_cpu_setTarget(ptarget);
|
cuda_check_cpu_setTarget(ptarget);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
int order = 0;
|
qubit_luffa512_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], 0);
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
||||||
|
|
||||||
qubit_luffa512_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
@ -86,8 +87,8 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) throughput + pdata[19] > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
// pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,7 +96,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
6
skein.cu
6
skein.cu
@ -453,9 +453,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) throughput + pdata[19] > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
//applog(LOG_DEBUG, "done... max=%u", max_nonce);
|
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -464,6 +462,8 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,8 +110,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) throughput + pdata[19] > max_nonce) {
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -120,6 +119,8 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
x11/c11.cu
10
x11/c11.cu
@ -200,14 +200,20 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
} else {
|
} else {
|
||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
||||||
pdata[19] = foundNonce + 1;
|
pdata[19] = foundNonce + 1;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
11
x11/fresh.cu
11
x11/fresh.cu
@ -78,18 +78,19 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x00ff;
|
ptarget[7] = 0x00ff;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput + 4), -1);
|
||||||
|
|
||||||
x11_shavite512_cpu_init(thr_id, throughput);
|
x11_shavite512_cpu_init(thr_id, throughput);
|
||||||
x11_simd512_cpu_init(thr_id, throughput);
|
x11_simd512_cpu_init(thr_id, throughput);
|
||||||
x11_echo512_cpu_init(thr_id, throughput);
|
x11_echo512_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4), 0);
|
|
||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
@ -101,8 +102,6 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
x11_shavite512_setBlock_80((void*)endiandata);
|
x11_shavite512_setBlock_80((void*)endiandata);
|
||||||
cuda_check_cpu_setTarget(ptarget);
|
cuda_check_cpu_setTarget(ptarget);
|
||||||
do {
|
do {
|
||||||
uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
uint32_t foundNonce;
|
uint32_t foundNonce;
|
||||||
int order = 0;
|
int order = 0;
|
||||||
|
|
||||||
@ -128,7 +127,7 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
fresh_hash(vhash64, endiandata);
|
fresh_hash(vhash64, endiandata);
|
||||||
|
|
||||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
||||||
int res = 1;
|
int res = 1;
|
||||||
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
|
@ -147,11 +147,16 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
11
x11/x11.cu
11
x11/x11.cu
@ -138,7 +138,7 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
|
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); // why 64 ?
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
@ -205,14 +205,19 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
} else {
|
} else {
|
||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
||||||
pdata[19] = foundNonce + 1;
|
pdata[19] = foundNonce + 1;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,7 +206,7 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t)pdata[19] + throughput > (uint64_t)max_nonce) {
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
pdata[19] = max_nonce;
|
pdata[19] = max_nonce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -214,7 +214,7 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
|
@ -83,12 +83,14 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
uint32_t foundNonce;
|
uint32_t foundNonce;
|
||||||
int order = 0;
|
int order = 0;
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
whirlpool512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
whirlpool512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint32_t vhash[8];
|
uint32_t vhash[8];
|
||||||
@ -97,7 +99,6 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
|
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
|
||||||
int res = 1;
|
int res = 1;
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
||||||
work_set_target_ratio(work, vhash);
|
work_set_target_ratio(work, vhash);
|
||||||
#if 0
|
#if 0
|
||||||
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
||||||
@ -112,9 +113,13 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
|
applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -49,12 +49,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
ptarget[7] = 0x000f;
|
||||||
|
|
||||||
if (!init[thr_id]) {
|
if (!init[thr_id]) {
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1);
|
||||||
|
|
||||||
whirlpoolx_cpu_init(thr_id, throughput);
|
whirlpoolx_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
@ -70,15 +70,16 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
uint32_t foundNonce = whirlpoolx_cpu_hash(thr_id, throughput, pdata[19]);
|
uint32_t foundNonce = whirlpoolx_cpu_hash(thr_id, throughput, pdata[19]);
|
||||||
if (foundNonce != UINT32_MAX)
|
|
||||||
|
*(hashes_done) = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
|
if (foundNonce != UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
whirlxHash(vhash64, endiandata);
|
whirlxHash(vhash64, endiandata);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
||||||
|
|
||||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
@ -88,15 +89,16 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
if (((uint64_t)pdata[19]+throughput) >= max_nonce) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*(hashes_done) = pdata[19] - first_nonce + 1;
|
*(hashes_done) = pdata[19] - first_nonce;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
10
x15/x14.cu
10
x15/x14.cu
@ -219,13 +219,19 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
CUDA_LOG_ERROR();
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,11 +226,16 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -251,11 +251,16 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
|
||||||
|
pdata[19] = max_nonce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
pdata[19] += throughput;
|
pdata[19] += throughput;
|
||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user