mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-22 12:34:17 +00:00
benchmark: store all algos results + cuda fixes
Note: lyra2, lyra2v2 and script seems to have problems to coexist with other algos... to run after some of them... moved lyra2 first and skip scrypt/jane for the moment... Only stored in memory for now.. to display a table after the bench ccminer -a auto --benchmark Results may be exported later to a json file...
This commit is contained in:
parent
934555994d
commit
4e1e03b891
@ -49,7 +49,7 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
throughput = min(throughput, max_nonce - first_nonce);
|
throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0005;
|
ptarget[7] = 0x0005;
|
||||||
|
|
||||||
if (!init[thr_id]) {
|
if (!init[thr_id]) {
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
@ -66,6 +66,7 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
|
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cudaGetLastError();
|
||||||
bmw256_setBlock_80(thr_id, (void*)endiandata);
|
bmw256_setBlock_80(thr_id, (void*)endiandata);
|
||||||
|
|
||||||
cuda_check_cpu_setTarget(ptarget);
|
cuda_check_cpu_setTarget(ptarget);
|
||||||
|
148
ccminer.cpp
148
ccminer.cpp
@ -85,7 +85,9 @@ struct workio_cmd {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum sha_algos {
|
enum sha_algos {
|
||||||
ALGO_BLAKE,
|
ALGO_BLAKE = 0,
|
||||||
|
ALGO_LYRA2, /* moved first for benchs */
|
||||||
|
ALGO_LYRA2v2,
|
||||||
ALGO_BLAKECOIN,
|
ALGO_BLAKECOIN,
|
||||||
ALGO_BMW,
|
ALGO_BMW,
|
||||||
ALGO_C11,
|
ALGO_C11,
|
||||||
@ -98,8 +100,6 @@ enum sha_algos {
|
|||||||
ALGO_KECCAK,
|
ALGO_KECCAK,
|
||||||
ALGO_JACKPOT,
|
ALGO_JACKPOT,
|
||||||
ALGO_LUFFA,
|
ALGO_LUFFA,
|
||||||
ALGO_LYRA2,
|
|
||||||
ALGO_LYRA2v2,
|
|
||||||
ALGO_MJOLLNIR, /* Hefty hash */
|
ALGO_MJOLLNIR, /* Hefty hash */
|
||||||
ALGO_MYR_GR,
|
ALGO_MYR_GR,
|
||||||
ALGO_NEOSCRYPT,
|
ALGO_NEOSCRYPT,
|
||||||
@ -127,6 +127,8 @@ enum sha_algos {
|
|||||||
|
|
||||||
static const char *algo_names[] = {
|
static const char *algo_names[] = {
|
||||||
"blake",
|
"blake",
|
||||||
|
"lyra2",
|
||||||
|
"lyra2v2",
|
||||||
"blakecoin",
|
"blakecoin",
|
||||||
"bmw",
|
"bmw",
|
||||||
"c11",
|
"c11",
|
||||||
@ -139,8 +141,6 @@ static const char *algo_names[] = {
|
|||||||
"keccak",
|
"keccak",
|
||||||
"jackpot",
|
"jackpot",
|
||||||
"luffa",
|
"luffa",
|
||||||
"lyra2",
|
|
||||||
"lyra2v2",
|
|
||||||
"mjollnir",
|
"mjollnir",
|
||||||
"myr-gr",
|
"myr-gr",
|
||||||
"neoscrypt",
|
"neoscrypt",
|
||||||
@ -171,7 +171,6 @@ bool opt_debug_diff = false;
|
|||||||
bool opt_debug_threads = false;
|
bool opt_debug_threads = false;
|
||||||
bool opt_protocol = false;
|
bool opt_protocol = false;
|
||||||
bool opt_benchmark = false;
|
bool opt_benchmark = false;
|
||||||
int algo_benchmark = -1;
|
|
||||||
bool opt_showdiff = false;
|
bool opt_showdiff = false;
|
||||||
|
|
||||||
// todo: limit use of these flags,
|
// todo: limit use of these flags,
|
||||||
@ -266,6 +265,7 @@ volatile bool abort_flag = false;
|
|||||||
struct work_restart *work_restart = NULL;
|
struct work_restart *work_restart = NULL;
|
||||||
static int app_exit_code = EXIT_CODE_OK;
|
static int app_exit_code = EXIT_CODE_OK;
|
||||||
|
|
||||||
|
pthread_mutex_t algo_lock;
|
||||||
pthread_mutex_t applog_lock;
|
pthread_mutex_t applog_lock;
|
||||||
static pthread_mutex_t stats_lock;
|
static pthread_mutex_t stats_lock;
|
||||||
static double thr_hashrates[MAX_GPUS] = { 0 };
|
static double thr_hashrates[MAX_GPUS] = { 0 };
|
||||||
@ -279,8 +279,12 @@ uint8_t conditional_state[MAX_GPUS] = { 0 };
|
|||||||
double opt_max_temp = 0.0;
|
double opt_max_temp = 0.0;
|
||||||
double opt_max_diff = -1.;
|
double opt_max_diff = -1.;
|
||||||
double opt_max_rate = -1.;
|
double opt_max_rate = -1.;
|
||||||
|
// algos benchmark
|
||||||
|
int algo_benchmark = -1;
|
||||||
|
double * algo_hashrates[MAX_GPUS] = { 0 };
|
||||||
|
int device_mem_free[MAX_GPUS] = { 0 };
|
||||||
int opt_statsavg = 30;
|
int opt_statsavg = 30;
|
||||||
|
|
||||||
// strdup on char* to allow a common free() if used
|
// strdup on char* to allow a common free() if used
|
||||||
static char* opt_syslog_pfx = strdup(PROGRAM_NAME);
|
static char* opt_syslog_pfx = strdup(PROGRAM_NAME);
|
||||||
char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
|
char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
|
||||||
@ -1562,6 +1566,9 @@ void miner_free_device(int thr_id)
|
|||||||
//free_sha256d(thr_id);
|
//free_sha256d(thr_id);
|
||||||
free_scrypt(thr_id);
|
free_scrypt(thr_id);
|
||||||
free_scrypt_jane(thr_id);
|
free_scrypt_jane(thr_id);
|
||||||
|
|
||||||
|
// reset remains of error..
|
||||||
|
cudaGetLastError();
|
||||||
}
|
}
|
||||||
|
|
||||||
// to benchmark all algos
|
// to benchmark all algos
|
||||||
@ -1577,40 +1584,78 @@ bool algo_switch_next(int thr_id)
|
|||||||
miner_free_device(thr_id);
|
miner_free_device(thr_id);
|
||||||
mfree = cuda_available_memory(thr_id);
|
mfree = cuda_available_memory(thr_id);
|
||||||
|
|
||||||
work_restart[thr_id].restart = 1;
|
|
||||||
|
|
||||||
algo++;
|
algo++;
|
||||||
if (algo == ALGO_AUTO)
|
|
||||||
return false;
|
// skip some duplicated algos
|
||||||
|
if (algo == ALGO_C11) algo++; // same as x11
|
||||||
|
if (algo == ALGO_DMD_GR) algo++; // same as groestl
|
||||||
|
if (algo == ALGO_WHIRLCOIN) algo++; // same as whirlpool
|
||||||
|
// and unwanted ones...
|
||||||
|
if (algo == ALGO_SCRYPT) algo++;
|
||||||
|
if (algo == ALGO_SCRYPT_JANE) algo++;
|
||||||
|
|
||||||
|
work_restart[thr_id].restart = 1;
|
||||||
|
|
||||||
// we need to wait completion on all cards before the switch
|
// we need to wait completion on all cards before the switch
|
||||||
if (opt_n_threads > 1) {
|
if (opt_n_threads > 1) {
|
||||||
pthread_mutex_lock(&stratum_sock_lock); // unused in benchmark
|
pthread_mutex_lock(&algo_lock); // wait work_restart for all
|
||||||
for (int n=0; n < opt_n_threads; n++)
|
for (int n=0; n < opt_n_threads; n++) {
|
||||||
if (!work_restart[thr_id].restart) {
|
int timeout = 1000;
|
||||||
applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n]);
|
while (!work_restart[n].restart && --timeout) {
|
||||||
usleep(100*1000);
|
//applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n]);
|
||||||
|
usleep(200*1000);
|
||||||
}
|
}
|
||||||
sleep(1);
|
}
|
||||||
pthread_mutex_unlock(&stratum_sock_lock);
|
pthread_mutex_unlock(&algo_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
|
double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
|
||||||
format_hashrate(hashrate, rate);
|
format_hashrate(hashrate, rate);
|
||||||
applog(LOG_NOTICE, "GPU #%d: %s rate: %s - %d MB free", dev_id, algo_names[prev_algo], rate, mfree);
|
applog(LOG_NOTICE, "GPU #%d: %s hashrate = %s", dev_id, algo_names[prev_algo], rate);
|
||||||
|
|
||||||
|
// check if there is there is memory leaks
|
||||||
|
if (device_mem_free[thr_id] > mfree)
|
||||||
|
applog(LOG_WARNING, "GPU #%d, memory leak detected! %d MB free", dev_id, mfree);
|
||||||
|
device_mem_free[thr_id] = mfree;
|
||||||
|
|
||||||
|
// store to dump a table per gpu later
|
||||||
|
algo_hashrates[thr_id][prev_algo] = hashrate;
|
||||||
|
|
||||||
|
if (algo == ALGO_AUTO)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// wait other threads before algo switch
|
||||||
|
pthread_mutex_lock(&algo_lock);
|
||||||
|
|
||||||
|
opt_algo = (enum sha_algos) algo;
|
||||||
|
work_restart[thr_id].restart = 0;
|
||||||
|
|
||||||
stats_purge_all();
|
stats_purge_all();
|
||||||
global_hashrate = 0;
|
global_hashrate = 0;
|
||||||
|
|
||||||
opt_algo = (enum sha_algos) algo;
|
if (thr_id == 0)
|
||||||
|
applog(LOG_BLUE, "Benchmark algo %s...", algo_names[algo]);
|
||||||
|
|
||||||
applog(LOG_BLUE, "GPU #%d: Benchmark for algo %s...", dev_id, algo_names[algo]);
|
//applog(LOG_BLUE, "GPU #%d: Benchmark algo %s...", dev_id, algo_names[algo]);
|
||||||
sleep(1);
|
pthread_mutex_unlock(&algo_lock);
|
||||||
work_restart[thr_id].restart = 0;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void display_benchmark_results()
|
||||||
|
{
|
||||||
|
for (int n=0; n < opt_n_threads; n++)
|
||||||
|
{
|
||||||
|
int dev_id = device_map[n];
|
||||||
|
applog(LOG_BLUE, "Benchmark results for GPU #%d - %s:", dev_id, device_name[dev_id]);
|
||||||
|
for (int i=0; i < ALGO_COUNT-1; i++) {
|
||||||
|
double rate = algo_hashrates[n][i];
|
||||||
|
if (rate == 0.0) continue;
|
||||||
|
applog(LOG_INFO, "%12s : %15.0f H/s", algo_names[i], rate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void *miner_thread(void *userdata)
|
static void *miner_thread(void *userdata)
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = (struct thr_info *)userdata;
|
struct thr_info *mythr = (struct thr_info *)userdata;
|
||||||
@ -1729,19 +1774,6 @@ static void *miner_thread(void *userdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt_benchmark && algo_benchmark >= 0) {
|
|
||||||
if (loopcnt > 3) {
|
|
||||||
if (!algo_switch_next(thr_id)) {
|
|
||||||
proper_exit(0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
algo_benchmark = (int) opt_algo;
|
|
||||||
// for scrypt...
|
|
||||||
opt_autotune = false;
|
|
||||||
loopcnt = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
|
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
|
||||||
{
|
{
|
||||||
if (opt_debug) {
|
if (opt_debug) {
|
||||||
@ -1779,6 +1811,24 @@ static void *miner_thread(void *userdata)
|
|||||||
|
|
||||||
pthread_mutex_unlock(&g_work_lock);
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
|
|
||||||
|
// -a auto --benchmark
|
||||||
|
if (opt_benchmark && algo_benchmark >= 0) {
|
||||||
|
//applog(LOG_DEBUG, "GPU #%d: loop %d", device_map[thr_id], loopcnt);
|
||||||
|
if (loopcnt >= 3) {
|
||||||
|
if (!algo_switch_next(thr_id) && thr_id == 0)
|
||||||
|
{
|
||||||
|
display_benchmark_results();
|
||||||
|
proper_exit(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
algo_benchmark = (int) opt_algo;
|
||||||
|
// for scrypt...
|
||||||
|
opt_autotune = false;
|
||||||
|
loopcnt = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loopcnt++;
|
||||||
|
|
||||||
/* prevent gpu scans before a job is received */
|
/* prevent gpu scans before a job is received */
|
||||||
if (have_stratum && work.data[0] == 0 && !opt_benchmark) {
|
if (have_stratum && work.data[0] == 0 && !opt_benchmark) {
|
||||||
sleep(1);
|
sleep(1);
|
||||||
@ -1877,6 +1927,7 @@ static void *miner_thread(void *userdata)
|
|||||||
minmax = 0x2000000;
|
minmax = 0x2000000;
|
||||||
break;
|
break;
|
||||||
case ALGO_C11:
|
case ALGO_C11:
|
||||||
|
case ALGO_DEEP:
|
||||||
case ALGO_LYRA2v2:
|
case ALGO_LYRA2v2:
|
||||||
case ALGO_S3:
|
case ALGO_S3:
|
||||||
case ALGO_X11:
|
case ALGO_X11:
|
||||||
@ -1885,11 +1936,11 @@ static void *miner_thread(void *userdata)
|
|||||||
case ALGO_WHIRLPOOL:
|
case ALGO_WHIRLPOOL:
|
||||||
minmax = 0x400000;
|
minmax = 0x400000;
|
||||||
break;
|
break;
|
||||||
case ALGO_LYRA2:
|
|
||||||
case ALGO_NEOSCRYPT:
|
case ALGO_NEOSCRYPT:
|
||||||
case ALGO_X15:
|
case ALGO_X15:
|
||||||
minmax = 0x300000;
|
minmax = 0x300000;
|
||||||
break;
|
break;
|
||||||
|
case ALGO_LYRA2:
|
||||||
case ALGO_SCRYPT:
|
case ALGO_SCRYPT:
|
||||||
minmax = 0x80000;
|
minmax = 0x80000;
|
||||||
break;
|
break;
|
||||||
@ -1914,7 +1965,7 @@ static void *miner_thread(void *userdata)
|
|||||||
else
|
else
|
||||||
max_nonce = (uint32_t) (max64 + start_nonce);
|
max_nonce = (uint32_t) (max64 + start_nonce);
|
||||||
|
|
||||||
// todo: keep it rounded for gpu threads ?
|
// todo: keep it rounded to a multiple of 256 ?
|
||||||
|
|
||||||
if (unlikely(start_nonce > max_nonce)) {
|
if (unlikely(start_nonce > max_nonce)) {
|
||||||
// should not happen but seen in skein2 benchmark with 2 gpus
|
// should not happen but seen in skein2 benchmark with 2 gpus
|
||||||
@ -1930,6 +1981,9 @@ static void *miner_thread(void *userdata)
|
|||||||
hashes_done = 0;
|
hashes_done = 0;
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
|
|
||||||
|
|
||||||
|
cudaGetLastError(); // reset previous errors
|
||||||
|
|
||||||
/* scan nonces for a proof-of-work hash */
|
/* scan nonces for a proof-of-work hash */
|
||||||
switch (opt_algo) {
|
switch (opt_algo) {
|
||||||
|
|
||||||
@ -2080,7 +2134,7 @@ static void *miner_thread(void *userdata)
|
|||||||
pthread_mutex_lock(&stats_lock);
|
pthread_mutex_lock(&stats_lock);
|
||||||
thr_hashrates[thr_id] = hashes_done / dtime;
|
thr_hashrates[thr_id] = hashes_done / dtime;
|
||||||
thr_hashrates[thr_id] *= rate_factor;
|
thr_hashrates[thr_id] *= rate_factor;
|
||||||
if (loopcnt) // ignore first (init time)
|
if (loopcnt > 1) // ignore first (init time)
|
||||||
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
|
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
|
||||||
pthread_mutex_unlock(&stats_lock);
|
pthread_mutex_unlock(&stats_lock);
|
||||||
}
|
}
|
||||||
@ -2116,7 +2170,7 @@ static void *miner_thread(void *userdata)
|
|||||||
for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
|
for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
|
||||||
hashrate += stats_get_speed(i, thr_hashrates[i]);
|
hashrate += stats_get_speed(i, thr_hashrates[i]);
|
||||||
pthread_mutex_unlock(&stats_lock);
|
pthread_mutex_unlock(&stats_lock);
|
||||||
if (opt_benchmark) {
|
if (opt_benchmark && algo_benchmark == -1) {
|
||||||
format_hashrate(hashrate, s);
|
format_hashrate(hashrate, s);
|
||||||
applog(LOG_NOTICE, "Total: %s", s);
|
applog(LOG_NOTICE, "Total: %s", s);
|
||||||
}
|
}
|
||||||
@ -2159,7 +2213,6 @@ static void *miner_thread(void *userdata)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
loopcnt++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@ -3084,25 +3137,27 @@ static void parse_cmdline(int argc, char *argv[])
|
|||||||
parse_arg(key, optarg);
|
parse_arg(key, optarg);
|
||||||
}
|
}
|
||||||
if (optind < argc) {
|
if (optind < argc) {
|
||||||
fprintf(stderr, "%s: unsupported non-option argument '%s'\n",
|
fprintf(stderr, "%s: unsupported non-option argument '%s' (see --help)\n",
|
||||||
argv[0], argv[optind]);
|
argv[0], argv[optind]);
|
||||||
show_usage_and_exit(1);
|
//show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_config(opt_config);
|
parse_config(opt_config);
|
||||||
|
|
||||||
if (opt_algo == ALGO_HEAVY && opt_vote == 9999) {
|
if (opt_algo == ALGO_HEAVY && opt_vote == 9999 && !opt_benchmark) {
|
||||||
fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n",
|
fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n",
|
||||||
argv[0]);
|
argv[0]);
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt_algo == ALGO_AUTO) {
|
if (opt_algo == ALGO_AUTO) {
|
||||||
for (int n=0; n < MAX_GPUS; n++)
|
for (int n=0; n < MAX_GPUS; n++) {
|
||||||
gpus_intensity[n] = 0; // use default
|
gpus_intensity[n] = 0; // use default
|
||||||
|
algo_hashrates[n] = (double*) calloc(1, ALGO_COUNT * sizeof(double));
|
||||||
|
}
|
||||||
if (opt_benchmark) {
|
if (opt_benchmark) {
|
||||||
opt_autotune = false;
|
opt_autotune = false;
|
||||||
algo_benchmark = opt_algo = ALGO_BLAKE; /* first */
|
algo_benchmark = opt_algo = (enum sha_algos) 0; /* first */
|
||||||
applog(LOG_BLUE, "Starting benchmark mode");
|
applog(LOG_BLUE, "Starting benchmark mode");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3177,6 +3232,7 @@ int main(int argc, char *argv[])
|
|||||||
jane_params = strdup("");
|
jane_params = strdup("");
|
||||||
|
|
||||||
pthread_mutex_init(&applog_lock, NULL);
|
pthread_mutex_init(&applog_lock, NULL);
|
||||||
|
pthread_mutex_init(&algo_lock, NULL);
|
||||||
|
|
||||||
// number of cpus for thread affinity
|
// number of cpus for thread affinity
|
||||||
#if defined(WIN32)
|
#if defined(WIN32)
|
||||||
|
@ -83,6 +83,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
|
|||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
// Constants copy/init (no device alloc in these algos)
|
// Constants copy/init (no device alloc in these algos)
|
||||||
@ -179,4 +180,4 @@ extern "C" void free_nist5(int thr_id)
|
|||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,7 @@ extern "C" {
|
|||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
static uint64_t* d_hash[MAX_GPUS];
|
static uint64_t* d_hash[MAX_GPUS];
|
||||||
//static uint64_t* d_hash2[MAX_GPUS];
|
//static uint64_t* d_matrix[MAX_GPUS];
|
||||||
|
|
||||||
extern void blake256_cpu_init(int thr_id, uint32_t threads);
|
extern void blake256_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
|
extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
|
||||||
@ -87,11 +87,12 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
throughput = min(throughput, max_nonce - first_nonce);
|
throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x00ff;
|
ptarget[7] = 0x00ff;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
cudaGetLastError(); // reset last error
|
||||||
|
|
||||||
blake256_cpu_init(thr_id, throughput);
|
blake256_cpu_init(thr_id, throughput);
|
||||||
keccak256_cpu_init(thr_id,throughput);
|
keccak256_cpu_init(thr_id,throughput);
|
||||||
@ -99,8 +100,8 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
groestl256_cpu_init(thr_id, throughput);
|
groestl256_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
// DMatrix
|
// DMatrix
|
||||||
// cudaMalloc(&d_hash2[thr_id], (size_t)16 * 8 * 8 * sizeof(uint64_t) * throughput);
|
// cudaMalloc(&d_matrix[thr_id], (size_t)16 * 8 * 8 * sizeof(uint64_t) * throughput);
|
||||||
// lyra2_cpu_init(thr_id, throughput, d_hash2[thr_id]);
|
// lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
|
||||||
|
|
||||||
@ -174,6 +175,7 @@ extern "C" void free_lyra2(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
//cudaFree(d_matrix[thr_id]);
|
||||||
|
|
||||||
keccak256_cpu_free(thr_id);
|
keccak256_cpu_free(thr_id);
|
||||||
groestl256_cpu_free(thr_id);
|
groestl256_cpu_free(thr_id);
|
||||||
@ -181,4 +183,4 @@ extern "C" void free_lyra2(int thr_id)
|
|||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
@ -84,14 +84,16 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
uint32_t throughput = device_intensity(dev_id, __func__, defthr);
|
uint32_t throughput = device_intensity(dev_id, __func__, defthr);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x00ff;
|
ptarget[7] = 0x00ff;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(dev_id);
|
||||||
//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||||
//if (opt_n_gputhreads == 1)
|
//if (opt_n_gputhreads == 1)
|
||||||
// cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
// cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
||||||
|
cudaGetLastError();
|
||||||
|
|
||||||
blake256_cpu_init(thr_id, throughput);
|
blake256_cpu_init(thr_id, throughput);
|
||||||
keccak256_cpu_init(thr_id,throughput);
|
keccak256_cpu_init(thr_id,throughput);
|
||||||
skein256_cpu_init(thr_id, throughput);
|
skein256_cpu_init(thr_id, throughput);
|
||||||
@ -103,8 +105,8 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// DMatrix
|
// DMatrix (780Ti may prefer 16 instead of 12, cf djm34)
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], (size_t)16 * sizeof(uint64_t) * 4 * 3 * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], (size_t)12 * sizeof(uint64_t) * 4 * 4 * throughput));
|
||||||
lyra2v2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
|
lyra2v2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
|
||||||
|
@ -16,26 +16,28 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
|
||||||
|
int dev_id = device_map[thr_id];
|
||||||
int intensity = is_windows() ? 18 : 19;
|
int intensity = is_windows() ? 18 : 19;
|
||||||
uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);
|
uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);
|
||||||
throughput = throughput / 32; /* set for max intensity ~= 20 */
|
throughput = throughput / 32; /* set for max intensity ~= 20 */
|
||||||
throughput = min(throughput, max_nonce - first_nonce + 1);
|
throughput = min(throughput, max_nonce - first_nonce + 1);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
ptarget[7] = 0x00ff;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
int dev_id = device_map[thr_id];
|
cudaDeviceSynchronize();
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
||||||
|
cudaGetLastError(); // reset errors if device is not "reset"
|
||||||
|
|
||||||
if (device_sm[dev_id] <= 300) {
|
if (device_sm[dev_id] <= 300) {
|
||||||
applog(LOG_ERR, "Sorry neoscrypt is not supported on SM 3.0 devices");
|
applog(LOG_ERR, "Sorry neoscrypt is not supported on SM 3.0 devices");
|
||||||
proper_exit(EXIT_CODE_CUDA_ERROR);
|
proper_exit(EXIT_CODE_CUDA_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
applog(LOG_INFO, "Using %d cuda threads", throughput);
|
applog(LOG_INFO, "GPU #%d: Using %d cuda threads", dev_id, throughput);
|
||||||
neoscrypt_cpu_init(thr_id, throughput);
|
neoscrypt_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
@ -71,7 +73,7 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
|||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
|
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", dev_id, foundNonce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,4 +97,4 @@ void free_neoscrypt(int thr_id)
|
|||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
@ -225,7 +225,7 @@ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou
|
|||||||
__host__
|
__host__
|
||||||
void quark_blake512_cpu_init(int thr_id, uint32_t threads)
|
void quark_blake512_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
CUDA_SAFE_CALL(cudaGetLastError());
|
// CUDA_SAFE_CALL(cudaGetLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
|
@ -434,6 +434,7 @@ void free_scrypt_jane(int thr_id)
|
|||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cudaDeviceReset(); // well, simple way to free ;)
|
cudaDeviceReset(); // well, simple way to free ;)
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
@ -479,6 +480,7 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
throughput = cuda_throughput(thr_id);
|
throughput = cuda_throughput(thr_id);
|
||||||
|
@ -694,6 +694,7 @@ void free_scrypt(int thr_id)
|
|||||||
|
|
||||||
// trivial way to free all...
|
// trivial way to free all...
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
@ -714,6 +715,7 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
if (!init[thr_id]) {
|
if (!init[thr_id]) {
|
||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
throughput = cuda_throughput(thr_id);
|
throughput = cuda_throughput(thr_id);
|
||||||
|
@ -168,7 +168,7 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
throughput = min(throughput, max_nonce - first_nonce);
|
throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x000f;
|
ptarget[7] = 0x000f;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
|
@ -178,7 +178,7 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
throughput = min(throughput, max_nonce - first_nonce);
|
throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x00FF;
|
ptarget[7] = 0x00FF;
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
@ -199,7 +199,7 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
x14_shabal512_cpu_init(thr_id, throughput);
|
x14_shabal512_cpu_init(thr_id, throughput);
|
||||||
x15_whirlpool_cpu_init(thr_id, throughput, 0);
|
x15_whirlpool_cpu_init(thr_id, throughput, 0);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
@ -286,4 +286,4 @@ extern "C" void free_x15(int thr_id)
|
|||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user