benchmark: allow -a auto to bench all algos at once

This commit is contained in:
Tanguy Pruvot 2015-10-08 21:41:20 +02:00
parent 922c2a5cd7
commit 934555994d
2 changed files with 95 additions and 5 deletions

View File

@ -55,13 +55,14 @@ BOOL WINAPI ConsoleHandler(DWORD);
#define HEAVYCOIN_BLKHDR_SZ 84 #define HEAVYCOIN_BLKHDR_SZ 84
#define MNR_BLKHDR_SZ 80 #define MNR_BLKHDR_SZ 80
// from cuda.cpp // decl. from cuda.cpp (to move in miner.h)
int cuda_num_devices(); int cuda_num_devices();
void cuda_devicenames(); void cuda_devicenames();
void cuda_reset_device(int thr_id, bool *init); void cuda_reset_device(int thr_id, bool *init);
void cuda_shutdown(); void cuda_shutdown();
int cuda_finddevice(char *name); int cuda_finddevice(char *name);
void cuda_print_devices(); void cuda_print_devices();
int cuda_available_memory(int thr_id);
#include "nvml.h" #include "nvml.h"
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
@ -120,6 +121,7 @@ enum sha_algos {
ALGO_WHIRLPOOL, ALGO_WHIRLPOOL,
ALGO_WHIRLPOOLX, ALGO_WHIRLPOOLX,
ALGO_ZR5, ALGO_ZR5,
ALGO_AUTO,
ALGO_COUNT ALGO_COUNT
}; };
@ -160,6 +162,7 @@ static const char *algo_names[] = {
"whirlpool", "whirlpool",
"whirlpoolx", "whirlpoolx",
"zr5", "zr5",
"auto", /* reserved for multi algo */
"" ""
}; };
@ -168,6 +171,7 @@ bool opt_debug_diff = false;
bool opt_debug_threads = false; bool opt_debug_threads = false;
bool opt_protocol = false; bool opt_protocol = false;
bool opt_benchmark = false; bool opt_benchmark = false;
int algo_benchmark = -1;
bool opt_showdiff = false; bool opt_showdiff = false;
// todo: limit use of these flags, // todo: limit use of these flags,
@ -319,7 +323,8 @@ Options:\n\
x14 X14\n\ x14 X14\n\
x15 X15\n\ x15 X15\n\
x17 X17\n\ x17 X17\n\
whirlpool Old Whirlcoin algo\n\ whirlcoin Old Whirlcoin (Whirlpool algo)\n\
whirlpool Whirlpool algo\n\
whirlpoolx WhirlpoolX (VNL)\n\ whirlpoolx WhirlpoolX (VNL)\n\
zr5 ZR5 (ZiftrCoin)\n\ zr5 ZR5 (ZiftrCoin)\n\
-d, --devices Comma separated list of CUDA devices to use.\n\ -d, --devices Comma separated list of CUDA devices to use.\n\
@ -1559,12 +1564,60 @@ void miner_free_device(int thr_id)
free_scrypt_jane(thr_id); free_scrypt_jane(thr_id);
} }
// Benchmark helper (-a auto): move the calling miner thread to the next algo.
//
// Frees the current algo's device resources, logs the hashrate obtained for
// the algo that just ran together with the free device memory, then advances
// the global opt_algo to the next enum value.
//
// thr_id: index of the calling miner thread (mapped to a GPU via device_map).
//
// Returns true when a next algo was selected and the thread may resume.
// Returns false when the next enum value is ALGO_AUTO, i.e. the list is
// exhausted; in that case the device has already been freed and this
// thread's work_restart flag is left set.
bool algo_switch_next(int thr_id)
{
	int algo = (int) opt_algo;
	int prev_algo = algo;
	int dev_id = device_map[thr_id % MAX_GPUS];
	int mfree;
	char rate[32] = { 0 };

	// free current algo memory and track mem usage
	miner_free_device(thr_id);
	mfree = cuda_available_memory(thr_id);

	// mark this thread as stopped; the other threads look at this flag
	// in their own wait loop below
	work_restart[thr_id].restart = 1;

	algo++;
	if (algo == ALGO_AUTO)
		return false;

	// we need to wait completion on all cards before the switch
	if (opt_n_threads > 1) {
		pthread_mutex_lock(&stratum_sock_lock); // unused in benchmark
		for (int n=0; n < opt_n_threads; n++) {
			// test the flag of thread n: our own flag was just set above,
			// so checking work_restart[thr_id] here would never wait
			if (!work_restart[n].restart) {
				// same modulo mapping as dev_id, keeps the index inside device_map
				applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n % MAX_GPUS]);
				usleep(100*1000);
			}
		}
		sleep(1);
		pthread_mutex_unlock(&stratum_sock_lock);
	}

	// NOTE(review): stats_get_speed() presumably averages the recorded samples
	// and falls back to the value passed as 2nd argument - confirm in stats code
	double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
	format_hashrate(hashrate, rate);
	applog(LOG_NOTICE, "GPU #%d: %s rate: %s - %d MB free", dev_id, algo_names[prev_algo], rate, mfree);

	// reset the statistics so the next algo starts from a clean state
	stats_purge_all();
	global_hashrate = 0;

	opt_algo = (enum sha_algos) algo;
	applog(LOG_BLUE, "GPU #%d: Benchmark for algo %s...", dev_id, algo_names[algo]);
	sleep(1);

	// clear the flag so this thread can scan again
	work_restart[thr_id].restart = 0;
	return true;
}
static void *miner_thread(void *userdata) static void *miner_thread(void *userdata)
{ {
struct thr_info *mythr = (struct thr_info *)userdata; struct thr_info *mythr = (struct thr_info *)userdata;
int switchn = pool_switch_count; int switchn = pool_switch_count;
int thr_id = mythr->id; int thr_id = mythr->id;
struct work work; struct work work;
uint64_t loopcnt = 0;
uint32_t max_nonce; uint32_t max_nonce;
uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1); uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1);
bool work_done = false; bool work_done = false;
@ -1676,6 +1729,19 @@ static void *miner_thread(void *userdata)
} }
} }
if (opt_benchmark && algo_benchmark >= 0) {
if (loopcnt > 3) {
if (!algo_switch_next(thr_id)) {
proper_exit(0);
break;
}
algo_benchmark = (int) opt_algo;
// for scrypt...
opt_autotune = false;
loopcnt = 0;
}
}
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target)))) if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
{ {
if (opt_debug) { if (opt_debug) {
@ -1825,8 +1891,10 @@ static void *miner_thread(void *userdata)
minmax = 0x300000; minmax = 0x300000;
break; break;
case ALGO_SCRYPT: case ALGO_SCRYPT:
minmax = 0x80000;
break;
case ALGO_SCRYPT_JANE: case ALGO_SCRYPT_JANE:
minmax = 0x100000; minmax = 0x1000;
break; break;
} }
max64 = max(minmax-1, max64); max64 = max(minmax-1, max64);
@ -2012,7 +2080,8 @@ static void *miner_thread(void *userdata)
pthread_mutex_lock(&stats_lock); pthread_mutex_lock(&stats_lock);
thr_hashrates[thr_id] = hashes_done / dtime; thr_hashrates[thr_id] = hashes_done / dtime;
thr_hashrates[thr_id] *= rate_factor; thr_hashrates[thr_id] *= rate_factor;
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height); if (loopcnt) // ignore first (init time)
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
pthread_mutex_unlock(&stats_lock); pthread_mutex_unlock(&stats_lock);
} }
} }
@ -2090,6 +2159,7 @@ static void *miner_thread(void *userdata)
break; break;
} }
} }
loopcnt++;
} }
out: out:
@ -3026,6 +3096,16 @@ static void parse_cmdline(int argc, char *argv[])
argv[0]); argv[0]);
show_usage_and_exit(1); show_usage_and_exit(1);
} }
if (opt_algo == ALGO_AUTO) {
for (int n=0; n < MAX_GPUS; n++)
gpus_intensity[n] = 0; // use default
if (opt_benchmark) {
opt_autotune = false;
algo_benchmark = opt_algo = ALGO_BLAKE; /* first */
applog(LOG_BLUE, "Starting benchmark mode");
}
}
} }
#ifndef WIN32 #ifndef WIN32

View File

@ -196,7 +196,7 @@ int cuda_gpu_clocks(struct cgpu_info *gpu)
// if we use 2 threads on the same gpu, we need to reinit the threads // if we use 2 threads on the same gpu, we need to reinit the threads
void cuda_reset_device(int thr_id, bool *init) void cuda_reset_device(int thr_id, bool *init)
{ {
int dev_id = device_map[thr_id]; int dev_id = device_map[thr_id % MAX_GPUS];
cudaSetDevice(dev_id); cudaSetDevice(dev_id);
if (init != NULL) { if (init != NULL) {
// with init array, its meant to be used in algo's scan code... // with init array, its meant to be used in algo's scan code...
@ -216,6 +216,16 @@ void cuda_reset_device(int thr_id, bool *init)
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask)); cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
} }
// return free memory in megabytes for the device mapped to thr_id,
// or 0 if the device cannot be selected or the memory query fails
int cuda_available_memory(int thr_id)
{
	int dev_id = device_map[thr_id % MAX_GPUS];
	size_t mtotal = 0, mfree = 0;

	// without a successful cudaSetDevice() the query below would report
	// the memory of whatever device is current for this host thread
	if (cudaSetDevice(dev_id) != cudaSuccess)
		return 0;

	// do not trust the output values when the runtime reports an error
	if (cudaMemGetInfo(&mfree, &mtotal) != cudaSuccess)
		return 0;

	return (int) (mfree / (1024 * 1024));
}
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func) void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{ {
struct cgpu_info *gpu = &thr_info[thr_id].gpu; struct cgpu_info *gpu = &thr_info[thr_id].gpu;