benchmark: allow -a auto to bench all algos at once

This commit is contained in:
Tanguy Pruvot 2015-10-08 21:41:20 +02:00
parent 922c2a5cd7
commit 934555994d
2 changed files with 95 additions and 5 deletions

View File

@ -55,13 +55,14 @@ BOOL WINAPI ConsoleHandler(DWORD);
#define HEAVYCOIN_BLKHDR_SZ 84
#define MNR_BLKHDR_SZ 80
// from cuda.cpp
// decl. from cuda.cpp (to move in miner.h)
int cuda_num_devices();
void cuda_devicenames();
void cuda_reset_device(int thr_id, bool *init);
void cuda_shutdown();
int cuda_finddevice(char *name);
void cuda_print_devices();
int cuda_available_memory(int thr_id);
#include "nvml.h"
#ifdef USE_WRAPNVML
@ -120,6 +121,7 @@ enum sha_algos {
ALGO_WHIRLPOOL,
ALGO_WHIRLPOOLX,
ALGO_ZR5,
ALGO_AUTO,
ALGO_COUNT
};
@ -160,6 +162,7 @@ static const char *algo_names[] = {
"whirlpool",
"whirlpoolx",
"zr5",
"auto", /* reserved for multi algo */
""
};
@ -168,6 +171,7 @@ bool opt_debug_diff = false;
bool opt_debug_threads = false;
bool opt_protocol = false;
bool opt_benchmark = false;
int algo_benchmark = -1;
bool opt_showdiff = false;
// todo: limit use of these flags,
@ -319,7 +323,8 @@ Options:\n\
x14 X14\n\
x15 X15\n\
x17 X17\n\
whirlpool Old Whirlcoin algo\n\
whirlcoin Old Whirlcoin (Whirlpool algo)\n\
whirlpool Whirlpool algo\n\
whirlpoolx WhirlpoolX (VNL)\n\
zr5 ZR5 (ZiftrCoin)\n\
-d, --devices Comma separated list of CUDA devices to use.\n\
@ -1559,12 +1564,60 @@ void miner_free_device(int thr_id)
free_scrypt_jane(thr_id);
}
// to benchmark all algos
bool algo_switch_next(int thr_id)
{
int algo = (int) opt_algo;
int prev_algo = algo;
int dev_id = device_map[thr_id % MAX_GPUS];
int mfree;
char rate[32] = { 0 };
// free current algo memory and track mem usage
miner_free_device(thr_id);
mfree = cuda_available_memory(thr_id);
work_restart[thr_id].restart = 1;
algo++;
if (algo == ALGO_AUTO)
return false;
// we need to wait completion on all cards before the switch
if (opt_n_threads > 1) {
pthread_mutex_lock(&stratum_sock_lock); // unused in benchmark
for (int n=0; n < opt_n_threads; n++)
if (!work_restart[thr_id].restart) {
applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n]);
usleep(100*1000);
}
sleep(1);
pthread_mutex_unlock(&stratum_sock_lock);
}
double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
format_hashrate(hashrate, rate);
applog(LOG_NOTICE, "GPU #%d: %s rate: %s - %d MB free", dev_id, algo_names[prev_algo], rate, mfree);
stats_purge_all();
global_hashrate = 0;
opt_algo = (enum sha_algos) algo;
applog(LOG_BLUE, "GPU #%d: Benchmark for algo %s...", dev_id, algo_names[algo]);
sleep(1);
work_restart[thr_id].restart = 0;
return true;
}
static void *miner_thread(void *userdata)
{
struct thr_info *mythr = (struct thr_info *)userdata;
int switchn = pool_switch_count;
int thr_id = mythr->id;
struct work work;
uint64_t loopcnt = 0;
uint32_t max_nonce;
uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1);
bool work_done = false;
@ -1676,6 +1729,19 @@ static void *miner_thread(void *userdata)
}
}
if (opt_benchmark && algo_benchmark >= 0) {
if (loopcnt > 3) {
if (!algo_switch_next(thr_id)) {
proper_exit(0);
break;
}
algo_benchmark = (int) opt_algo;
// for scrypt...
opt_autotune = false;
loopcnt = 0;
}
}
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
{
if (opt_debug) {
@ -1825,8 +1891,10 @@ static void *miner_thread(void *userdata)
minmax = 0x300000;
break;
case ALGO_SCRYPT:
minmax = 0x80000;
break;
case ALGO_SCRYPT_JANE:
minmax = 0x100000;
minmax = 0x1000;
break;
}
max64 = max(minmax-1, max64);
@ -2012,7 +2080,8 @@ static void *miner_thread(void *userdata)
pthread_mutex_lock(&stats_lock);
thr_hashrates[thr_id] = hashes_done / dtime;
thr_hashrates[thr_id] *= rate_factor;
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
if (loopcnt) // ignore first (init time)
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
pthread_mutex_unlock(&stats_lock);
}
}
@ -2090,6 +2159,7 @@ static void *miner_thread(void *userdata)
break;
}
}
loopcnt++;
}
out:
@ -3026,6 +3096,16 @@ static void parse_cmdline(int argc, char *argv[])
argv[0]);
show_usage_and_exit(1);
}
if (opt_algo == ALGO_AUTO) {
for (int n=0; n < MAX_GPUS; n++)
gpus_intensity[n] = 0; // use default
if (opt_benchmark) {
opt_autotune = false;
algo_benchmark = opt_algo = ALGO_BLAKE; /* first */
applog(LOG_BLUE, "Starting benchmark mode");
}
}
}
#ifndef WIN32

View File

@ -196,7 +196,7 @@ int cuda_gpu_clocks(struct cgpu_info *gpu)
// if we use 2 threads on the same gpu, we need to reinit the threads
void cuda_reset_device(int thr_id, bool *init)
{
int dev_id = device_map[thr_id];
int dev_id = device_map[thr_id % MAX_GPUS];
cudaSetDevice(dev_id);
if (init != NULL) {
// with init array, its meant to be used in algo's scan code...
@ -216,6 +216,16 @@ void cuda_reset_device(int thr_id, bool *init)
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
}
// return free memory in megabytes
int cuda_available_memory(int thr_id)
{
int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal, mfree = 0;
cudaSetDevice(dev_id);
cudaMemGetInfo(&mfree, &mtotal);
return (int) (mfree / (1024 * 1024));
}
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
struct cgpu_info *gpu = &thr_info[thr_id].gpu;