benchmark: allow -a auto to bench all algos at once
This commit is contained in:
parent
922c2a5cd7
commit
934555994d
88
ccminer.cpp
88
ccminer.cpp
@ -55,13 +55,14 @@ BOOL WINAPI ConsoleHandler(DWORD);
|
|||||||
#define HEAVYCOIN_BLKHDR_SZ 84
|
#define HEAVYCOIN_BLKHDR_SZ 84
|
||||||
#define MNR_BLKHDR_SZ 80
|
#define MNR_BLKHDR_SZ 80
|
||||||
|
|
||||||
// from cuda.cpp
|
// declarations from cuda.cpp (to be moved to miner.h)
|
||||||
int cuda_num_devices();
|
int cuda_num_devices();
|
||||||
void cuda_devicenames();
|
void cuda_devicenames();
|
||||||
void cuda_reset_device(int thr_id, bool *init);
|
void cuda_reset_device(int thr_id, bool *init);
|
||||||
void cuda_shutdown();
|
void cuda_shutdown();
|
||||||
int cuda_finddevice(char *name);
|
int cuda_finddevice(char *name);
|
||||||
void cuda_print_devices();
|
void cuda_print_devices();
|
||||||
|
int cuda_available_memory(int thr_id);
|
||||||
|
|
||||||
#include "nvml.h"
|
#include "nvml.h"
|
||||||
#ifdef USE_WRAPNVML
|
#ifdef USE_WRAPNVML
|
||||||
@ -120,6 +121,7 @@ enum sha_algos {
|
|||||||
ALGO_WHIRLPOOL,
|
ALGO_WHIRLPOOL,
|
||||||
ALGO_WHIRLPOOLX,
|
ALGO_WHIRLPOOLX,
|
||||||
ALGO_ZR5,
|
ALGO_ZR5,
|
||||||
|
ALGO_AUTO,
|
||||||
ALGO_COUNT
|
ALGO_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -160,6 +162,7 @@ static const char *algo_names[] = {
|
|||||||
"whirlpool",
|
"whirlpool",
|
||||||
"whirlpoolx",
|
"whirlpoolx",
|
||||||
"zr5",
|
"zr5",
|
||||||
|
"auto", /* reserved for multi algo */
|
||||||
""
|
""
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -168,6 +171,7 @@ bool opt_debug_diff = false;
|
|||||||
bool opt_debug_threads = false;
|
bool opt_debug_threads = false;
|
||||||
bool opt_protocol = false;
|
bool opt_protocol = false;
|
||||||
bool opt_benchmark = false;
|
bool opt_benchmark = false;
|
||||||
|
int algo_benchmark = -1;
|
||||||
bool opt_showdiff = false;
|
bool opt_showdiff = false;
|
||||||
|
|
||||||
// todo: limit use of these flags,
|
// todo: limit use of these flags,
|
||||||
@ -319,7 +323,8 @@ Options:\n\
|
|||||||
x14 X14\n\
|
x14 X14\n\
|
||||||
x15 X15\n\
|
x15 X15\n\
|
||||||
x17 X17\n\
|
x17 X17\n\
|
||||||
whirlpool Old Whirlcoin algo\n\
|
whirlcoin Old Whirlcoin (Whirlpool algo)\n\
|
||||||
|
whirlpool Whirlpool algo\n\
|
||||||
whirlpoolx WhirlpoolX (VNL)\n\
|
whirlpoolx WhirlpoolX (VNL)\n\
|
||||||
zr5 ZR5 (ZiftrCoin)\n\
|
zr5 ZR5 (ZiftrCoin)\n\
|
||||||
-d, --devices Comma separated list of CUDA devices to use.\n\
|
-d, --devices Comma separated list of CUDA devices to use.\n\
|
||||||
@ -1559,12 +1564,60 @@ void miner_free_device(int thr_id)
|
|||||||
free_scrypt_jane(thr_id);
|
free_scrypt_jane(thr_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// to benchmark all algos
|
||||||
|
bool algo_switch_next(int thr_id)
|
||||||
|
{
|
||||||
|
int algo = (int) opt_algo;
|
||||||
|
int prev_algo = algo;
|
||||||
|
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||||
|
int mfree;
|
||||||
|
char rate[32] = { 0 };
|
||||||
|
|
||||||
|
// free current algo memory and track mem usage
|
||||||
|
miner_free_device(thr_id);
|
||||||
|
mfree = cuda_available_memory(thr_id);
|
||||||
|
|
||||||
|
work_restart[thr_id].restart = 1;
|
||||||
|
|
||||||
|
algo++;
|
||||||
|
if (algo == ALGO_AUTO)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// we need to wait completion on all cards before the switch
|
||||||
|
if (opt_n_threads > 1) {
|
||||||
|
pthread_mutex_lock(&stratum_sock_lock); // unused in benchmark
|
||||||
|
for (int n=0; n < opt_n_threads; n++)
|
||||||
|
if (!work_restart[thr_id].restart) {
|
||||||
|
applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n]);
|
||||||
|
usleep(100*1000);
|
||||||
|
}
|
||||||
|
sleep(1);
|
||||||
|
pthread_mutex_unlock(&stratum_sock_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
|
||||||
|
format_hashrate(hashrate, rate);
|
||||||
|
applog(LOG_NOTICE, "GPU #%d: %s rate: %s - %d MB free", dev_id, algo_names[prev_algo], rate, mfree);
|
||||||
|
|
||||||
|
stats_purge_all();
|
||||||
|
global_hashrate = 0;
|
||||||
|
|
||||||
|
opt_algo = (enum sha_algos) algo;
|
||||||
|
|
||||||
|
applog(LOG_BLUE, "GPU #%d: Benchmark for algo %s...", dev_id, algo_names[algo]);
|
||||||
|
sleep(1);
|
||||||
|
work_restart[thr_id].restart = 0;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void *miner_thread(void *userdata)
|
static void *miner_thread(void *userdata)
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = (struct thr_info *)userdata;
|
struct thr_info *mythr = (struct thr_info *)userdata;
|
||||||
int switchn = pool_switch_count;
|
int switchn = pool_switch_count;
|
||||||
int thr_id = mythr->id;
|
int thr_id = mythr->id;
|
||||||
struct work work;
|
struct work work;
|
||||||
|
uint64_t loopcnt = 0;
|
||||||
uint32_t max_nonce;
|
uint32_t max_nonce;
|
||||||
uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1);
|
||||||
bool work_done = false;
|
bool work_done = false;
|
||||||
@ -1676,6 +1729,19 @@ static void *miner_thread(void *userdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (opt_benchmark && algo_benchmark >= 0) {
|
||||||
|
if (loopcnt > 3) {
|
||||||
|
if (!algo_switch_next(thr_id)) {
|
||||||
|
proper_exit(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
algo_benchmark = (int) opt_algo;
|
||||||
|
// for scrypt...
|
||||||
|
opt_autotune = false;
|
||||||
|
loopcnt = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
|
if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target))))
|
||||||
{
|
{
|
||||||
if (opt_debug) {
|
if (opt_debug) {
|
||||||
@ -1825,8 +1891,10 @@ static void *miner_thread(void *userdata)
|
|||||||
minmax = 0x300000;
|
minmax = 0x300000;
|
||||||
break;
|
break;
|
||||||
case ALGO_SCRYPT:
|
case ALGO_SCRYPT:
|
||||||
|
minmax = 0x80000;
|
||||||
|
break;
|
||||||
case ALGO_SCRYPT_JANE:
|
case ALGO_SCRYPT_JANE:
|
||||||
minmax = 0x100000;
|
minmax = 0x1000;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
max64 = max(minmax-1, max64);
|
max64 = max(minmax-1, max64);
|
||||||
@ -2012,7 +2080,8 @@ static void *miner_thread(void *userdata)
|
|||||||
pthread_mutex_lock(&stats_lock);
|
pthread_mutex_lock(&stats_lock);
|
||||||
thr_hashrates[thr_id] = hashes_done / dtime;
|
thr_hashrates[thr_id] = hashes_done / dtime;
|
||||||
thr_hashrates[thr_id] *= rate_factor;
|
thr_hashrates[thr_id] *= rate_factor;
|
||||||
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
|
if (loopcnt) // ignore first (init time)
|
||||||
|
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
|
||||||
pthread_mutex_unlock(&stats_lock);
|
pthread_mutex_unlock(&stats_lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2090,6 +2159,7 @@ static void *miner_thread(void *userdata)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
loopcnt++;
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@ -3026,6 +3096,16 @@ static void parse_cmdline(int argc, char *argv[])
|
|||||||
argv[0]);
|
argv[0]);
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (opt_algo == ALGO_AUTO) {
|
||||||
|
for (int n=0; n < MAX_GPUS; n++)
|
||||||
|
gpus_intensity[n] = 0; // use default
|
||||||
|
if (opt_benchmark) {
|
||||||
|
opt_autotune = false;
|
||||||
|
algo_benchmark = opt_algo = ALGO_BLAKE; /* first */
|
||||||
|
applog(LOG_BLUE, "Starting benchmark mode");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
|
12
cuda.cpp
12
cuda.cpp
@ -196,7 +196,7 @@ int cuda_gpu_clocks(struct cgpu_info *gpu)
|
|||||||
// if we use 2 threads on the same gpu, we need to reinit the threads
|
// if we use 2 threads on the same gpu, we need to reinit the threads
|
||||||
void cuda_reset_device(int thr_id, bool *init)
|
void cuda_reset_device(int thr_id, bool *init)
|
||||||
{
|
{
|
||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
if (init != NULL) {
|
if (init != NULL) {
|
||||||
// with init array, it's meant to be used in algo's scan code...
|
// with init array, it's meant to be used in algo's scan code...
|
||||||
@ -216,6 +216,16 @@ void cuda_reset_device(int thr_id, bool *init)
|
|||||||
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
|
cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// return free memory in megabytes
|
||||||
|
int cuda_available_memory(int thr_id)
|
||||||
|
{
|
||||||
|
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||||
|
size_t mtotal, mfree = 0;
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
cudaMemGetInfo(&mfree, &mtotal);
|
||||||
|
return (int) (mfree / (1024 * 1024));
|
||||||
|
}
|
||||||
|
|
||||||
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
|
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
|
||||||
{
|
{
|
||||||
struct cgpu_info *gpu = &thr_info[thr_id].gpu;
|
struct cgpu_info *gpu = &thr_info[thr_id].gpu;
|
||||||
|
Loading…
Reference in New Issue
Block a user