From 934555994dc330634e084dbb1ca306c2fecf6a93 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot
Date: Thu, 8 Oct 2015 21:41:20 +0200
Subject: [PATCH] benchmark: allow -a auto to bench all algos at once

---
 ccminer.cpp | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 cuda.cpp    | 12 +++++++-
 2 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/ccminer.cpp b/ccminer.cpp
index 1b4ac47..8e23d6c 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -55,13 +55,14 @@ BOOL WINAPI ConsoleHandler(DWORD);
 #define HEAVYCOIN_BLKHDR_SZ 84
 #define MNR_BLKHDR_SZ 80
 
-// from cuda.cpp
+// decl. from cuda.cpp (to move in miner.h)
 int cuda_num_devices();
 void cuda_devicenames();
 void cuda_reset_device(int thr_id, bool *init);
 void cuda_shutdown();
 int cuda_finddevice(char *name);
 void cuda_print_devices();
+int cuda_available_memory(int thr_id);
 
 #include "nvml.h"
 #ifdef USE_WRAPNVML
@@ -120,6 +121,7 @@ enum sha_algos {
 	ALGO_WHIRLPOOL,
 	ALGO_WHIRLPOOLX,
 	ALGO_ZR5,
+	ALGO_AUTO,
 	ALGO_COUNT
 };
 
@@ -160,6 +162,7 @@ static const char *algo_names[] = {
 	"whirlpool",
 	"whirlpoolx",
 	"zr5",
+	"auto", /* reserved for multi algo */
 	""
 };
 
@@ -168,6 +171,7 @@ bool opt_debug_diff = false;
 bool opt_debug_threads = false;
 bool opt_protocol = false;
 bool opt_benchmark = false;
+int algo_benchmark = -1;
 bool opt_showdiff = false;
 
 // todo: limit use of these flags,
@@ -319,7 +323,8 @@ Options:\n\
 			x14 X14\n\
 			x15 X15\n\
 			x17 X17\n\
-			whirlpool Old Whirlcoin algo\n\
+			whirlcoin Old Whirlcoin (Whirlpool algo)\n\
+			whirlpool Whirlpool algo\n\
 			whirlpoolx WhirlpoolX (VNL)\n\
 			zr5 ZR5 (ZiftrCoin)\n\
   -d, --devices Comma separated list of CUDA devices to use.\n\
@@ -1559,12 +1564,60 @@ void miner_free_device(int thr_id)
 	free_scrypt_jane(thr_id);
 }
 
+// benchmark "-a auto": free the current algo, log its rate and free memory, then advance opt_algo; returns false once ALGO_AUTO is reached
+bool algo_switch_next(int thr_id)
+{
+	int algo = (int) opt_algo;
+	int prev_algo = algo;
+	int dev_id = device_map[thr_id % MAX_GPUS];
+	int mfree;
+	char rate[32] = { 0 };
+
+	// free current algo memory and track mem usage
+	miner_free_device(thr_id);
+	mfree = cuda_available_memory(thr_id);
+
+	work_restart[thr_id].restart = 1;
+
+	algo++;
+	if (algo == ALGO_AUTO)
+		return false;
+
+	// best-effort wait for the other cards before the switch: one 100 ms pause per thread that has not flagged a restart yet
+	if (opt_n_threads > 1) {
+		pthread_mutex_lock(&stratum_sock_lock); // unused in benchmark
+		for (int n=0; n < opt_n_threads; n++)
+			if (!work_restart[n].restart) { // check thread n, not thr_id (its own flag was just set above)
+				applog(LOG_DEBUG, "GPU #%d: waiting GPU %d", dev_id, device_map[n % MAX_GPUS]);
+				usleep(100*1000);
+			}
+		sleep(1);
+		pthread_mutex_unlock(&stratum_sock_lock);
+	}
+
+	double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
+	format_hashrate(hashrate, rate);
+	applog(LOG_NOTICE, "GPU #%d: %s rate: %s - %d MB free", dev_id, algo_names[prev_algo], rate, mfree);
+
+	stats_purge_all();
+	global_hashrate = 0;
+
+	opt_algo = (enum sha_algos) algo;
+
+	applog(LOG_BLUE, "GPU #%d: Benchmark for algo %s...", dev_id, algo_names[algo]);
+	sleep(1);
+	work_restart[thr_id].restart = 0;
+
+	return true;
+}
+
 static void *miner_thread(void *userdata)
 {
 	struct thr_info *mythr = (struct thr_info *)userdata;
 	int switchn = pool_switch_count;
 	int thr_id = mythr->id;
 	struct work work;
+	uint64_t loopcnt = 0;
 	uint32_t max_nonce;
 	uint32_t end_nonce = UINT32_MAX / opt_n_threads * (thr_id + 1) - (thr_id + 1);
 	bool work_done = false;
@@ -1676,6 +1729,19 @@ static void *miner_thread(void *userdata)
 			}
 		}
 
+		if (opt_benchmark && algo_benchmark >= 0) {
+			if (loopcnt > 3) {
+				if (!algo_switch_next(thr_id)) {
+					proper_exit(0);
+					break;
+				}
+				algo_benchmark = (int) opt_algo;
+				// for scrypt...
+ opt_autotune = false; + loopcnt = 0; + } + } + if (!opt_benchmark && (g_work.height != work.height || memcmp(work.target, g_work.target, sizeof(work.target)))) { if (opt_debug) { @@ -1825,8 +1891,10 @@ static void *miner_thread(void *userdata) minmax = 0x300000; break; case ALGO_SCRYPT: + minmax = 0x80000; + break; case ALGO_SCRYPT_JANE: - minmax = 0x100000; + minmax = 0x1000; break; } max64 = max(minmax-1, max64); @@ -2012,7 +2080,8 @@ static void *miner_thread(void *userdata) pthread_mutex_lock(&stats_lock); thr_hashrates[thr_id] = hashes_done / dtime; thr_hashrates[thr_id] *= rate_factor; - stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height); + if (loopcnt) // ignore first (init time) + stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height); pthread_mutex_unlock(&stats_lock); } } @@ -2090,6 +2159,7 @@ static void *miner_thread(void *userdata) break; } } + loopcnt++; } out: @@ -3026,6 +3096,16 @@ static void parse_cmdline(int argc, char *argv[]) argv[0]); show_usage_and_exit(1); } + + if (opt_algo == ALGO_AUTO) { + for (int n=0; n < MAX_GPUS; n++) + gpus_intensity[n] = 0; // use default + if (opt_benchmark) { + opt_autotune = false; + algo_benchmark = opt_algo = ALGO_BLAKE; /* first */ + applog(LOG_BLUE, "Starting benchmark mode"); + } + } } #ifndef WIN32 diff --git a/cuda.cpp b/cuda.cpp index 679e77f..5a41b49 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -196,7 +196,7 @@ int cuda_gpu_clocks(struct cgpu_info *gpu) // if we use 2 threads on the same gpu, we need to reinit the threads void cuda_reset_device(int thr_id, bool *init) { - int dev_id = device_map[thr_id]; + int dev_id = device_map[thr_id % MAX_GPUS]; cudaSetDevice(dev_id); if (init != NULL) { // with init array, its meant to be used in algo's scan code... 
@@ -216,6 +216,16 @@ void cuda_reset_device(int thr_id, bool *init)
 	cudaSetDeviceFlags((unsigned)(opt_cudaschedule & cudaDeviceScheduleMask));
 }
 
+// return free memory in megabytes for the device mapped to thr_id (0 when the device cannot be selected or queried)
+int cuda_available_memory(int thr_id)
+{
+	int dev_id = device_map[thr_id % MAX_GPUS];
+	size_t mtotal = 0, mfree = 0;
+	cudaError_t err = cudaSetDevice(dev_id);
+	if (err == cudaSuccess) err = cudaMemGetInfo(&mfree, &mtotal); // skip the query if the device was not selected
+	return (err == cudaSuccess) ? (int) (mfree / (1024 * 1024)) : 0;
+}
+
 void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
 {
 	struct cgpu_info *gpu = &thr_info[thr_id].gpu;