benchmark: show mem and default throughput in results
Also prepare a new function to get the default intensity, and take care of multiple threads per GPU...
This commit is contained in:
parent
8db5a0bc9e
commit
c6dcc5e5cf
@ -388,7 +388,8 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint64_t targetHigh = ((uint64_t*)ptarget)[3];
|
||||
int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 20;
|
||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity, max_nonce - first_nonce);
|
||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
||||
throughput = min(throughput, max_nonce - first_nonce);
|
||||
|
||||
int rc = 0;
|
||||
|
||||
|
@ -96,6 +96,7 @@ its command line interface and options.
|
||||
x14 use to mine X14Coin
|
||||
x15 use to mine Halcyon
|
||||
x17 use to mine X17
|
||||
whirlpool use to mine Joincoin
|
||||
whirlpoolx use to mine Vanilla
|
||||
zr5 use to mine ZiftrCoin
|
||||
|
||||
@ -228,6 +229,9 @@ features.
|
||||
>>> RELEASE HISTORY <<<
|
||||
|
||||
Under Dev... v1.7
|
||||
Restore whirlpool algo (and whirlcoin variant)
|
||||
Prepare algo switch ability
|
||||
Add --benchmark -a auto to run a multi algo benchmark
|
||||
Add --cuda-schedule parameter
|
||||
Add --show-diff parameter, which displays the shares diff,
|
||||
and is able to detect real solved blocks on pools.
|
||||
|
2
api.cpp
2
api.cpp
@ -990,7 +990,7 @@ void *api_thread(void *userdata)
|
||||
/* to be able to report the default value set in each algo */
|
||||
void api_set_throughput(int thr_id, uint32_t throughput)
|
||||
{
|
||||
if (&thr_info[thr_id]) {
|
||||
if (thr_id < MAX_GPUS) {
|
||||
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
|
||||
uint32_t ws = throughput;
|
||||
uint8_t i = 0;
|
||||
|
37
bench.cpp
37
bench.cpp
@ -11,7 +11,9 @@
|
||||
|
||||
int bench_algo = -1;
|
||||
|
||||
static double * algo_hashrates[MAX_GPUS] = { 0 };
|
||||
static double algo_hashrates[MAX_GPUS][ALGO_COUNT] = { 0 };
|
||||
static uint32_t algo_throughput[MAX_GPUS][ALGO_COUNT] = { 0 };
|
||||
static int algo_mem_used[MAX_GPUS][ALGO_COUNT] = { 0 };
|
||||
static int device_mem_free[MAX_GPUS] = { 0 };
|
||||
|
||||
static pthread_barrier_t miner_barr;
|
||||
@ -25,18 +27,12 @@ void bench_init(int threads)
|
||||
{
|
||||
bench_algo = opt_algo = (enum sha_algos) 0; /* first */
|
||||
applog(LOG_BLUE, "Starting benchmark mode with %s", algo_names[opt_algo]);
|
||||
for (int n=0; n < MAX_GPUS; n++) {
|
||||
algo_hashrates[n] = (double*) calloc(1, ALGO_COUNT * sizeof(double));
|
||||
}
|
||||
pthread_barrier_init(&miner_barr, NULL, threads);
|
||||
pthread_barrier_init(&algo_barr, NULL, threads);
|
||||
}
|
||||
|
||||
void bench_free()
|
||||
{
|
||||
for (int n=0; n < MAX_GPUS; n++) {
|
||||
free(algo_hashrates[n]);
|
||||
}
|
||||
pthread_barrier_destroy(&miner_barr);
|
||||
pthread_barrier_destroy(&algo_barr);
|
||||
}
|
||||
@ -47,12 +43,7 @@ bool bench_algo_switch_next(int thr_id)
|
||||
int algo = (int) opt_algo;
|
||||
int prev_algo = algo;
|
||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||
int mfree;
|
||||
char rate[32] = { 0 };
|
||||
|
||||
// free current algo memory and track mem usage
|
||||
miner_free_device(thr_id);
|
||||
mfree = cuda_available_memory(thr_id);
|
||||
int mfree, mused;
|
||||
|
||||
algo++;
|
||||
|
||||
@ -70,16 +61,23 @@ bool bench_algo_switch_next(int thr_id)
|
||||
pthread_barrier_wait(&miner_barr);
|
||||
}
|
||||
|
||||
|
||||
char rate[32] = { 0 };
|
||||
double hashrate = stats_get_speed(thr_id, thr_hashrates[thr_id]);
|
||||
format_hashrate(hashrate, rate);
|
||||
applog(LOG_NOTICE, "GPU #%d: %s hashrate = %s", dev_id, algo_names[prev_algo], rate);
|
||||
|
||||
// free current algo memory and track mem usage
|
||||
mused = cuda_available_memory(thr_id);
|
||||
miner_free_device(thr_id);
|
||||
mfree = cuda_available_memory(thr_id);
|
||||
|
||||
// check if there is memory leak
|
||||
if (device_mem_free[thr_id] > mfree) {
|
||||
applog(LOG_WARNING, "GPU #%d, memory leak detected in %s ! %d MB free",
|
||||
dev_id, algo_names[prev_algo], mfree);
|
||||
dev_id, algo_names[prev_algo], mfree);
|
||||
}
|
||||
// store used memory per algo
|
||||
algo_mem_used[thr_id][opt_algo] = device_mem_free[thr_id] - mused;
|
||||
device_mem_free[thr_id] = mfree;
|
||||
|
||||
// store to dump a table per gpu later
|
||||
@ -109,6 +107,11 @@ bool bench_algo_switch_next(int thr_id)
|
||||
return true;
|
||||
}
|
||||
|
||||
void bench_set_throughput(int thr_id, uint32_t throughput)
|
||||
{
|
||||
algo_throughput[thr_id][opt_algo] = throughput;
|
||||
}
|
||||
|
||||
void bench_display_results()
|
||||
{
|
||||
for (int n=0; n < opt_n_threads; n++)
|
||||
@ -118,7 +121,9 @@ void bench_display_results()
|
||||
for (int i=0; i < ALGO_COUNT-1; i++) {
|
||||
double rate = algo_hashrates[n][i];
|
||||
if (rate == 0.0) continue;
|
||||
applog(LOG_INFO, "%12s : %12.1f kH/s", algo_names[i], rate / 1024.);
|
||||
applog(LOG_INFO, "%12s : %12.1f kH/s, %5d MB, %8u thr.", algo_names[i],
|
||||
rate / 1024., algo_mem_used[n][i], algo_throughput[n][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
25
ccminer.cpp
25
ccminer.cpp
@ -110,6 +110,7 @@ static json_t *opt_config;
|
||||
static const bool opt_time = true;
|
||||
enum sha_algos opt_algo = ALGO_X11;
|
||||
int opt_n_threads = 0;
|
||||
int gpu_threads = 1;
|
||||
int64_t opt_affinity = -1L;
|
||||
int opt_priority = 0;
|
||||
static double opt_difficulty = 1.;
|
||||
@ -1483,6 +1484,7 @@ static void *miner_thread(void *userdata)
|
||||
struct thr_info *mythr = (struct thr_info *)userdata;
|
||||
int switchn = pool_switch_count;
|
||||
int thr_id = mythr->id;
|
||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||
struct work work;
|
||||
uint64_t loopcnt = 0;
|
||||
uint32_t max_nonce;
|
||||
@ -1635,7 +1637,7 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
// --benchmark [-a auto]
|
||||
if (opt_benchmark && bench_algo >= 0) {
|
||||
//applog(LOG_DEBUG, "GPU #%d: loop %d", device_map[thr_id], loopcnt);
|
||||
//applog(LOG_DEBUG, "GPU #%d: loop %d", dev_id, loopcnt);
|
||||
if (loopcnt >= 3) {
|
||||
if (!bench_algo_switch_next(thr_id) && thr_id == 0)
|
||||
{
|
||||
@ -1755,11 +1757,11 @@ static void *miner_thread(void *userdata)
|
||||
break;
|
||||
case ALGO_KECCAK:
|
||||
case ALGO_JACKPOT:
|
||||
case ALGO_NEOSCRYPT:
|
||||
case ALGO_X15:
|
||||
minmax = 0x300000;
|
||||
break;
|
||||
case ALGO_LYRA2:
|
||||
case ALGO_NEOSCRYPT:
|
||||
case ALGO_SCRYPT:
|
||||
minmax = 0x80000;
|
||||
break;
|
||||
@ -1795,7 +1797,7 @@ static void *miner_thread(void *userdata)
|
||||
|
||||
if (opt_debug)
|
||||
applog(LOG_DEBUG, "GPU #%d: start=%08x end=%08x range=%08x",
|
||||
device_map[thr_id], start_nonce, max_nonce, (max_nonce-start_nonce));
|
||||
dev_id, start_nonce, max_nonce, (max_nonce-start_nonce));
|
||||
|
||||
hashes_done = 0;
|
||||
gettimeofday(&tv_start, NULL);
|
||||
@ -1967,7 +1969,7 @@ static void *miner_thread(void *userdata)
|
||||
work.scanned_to = max_nonce;
|
||||
if (opt_debug && opt_benchmark) {
|
||||
// to debug nonce ranges
|
||||
applog(LOG_DEBUG, "GPU #%d: ends=%08x range=%08x", device_map[thr_id],
|
||||
applog(LOG_DEBUG, "GPU #%d: ends=%08x range=%08x", dev_id,
|
||||
nonceptr[0], (nonceptr[0] - start_nonce));
|
||||
}
|
||||
}
|
||||
@ -1978,8 +1980,7 @@ static void *miner_thread(void *userdata)
|
||||
/* output */
|
||||
if (!opt_quiet && firstwork_time) {
|
||||
format_hashrate(thr_hashrates[thr_id], s);
|
||||
applog(LOG_INFO, "GPU #%d: %s, %s",
|
||||
device_map[thr_id], device_name[device_map[thr_id]], s);
|
||||
applog(LOG_INFO, "GPU #%d: %s, %s", dev_id, device_name[dev_id], s);
|
||||
}
|
||||
|
||||
/* ignore first loop hashrate */
|
||||
@ -2835,8 +2836,6 @@ void parse_arg(int key, char *arg)
|
||||
proper_exit(EXIT_CODE_CUDA_NODEVICE);
|
||||
}
|
||||
}
|
||||
// set number of active gpus
|
||||
active_gpus = opt_n_threads;
|
||||
pch = strtok (NULL, ",");
|
||||
}
|
||||
}
|
||||
@ -3057,8 +3056,11 @@ int main(int argc, char *argv[])
|
||||
if (num_cpus < 1)
|
||||
num_cpus = 1;
|
||||
|
||||
// number of gpus
|
||||
active_gpus = cuda_num_devices();
|
||||
|
||||
for (i = 0; i < MAX_GPUS; i++) {
|
||||
device_map[i] = i;
|
||||
device_map[i] = i % active_gpus;
|
||||
device_name[i] = NULL;
|
||||
device_config[i] = NULL;
|
||||
device_backoff[i] = is_windows() ? 12 : 2;
|
||||
@ -3070,8 +3072,6 @@ int main(int argc, char *argv[])
|
||||
device_pstate[i] = -1;
|
||||
}
|
||||
|
||||
// number of gpus
|
||||
active_gpus = cuda_num_devices();
|
||||
cuda_devicenames();
|
||||
|
||||
/* parse command line */
|
||||
@ -3192,6 +3192,9 @@ int main(int argc, char *argv[])
|
||||
if (!opt_n_threads)
|
||||
opt_n_threads = active_gpus;
|
||||
|
||||
// generally doesn't work... let 1
|
||||
gpu_threads = opt_n_threads / active_gpus;
|
||||
|
||||
if (opt_benchmark && opt_algo == ALGO_AUTO) {
|
||||
bench_init(opt_n_threads);
|
||||
for (int n=0; n < MAX_GPUS; n++) {
|
||||
|
23
cuda.cpp
23
cuda.cpp
@ -67,7 +67,8 @@ void cuda_devicenames()
|
||||
exit(1);
|
||||
}
|
||||
|
||||
GPU_N = min(MAX_GPUS, GPU_N);
|
||||
if (opt_n_threads)
|
||||
GPU_N = min(MAX_GPUS, opt_n_threads);
|
||||
for (int i=0; i < GPU_N; i++)
|
||||
{
|
||||
char vendorname[32] = { 0 };
|
||||
@ -98,7 +99,7 @@ void cuda_print_devices()
|
||||
int ngpus = cuda_num_devices();
|
||||
cuda_devicenames();
|
||||
for (int n=0; n < ngpus; n++) {
|
||||
int m = device_map[n];
|
||||
int m = device_map[n % MAX_GPUS];
|
||||
cudaDeviceProp props;
|
||||
cudaGetDeviceProperties(&props, m);
|
||||
if (!opt_n_threads || n < opt_n_threads) {
|
||||
@ -148,10 +149,25 @@ int cuda_finddevice(char *name)
|
||||
return -1;
|
||||
}
|
||||
|
||||
// deprecated since 1.7
|
||||
uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
|
||||
{
|
||||
uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
|
||||
if (gpu_threads > 1) throughput >> (gpu_threads-1);
|
||||
api_set_throughput(thr_id, throughput);
|
||||
bench_set_throughput(thr_id, throughput);
|
||||
return throughput;
|
||||
}
|
||||
|
||||
// since 1.7
|
||||
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount)
|
||||
{
|
||||
//int dev_id = device_map[thr_id % MAX_GPUS];
|
||||
uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
|
||||
if (gpu_threads > 1) throughput >> (gpu_threads-1);
|
||||
api_set_throughput(thr_id, throughput);
|
||||
bench_set_throughput(thr_id, throughput);
|
||||
//if (opt_debug) applog(LOG_DEBUG, "GPU %d-%d: throughput %u", dev_id, thr_id, throughput);
|
||||
return throughput;
|
||||
}
|
||||
|
||||
@ -240,7 +256,8 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
|
||||
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
|
||||
{
|
||||
struct cgpu_info *gpu = &thr_info[thr_id].gpu;
|
||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||
gpu->hw_errors++;
|
||||
applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err));
|
||||
applog(LOG_ERR, "GPU #%d: %s %s", dev_id, func, cudaGetErrorString(err));
|
||||
sleep(1);
|
||||
}
|
||||
|
7
miner.h
7
miner.h
@ -447,6 +447,7 @@ extern bool opt_showdiff;
|
||||
extern bool opt_tracegpu;
|
||||
extern int opt_n_threads;
|
||||
extern int active_gpus;
|
||||
extern int gpu_threads;
|
||||
extern int opt_timeout;
|
||||
extern bool want_longpoll;
|
||||
extern bool have_longpoll;
|
||||
@ -489,6 +490,9 @@ int cuda_finddevice(char *name);
|
||||
void cuda_print_devices();
|
||||
int cuda_available_memory(int thr_id);
|
||||
|
||||
uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount);
|
||||
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount);
|
||||
|
||||
#define CL_N "\x1B[0m"
|
||||
#define CL_RED "\x1B[31m"
|
||||
#define CL_GRN "\x1B[32m"
|
||||
@ -522,6 +526,7 @@ int cuda_available_memory(int thr_id);
|
||||
|
||||
extern void format_hashrate(double hashrate, char *output);
|
||||
extern void applog(int prio, const char *fmt, ...);
|
||||
#define gpulog(prio, fmt, thr_id, ...) applog(prio, fmt, thr_id, __VA_ARGS__)
|
||||
void get_defconfig_path(char *out, size_t bufsize, char *argv0);
|
||||
extern void cbin2hex(char *out, const char *in, size_t len);
|
||||
extern char *bin2hex(const unsigned char *in, size_t len);
|
||||
@ -533,7 +538,6 @@ void diff_to_target(uint32_t* target, double diff);
|
||||
void work_set_target(struct work* work, double diff);
|
||||
double target_to_diff(uint32_t* target);
|
||||
extern void get_currentalgo(char* buf, int sz);
|
||||
extern uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount);
|
||||
|
||||
// bignum
|
||||
double bn_convert_nbits(const uint32_t nbits);
|
||||
@ -547,6 +551,7 @@ extern int bench_algo;
|
||||
void bench_init(int threads);
|
||||
void bench_free();
|
||||
bool bench_algo_switch_next(int thr_id);
|
||||
void bench_set_throughput(int thr_id, uint32_t throughput);
|
||||
void bench_display_results();
|
||||
|
||||
|
||||
|
2
skein.cu
2
skein.cu
@ -364,7 +364,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
|
||||
throughput = min(throughput, (max_nonce - first_nonce));
|
||||
|
||||
uint32_t foundNonce, secNonce = 0;
|
||||
uint64_t target64;
|
||||
uint64_t target64 = 0;
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x03;
|
||||
|
Loading…
Reference in New Issue
Block a user