From 0dd022779b37a958c4ed8b4e8fd1e4a2c9a17c8c Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Mon, 9 Jan 2017 16:21:36 +0100 Subject: [PATCH] power monitoring thread + some api changes based on alexis monitoring thread idea, but which only use one thread note: other api changes will come soon, related to that --- api.cpp | 27 +++++++++---- ccminer.cpp | 47 +++++++++++++++++++++- miner.h | 15 +++++++ nvml.cpp | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++ nvml.h | 4 ++ 5 files changed, 194 insertions(+), 9 deletions(-) diff --git a/api.cpp b/api.cpp index 917ef10..c209b4e 100644 --- a/api.cpp +++ b/api.cpp @@ -8,7 +8,7 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. See COPYING for more details. */ -#define APIVERSION "1.8" +#define APIVERSION "1.9" #ifdef WIN32 # define _WINSOCK_DEPRECATED_NO_WARNINGS @@ -112,6 +112,7 @@ static void gpustatus(int thr_id) if (thr_id >= 0 && thr_id < opt_n_threads) { struct cgpu_info *cgpu = &thr_info[thr_id].gpu; + double khashes_per_watt = 0; int gpuid = cgpu->gpu_id; char buf[512]; *buf = '\0'; char* card; @@ -131,14 +132,24 @@ static void gpustatus(int thr_id) cgpu->rejected = p->rejected_count; cgpu->khashes = stats_get_speed(thr_id, 0.0) / 1000.0; + if (cgpu->monitor.gpu_power) { + cgpu->gpu_power = cgpu->monitor.gpu_power; + khashes_per_watt = (double)cgpu->khashes / cgpu->monitor.gpu_power; + khashes_per_watt *= 1000; // power in mW + //gpulog(LOG_BLUE, thr_id, "KHW: %g", khashes_per_watt); + } card = device_name[gpuid]; snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;TEMP=%.1f;" - "POWER=%u;FAN=%hu;RPM=%hu;FREQ=%d;KHS=%.2f;HWF=%d;I=%.1f;THR=%u|", + "POWER=%u;FAN=%hu;RPM=%hu;" + "FREQ=%u;CORE=%u;MEM=%u;" + "KHS=%.2f;KHW=%.5f;" + "HWF=%d;I=%.1f;THR=%u|", gpuid, cgpu->gpu_bus, card, cgpu->gpu_temp, cgpu->gpu_power, cgpu->gpu_fan, cgpu->gpu_fan_rpm, - cgpu->gpu_clock, cgpu->khashes, + cgpu->gpu_clock, cgpu->monitor.gpu_clock, cgpu->monitor.gpu_memclock, + 
cgpu->khashes, khashes_per_watt, cgpu->hw_errors, cgpu->intensity, cgpu->throughput); // append to buffer for multi gpus @@ -349,7 +360,7 @@ static char *gethistory(char *params) *buffer = '\0'; for (int i = 0; i < records; i++) { time_t ts = data[i].tm_stat; - p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;" + p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%g;" "COUNT=%u;FOUND=%u;ID=%u;TS=%u|", data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty, data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts); @@ -358,7 +369,7 @@ static char *gethistory(char *params) } /** - * Returns the job scans ranges (debug purpose) + * Returns the job scans ranges (debug purpose, only with -D) */ static char *getscanlog(char *params) { @@ -368,9 +379,11 @@ static char *getscanlog(char *params) *buffer = '\0'; for (int i = 0; i < records; i++) { time_t ts = data[i].tm_upd; - p += sprintf(p, "H=%u;P=%u;JOB=%u;N=%u;FROM=0x%x;SCANTO=0x%x;" + p += sprintf(p, "H=%u;P=%u;JOB=%u;ID=%d;DIFF=%g;" + "N=0x%x;FROM=0x%x;SCANTO=0x%x;" "COUNT=0x%x;FOUND=%u;TS=%u|", - data[i].height, data[i].npool, data[i].njobid, data[i].nonce, data[i].scanned_from, data[i].scanned_to, + data[i].height, data[i].npool, data[i].njobid, (int)data[i].job_nonce_id, data[i].sharediff, + data[i].nonce, data[i].scanned_from, data[i].scanned_to, (data[i].scanned_to - data[i].scanned_from), data[i].tm_sent ? 
1 : 0, (uint32_t)ts); } return buffer; diff --git a/ccminer.cpp b/ccminer.cpp index b85fe86..4721a45 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -85,6 +85,7 @@ bool opt_debug_threads = false; bool opt_protocol = false; bool opt_benchmark = false; bool opt_showdiff = true; +bool opt_hwmonitor = true; // todo: limit use of these flags, // prefer the pools[] attributes @@ -183,6 +184,7 @@ struct thr_api *thr_api; int longpoll_thr_id = -1; int stratum_thr_id = -1; int api_thr_id = -1; +int monitor_thr_id = -1; bool stratum_need_reset = false; volatile bool abort_flag = false; struct work_restart *work_restart = NULL; @@ -1684,6 +1686,7 @@ static void *miner_thread(void *userdata) int switchn = pool_switch_count; int thr_id = mythr->id; int dev_id = device_map[thr_id % MAX_GPUS]; + struct cgpu_info * cgpu = &thr_info[thr_id].gpu; struct work work; uint64_t loopcnt = 0; uint32_t max_nonce; @@ -2142,6 +2145,11 @@ static void *miner_thread(void *userdata) if (opt_led_mode == LED_MODE_MINING) gpu_led_on(dev_id); + if (cgpu && loopcnt > 1) { + cgpu->monitor.sampling_flag = true; + pthread_cond_signal(&cgpu->monitor.sampling_signal); + } + hashes_done = 0; gettimeofday(&tv_start, NULL); @@ -2350,6 +2358,10 @@ static void *miner_thread(void *userdata) timeval_subtract(&diff, &tv_end, &tv_start); + if (cgpu && diff.tv_sec) { // stop monitoring + cgpu->monitor.sampling_flag = false; + } + if (diff.tv_usec || diff.tv_sec) { double dtime = (double) diff.tv_sec + 1e-6 * diff.tv_usec; @@ -3805,7 +3817,7 @@ int main(int argc, char *argv[]) if (!work_restart) return EXIT_CODE_SW_INIT_ERROR; - thr_info = (struct thr_info *)calloc(opt_n_threads + 4, sizeof(*thr)); + thr_info = (struct thr_info *)calloc(opt_n_threads + 5, sizeof(*thr)); if (!thr_info) return EXIT_CODE_SW_INIT_ERROR; @@ -3914,6 +3926,22 @@ int main(int argc, char *argv[]) } } +#ifdef USE_WRAPNVML + // to monitor gpu activitity during work, a thread is required + if (1) { + monitor_thr_id = opt_n_threads + 4; + thr = 
&thr_info[monitor_thr_id]; + thr->id = monitor_thr_id; + thr->q = tq_new(); + if (!thr->q) + return EXIT_CODE_SW_INIT_ERROR; + if (unlikely(pthread_create(&thr->pth, NULL, monitor_thread, thr))) { + applog(LOG_ERR, "Monitoring thread %d create failed", monitor_thr_id); // report the monitor slot, not the stale loop index i + return EXIT_CODE_SW_INIT_ERROR; + } + } +#endif + /* start mining threads */ for (i = 0; i < opt_n_threads; i++) { thr = &thr_info[i]; @@ -3926,6 +3954,9 @@ int main(int argc, char *argv[]) if (!thr->q) return EXIT_CODE_SW_INIT_ERROR; + pthread_mutex_init(&thr->gpu.monitor.lock, NULL); + pthread_cond_init(&thr->gpu.monitor.sampling_signal, NULL); + if (unlikely(pthread_create(&thr->pth, NULL, miner_thread, thr))) { applog(LOG_ERR, "thread %d create failed", i); return EXIT_CODE_SW_INIT_ERROR; @@ -3944,9 +3975,21 @@ int main(int argc, char *argv[]) /* main loop - simply wait for workio thread to exit */ pthread_join(thr_info[work_thr_id].pth, NULL); + abort_flag = true; + /* wait for mining threads */ - for (i = 0; i < opt_n_threads; i++) + for (i = 0; i < opt_n_threads; i++) { + struct cgpu_info *cgpu = &thr_info[i].gpu; + if (monitor_thr_id != -1 && cgpu) { + pthread_cond_signal(&cgpu->monitor.sampling_signal); + } pthread_join(thr_info[i].pth, NULL); + } + + if (monitor_thr_id != -1) { + pthread_join(thr_info[monitor_thr_id].pth, NULL); + //tq_free(thr_info[monitor_thr_id].q); + } if (opt_debug) applog(LOG_DEBUG, "workio thread dead, exiting."); diff --git a/miner.h b/miner.h index 0ff92f3..05cd476 100644 --- a/miner.h +++ b/miner.h @@ -361,6 +361,19 @@ extern void free_scrypt_jane(int thr_id); void *api_thread(void *userdata); void api_set_throughput(int thr_id, uint32_t throughput); +struct monitor_info { // per-GPU hardware samples written by monitor_thread() + uint32_t gpu_temp; // averaged temperature, logged as degrees C + uint32_t gpu_fan; // averaged fan speed, logged as percent + uint32_t gpu_clock; // averaged NVML_CLOCK_SM reading, logged as MHz + uint32_t gpu_memclock; // averaged NVML_CLOCK_MEM reading + uint32_t gpu_power; // averaged power draw in mW (the log line divides by 1000 to print W) + + pthread_mutex_t lock; // mutex passed to pthread_cond_wait() together with sampling_signal + pthread_cond_t sampling_signal; // signalled by miner_thread() before a scan and by main() at shutdown + volatile bool sampling_flag; // set by miner_thread() before a scan, cleared after a scan lasting >= 1s; monitor_thread() samples while it is set + uint32_t tm_displayed; // time() of the last hardware status log line +}; + struct cgpu_info { uint8_t gpu_id; uint8_t thr_id; @@ -391,6 +404,8 @@
struct cgpu_info { char gpu_desc[64]; double intensity; uint32_t throughput; + + struct monitor_info monitor; }; struct thr_api { diff --git a/nvml.cpp b/nvml.cpp index 140eb1e..eb02802 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -418,6 +418,25 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) return ret; } +int nvml_get_clocks(nvml_handle *nvmlh, int dev_id, unsigned int *core, unsigned int *mem) // returns 1 with *core/*mem set, 0 if the query is unavailable or fails, -ENODEV for an unmapped device +{ + int ret = 0; + nvmlReturn_t rc; + uint32_t gpu_clk = 0, mem_clk = 0; + int n = nvmlh->cuda_nvml_device_id[dev_id]; + if (n < 0 || n >= nvmlh->nvml_gpucount) + return -ENODEV; + // both reads must succeed: the MEM status must not overwrite a failed SM status + if (nvmlh->nvmlDeviceGetClockInfo) { + rc = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[n], NVML_CLOCK_SM, &gpu_clk); + if (rc == NVML_SUCCESS) rc = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + if (rc == NVML_SUCCESS) { + *core = gpu_clk; *mem = mem_clk; + return 1; + } + } + return ret; +} /** * Set power state of a device (9xx) @@ -639,6 +658,21 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) return 0; } +// Read the current SM and memory clocks of a CUDA device through the global hnvml handle; returns 0 on success, -1 on failure +int nvml_get_current_clocks(int cudaindex, uint32_t *graphics_clock, uint32_t *mem_clock) +{ + nvmlReturn_t rc; + int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; + if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -1; + if (!hnvml->nvmlDeviceGetClockInfo) return -1; // pointer may be NULL; same guard as nvml_get_clocks() + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); + if (rc != NVML_SUCCESS) return -1; + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); + if (rc != NVML_SUCCESS) return -1; + + return 0; +} + /* Not Supported on 750Ti 340.23 */ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { @@ -2051,3 +2085,79 @@ void gpu_led_off(int dev_id) } #endif } + +#ifdef USE_WRAPNVML +extern double thr_hashrates[MAX_GPUS]; +extern bool opt_debug_threads; +extern bool opt_hwmonitor; +extern int num_cpus; +// Single monitoring thread: waits for a GPU's sampling_signal, then averages its clocks, temperature, fan and power while sampling_flag stays set +void *monitor_thread(void *userdata) +{ + //struct thr_info *mythr = (struct thr_info *)userdata; + int thr_id =
-1; + + while (!abort_flag && !opt_quiet) + { + // This thread monitors card's power lazily during scans, one at a time... + thr_id = (thr_id + 1) % opt_n_threads; + struct cgpu_info *cgpu = &thr_info[thr_id].gpu; + int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); + + //applog(LOG_BLUE, "sampling device %d", dev_id); + if (hnvml != NULL && cgpu) + { + uint64_t clock = 0, mem_clock = 0; + uint32_t fanpercent = 0, power = 0; + double tempC = 0, khs_per_watt = 0; + uint32_t counter = 0; + int max_loops = 2000; + pthread_mutex_lock(&cgpu->monitor.lock); // pthread_cond_wait() must be entered with the mutex held, otherwise behaviour is undefined + pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); + + do { + uint32_t tmp_clock = 0, tmp_memclock = 0; // stay 0 when the NVML query fails instead of adding garbage + nvml_get_current_clocks(device_map[thr_id], &tmp_clock, &tmp_memclock); + clock += tmp_clock; + mem_clock += tmp_memclock; + tempC += gpu_temp(cgpu); + fanpercent += gpu_fanpercent(cgpu); + power += gpu_power(cgpu); + counter++; + + usleep(50000); + if (abort_flag) { pthread_mutex_unlock(&cgpu->monitor.lock); goto abort; } // do not exit the thread while still owning the mutex + + } while (cgpu->monitor.sampling_flag && (--max_loops)); + + cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter); + cgpu->monitor.gpu_fan = fanpercent/counter; + cgpu->monitor.gpu_power = power/counter; + cgpu->monitor.gpu_clock = (uint32_t) (clock/counter); + cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter); + + if (power) { + // todo: handle units + khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]) / ((double)power / counter); + } + + // todo: not shown on decred + if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) { + gpulog(LOG_INFO, thr_id, "%u MHz %.3f kH/W %uW %uC FAN %u%%", + cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, + khs_per_watt, cgpu->monitor.gpu_power / 1000, + cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan + ); + cgpu->monitor.tm_displayed = (uint32_t)time(NULL); + } + + pthread_mutex_unlock(&cgpu->monitor.lock); + } + usleep(500); // safety + } +abort: + if (opt_debug_threads) + applog(LOG_DEBUG, "%s() died", __func__); + return NULL; +} +#endif diff --git a/nvml.h b/nvml.h index 75016db..1298a3a
100644 --- a/nvml.h +++ b/nvml.h @@ -17,6 +17,8 @@ #include "miner.h" +void *monitor_thread(void *userdata); + typedef void * nvmlDevice_t; /* our own version of the PCI info struct */ @@ -212,6 +214,8 @@ unsigned int gpu_power(struct cgpu_info *gpu); int gpu_pstate(struct cgpu_info *gpu); int gpu_busid(struct cgpu_info *gpu); +void gpu_current_clocks(struct cgpu_info *gpu); + // pid/vid, sn and bios rev int gpu_info(struct cgpu_info *gpu);