Browse Source

power monitoring thread + some api changes

based on Alexis' monitoring thread idea, but using only a single thread

note: other api changes will come soon, related to that
master
Tanguy Pruvot 8 years ago
parent
commit
0dd022779b
  1. 27
      api.cpp
  2. 47
      ccminer.cpp
  3. 15
      miner.h
  4. 110
      nvml.cpp
  5. 4
      nvml.h

27
api.cpp

@ -8,7 +8,7 @@
* Software Foundation; either version 2 of the License, or (at your option) * Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details. * any later version. See COPYING for more details.
*/ */
#define APIVERSION "1.8" #define APIVERSION "1.9"
#ifdef WIN32 #ifdef WIN32
# define _WINSOCK_DEPRECATED_NO_WARNINGS # define _WINSOCK_DEPRECATED_NO_WARNINGS
@ -112,6 +112,7 @@ static void gpustatus(int thr_id)
if (thr_id >= 0 && thr_id < opt_n_threads) { if (thr_id >= 0 && thr_id < opt_n_threads) {
struct cgpu_info *cgpu = &thr_info[thr_id].gpu; struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
double khashes_per_watt = 0;
int gpuid = cgpu->gpu_id; int gpuid = cgpu->gpu_id;
char buf[512]; *buf = '\0'; char buf[512]; *buf = '\0';
char* card; char* card;
@ -131,14 +132,24 @@ static void gpustatus(int thr_id)
cgpu->rejected = p->rejected_count; cgpu->rejected = p->rejected_count;
cgpu->khashes = stats_get_speed(thr_id, 0.0) / 1000.0; cgpu->khashes = stats_get_speed(thr_id, 0.0) / 1000.0;
if (cgpu->monitor.gpu_power) {
cgpu->gpu_power = cgpu->monitor.gpu_power;
khashes_per_watt = (double)cgpu->khashes / cgpu->monitor.gpu_power;
khashes_per_watt *= 1000; // power in mW
//gpulog(LOG_BLUE, thr_id, "KHW: %g", khashes_per_watt);
}
card = device_name[gpuid]; card = device_name[gpuid];
snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;TEMP=%.1f;" snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;TEMP=%.1f;"
"POWER=%u;FAN=%hu;RPM=%hu;FREQ=%d;KHS=%.2f;HWF=%d;I=%.1f;THR=%u|", "POWER=%u;FAN=%hu;RPM=%hu;"
"FREQ=%u;CORE=%u;MEM=%u;"
"KHS=%.2f;KHW=%.5f;"
"HWF=%d;I=%.1f;THR=%u|",
gpuid, cgpu->gpu_bus, card, cgpu->gpu_temp, gpuid, cgpu->gpu_bus, card, cgpu->gpu_temp,
cgpu->gpu_power, cgpu->gpu_fan, cgpu->gpu_fan_rpm, cgpu->gpu_power, cgpu->gpu_fan, cgpu->gpu_fan_rpm,
cgpu->gpu_clock, cgpu->khashes, cgpu->gpu_clock, cgpu->monitor.gpu_clock, cgpu->monitor.gpu_memclock,
cgpu->khashes, khashes_per_watt,
cgpu->hw_errors, cgpu->intensity, cgpu->throughput); cgpu->hw_errors, cgpu->intensity, cgpu->throughput);
// append to buffer for multi gpus // append to buffer for multi gpus
@ -349,7 +360,7 @@ static char *gethistory(char *params)
*buffer = '\0'; *buffer = '\0';
for (int i = 0; i < records; i++) { for (int i = 0; i < records; i++) {
time_t ts = data[i].tm_stat; time_t ts = data[i].tm_stat;
p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;" p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%g;"
"COUNT=%u;FOUND=%u;ID=%u;TS=%u|", "COUNT=%u;FOUND=%u;ID=%u;TS=%u|",
data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty, data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty,
data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts); data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts);
@ -358,7 +369,7 @@ static char *gethistory(char *params)
} }
/** /**
* Returns the job scans ranges (debug purpose) * Returns the job scans ranges (debug purpose, only with -D)
*/ */
static char *getscanlog(char *params) static char *getscanlog(char *params)
{ {
@ -368,9 +379,11 @@ static char *getscanlog(char *params)
*buffer = '\0'; *buffer = '\0';
for (int i = 0; i < records; i++) { for (int i = 0; i < records; i++) {
time_t ts = data[i].tm_upd; time_t ts = data[i].tm_upd;
p += sprintf(p, "H=%u;P=%u;JOB=%u;N=%u;FROM=0x%x;SCANTO=0x%x;" p += sprintf(p, "H=%u;P=%u;JOB=%u;ID=%d;DIFF=%g;"
"N=0x%x;FROM=0x%x;SCANTO=0x%x;"
"COUNT=0x%x;FOUND=%u;TS=%u|", "COUNT=0x%x;FOUND=%u;TS=%u|",
data[i].height, data[i].npool, data[i].njobid, data[i].nonce, data[i].scanned_from, data[i].scanned_to, data[i].height, data[i].npool, data[i].njobid, (int)data[i].job_nonce_id, data[i].sharediff,
data[i].nonce, data[i].scanned_from, data[i].scanned_to,
(data[i].scanned_to - data[i].scanned_from), data[i].tm_sent ? 1 : 0, (uint32_t)ts); (data[i].scanned_to - data[i].scanned_from), data[i].tm_sent ? 1 : 0, (uint32_t)ts);
} }
return buffer; return buffer;

47
ccminer.cpp

@ -85,6 +85,7 @@ bool opt_debug_threads = false;
bool opt_protocol = false; bool opt_protocol = false;
bool opt_benchmark = false; bool opt_benchmark = false;
bool opt_showdiff = true; bool opt_showdiff = true;
bool opt_hwmonitor = true;
// todo: limit use of these flags, // todo: limit use of these flags,
// prefer the pools[] attributes // prefer the pools[] attributes
@ -183,6 +184,7 @@ struct thr_api *thr_api;
int longpoll_thr_id = -1; int longpoll_thr_id = -1;
int stratum_thr_id = -1; int stratum_thr_id = -1;
int api_thr_id = -1; int api_thr_id = -1;
int monitor_thr_id = -1;
bool stratum_need_reset = false; bool stratum_need_reset = false;
volatile bool abort_flag = false; volatile bool abort_flag = false;
struct work_restart *work_restart = NULL; struct work_restart *work_restart = NULL;
@ -1684,6 +1686,7 @@ static void *miner_thread(void *userdata)
int switchn = pool_switch_count; int switchn = pool_switch_count;
int thr_id = mythr->id; int thr_id = mythr->id;
int dev_id = device_map[thr_id % MAX_GPUS]; int dev_id = device_map[thr_id % MAX_GPUS];
struct cgpu_info * cgpu = &thr_info[thr_id].gpu;
struct work work; struct work work;
uint64_t loopcnt = 0; uint64_t loopcnt = 0;
uint32_t max_nonce; uint32_t max_nonce;
@ -2142,6 +2145,11 @@ static void *miner_thread(void *userdata)
if (opt_led_mode == LED_MODE_MINING) if (opt_led_mode == LED_MODE_MINING)
gpu_led_on(dev_id); gpu_led_on(dev_id);
if (cgpu && loopcnt > 1) {
cgpu->monitor.sampling_flag = true;
pthread_cond_signal(&cgpu->monitor.sampling_signal);
}
hashes_done = 0; hashes_done = 0;
gettimeofday(&tv_start, NULL); gettimeofday(&tv_start, NULL);
@ -2350,6 +2358,10 @@ static void *miner_thread(void *userdata)
timeval_subtract(&diff, &tv_end, &tv_start); timeval_subtract(&diff, &tv_end, &tv_start);
if (cgpu && diff.tv_sec) { // stop monitoring
cgpu->monitor.sampling_flag = false;
}
if (diff.tv_usec || diff.tv_sec) { if (diff.tv_usec || diff.tv_sec) {
double dtime = (double) diff.tv_sec + 1e-6 * diff.tv_usec; double dtime = (double) diff.tv_sec + 1e-6 * diff.tv_usec;
@ -3805,7 +3817,7 @@ int main(int argc, char *argv[])
if (!work_restart) if (!work_restart)
return EXIT_CODE_SW_INIT_ERROR; return EXIT_CODE_SW_INIT_ERROR;
thr_info = (struct thr_info *)calloc(opt_n_threads + 4, sizeof(*thr)); thr_info = (struct thr_info *)calloc(opt_n_threads + 5, sizeof(*thr));
if (!thr_info) if (!thr_info)
return EXIT_CODE_SW_INIT_ERROR; return EXIT_CODE_SW_INIT_ERROR;
@ -3914,6 +3926,22 @@ int main(int argc, char *argv[])
} }
} }
#ifdef USE_WRAPNVML
// to monitor gpu activity during work, a thread is required
if (1) {
monitor_thr_id = opt_n_threads + 4;
thr = &thr_info[monitor_thr_id];
thr->id = monitor_thr_id;
thr->q = tq_new();
if (!thr->q)
return EXIT_CODE_SW_INIT_ERROR;
if (unlikely(pthread_create(&thr->pth, NULL, monitor_thread, thr))) {
applog(LOG_ERR, "Monitoring thread %d create failed", i);
return EXIT_CODE_SW_INIT_ERROR;
}
}
#endif
/* start mining threads */ /* start mining threads */
for (i = 0; i < opt_n_threads; i++) { for (i = 0; i < opt_n_threads; i++) {
thr = &thr_info[i]; thr = &thr_info[i];
@ -3926,6 +3954,9 @@ int main(int argc, char *argv[])
if (!thr->q) if (!thr->q)
return EXIT_CODE_SW_INIT_ERROR; return EXIT_CODE_SW_INIT_ERROR;
pthread_mutex_init(&thr->gpu.monitor.lock, NULL);
pthread_cond_init(&thr->gpu.monitor.sampling_signal, NULL);
if (unlikely(pthread_create(&thr->pth, NULL, miner_thread, thr))) { if (unlikely(pthread_create(&thr->pth, NULL, miner_thread, thr))) {
applog(LOG_ERR, "thread %d create failed", i); applog(LOG_ERR, "thread %d create failed", i);
return EXIT_CODE_SW_INIT_ERROR; return EXIT_CODE_SW_INIT_ERROR;
@ -3944,9 +3975,21 @@ int main(int argc, char *argv[])
/* main loop - simply wait for workio thread to exit */ /* main loop - simply wait for workio thread to exit */
pthread_join(thr_info[work_thr_id].pth, NULL); pthread_join(thr_info[work_thr_id].pth, NULL);
abort_flag = true;
/* wait for mining threads */ /* wait for mining threads */
for (i = 0; i < opt_n_threads; i++) for (i = 0; i < opt_n_threads; i++) {
struct cgpu_info *cgpu = &thr_info[i].gpu;
if (monitor_thr_id != -1 && cgpu) {
pthread_cond_signal(&cgpu->monitor.sampling_signal);
}
pthread_join(thr_info[i].pth, NULL); pthread_join(thr_info[i].pth, NULL);
}
if (monitor_thr_id != -1) {
pthread_join(thr_info[monitor_thr_id].pth, NULL);
//tq_free(thr_info[monitor_thr_id].q);
}
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "workio thread dead, exiting."); applog(LOG_DEBUG, "workio thread dead, exiting.");

15
miner.h

@ -361,6 +361,19 @@ extern void free_scrypt_jane(int thr_id);
void *api_thread(void *userdata); void *api_thread(void *userdata);
void api_set_throughput(int thr_id, uint32_t throughput); void api_set_throughput(int thr_id, uint32_t throughput);
/**
 * Per-GPU hardware sampling state, embedded in struct cgpu_info.
 * The value fields are written by monitor_thread() as averages over one
 * sampling window and read by the API status output.
 */
struct monitor_info {
/* average temperature over the last sampling window (logged with a "C" suffix) */
uint32_t gpu_temp;
/* average fan speed over the last window, in percent */
uint32_t gpu_fan;
/* average core (SM) clock over the last window (logged as MHz) */
uint32_t gpu_clock;
/* average memory clock over the last window */
uint32_t gpu_memclock;
/* average power draw over the last window; consumers treat it as milliwatts */
uint32_t gpu_power;
/* mutex handed to pthread_cond_wait() together with sampling_signal */
pthread_mutex_t lock;
/* signaled by the miner thread when a scan starts, and by main() at shutdown */
pthread_cond_t sampling_signal;
/* set by the miner thread while a scan is running; the monitor keeps sampling while true */
volatile bool sampling_flag;
/* time() of the last monitor log line, used to print at most once per 60 seconds */
uint32_t tm_displayed;
};
struct cgpu_info { struct cgpu_info {
uint8_t gpu_id; uint8_t gpu_id;
uint8_t thr_id; uint8_t thr_id;
@ -391,6 +404,8 @@ struct cgpu_info {
char gpu_desc[64]; char gpu_desc[64];
double intensity; double intensity;
uint32_t throughput; uint32_t throughput;
struct monitor_info monitor;
}; };
struct thr_api { struct thr_api {

110
nvml.cpp

@ -418,6 +418,25 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
return ret; return ret;
} }
/**
 * Read the current core (SM) and memory clocks of a device through NVML.
 *
 * @param nvmlh   NVML wrapper handle
 * @param dev_id  CUDA device index, translated through cuda_nvml_device_id[]
 * @param core    receives the SM clock; written only on success
 * @param mem     receives the memory clock; written only on success
 * @return 1 when both clocks were read, 0 when the NVML entry point is
 *         missing or either query failed, -ENODEV when the CUDA device has
 *         no matching NVML device.
 */
int nvml_get_clocks(nvml_handle *nvmlh, int dev_id, unsigned int *core, unsigned int *mem)
{
	nvmlReturn_t rc;
	uint32_t gpu_clk = 0, mem_clk = 0;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	// the symbol is optional in the wrapper, older drivers may not export it
	if (!nvmlh->nvmlDeviceGetClockInfo)
		return 0;

	// check each query on its own: previously the second call overwrote the
	// first status, so a failed SM read was reported as a success with core=0
	rc = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[n], NVML_CLOCK_SM, &gpu_clk);
	if (rc != NVML_SUCCESS)
		return 0;

	rc = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
	if (rc != NVML_SUCCESS)
		return 0;

	*core = gpu_clk;
	*mem = mem_clk;
	return 1;
}
/** /**
* Set power state of a device (9xx) * Set power state of a device (9xx)
@ -639,6 +658,21 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
return 0; return 0;
} }
/**
 * Read the current SM and memory clocks of a CUDA device using the global
 * NVML handle (hnvml).
 *
 * @param cudaindex       CUDA device index, translated through cuda_nvml_device_id[]
 * @param graphics_clock  receives the NVML_CLOCK_SM value
 * @param mem_clock       receives the NVML_CLOCK_MEM value
 * @return 0 on success, -1 when NVML is unavailable, the device is unknown
 *         or a query failed. On failure the outputs must not be relied upon
 *         (graphics_clock may already have been written).
 */
int nvml_get_current_clocks(int cudaindex, uint32_t *graphics_clock, uint32_t *mem_clock)
{
	nvmlReturn_t rc;

	// NVML may not be loaded at all, and the clock query symbol is optional
	if (hnvml == NULL || !hnvml->nvmlDeviceGetClockInfo)
		return -1;

	int gpuindex = hnvml->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount)
		return -1;

	rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock);
	if (rc != NVML_SUCCESS)
		return -1;

	rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock);
	if (rc != NVML_SUCCESS)
		return -1;

	return 0;
}
/* Not Supported on 750Ti 340.23 */ /* Not Supported on 750Ti 340.23 */
int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{ {
@ -2051,3 +2085,79 @@ void gpu_led_off(int dev_id)
} }
#endif #endif
} }
#ifdef USE_WRAPNVML
extern double thr_hashrates[MAX_GPUS];
extern bool opt_debug_threads;
extern bool opt_hwmonitor;
extern int num_cpus;
/**
 * Hardware monitoring thread (single instance for all GPUs).
 *
 * Visits the mining threads round-robin. For each one it blocks until the
 * miner signals the start of a scan, then samples clocks, temperature, fan
 * and power every 50 ms while the miner keeps monitor.sampling_flag set
 * (bounded to 2000 samples). The averages are stored in cgpu->monitor and a
 * summary line is logged at most once per minute when opt_hwmonitor is set.
 *
 * @param userdata  thread info pointer passed by pthread_create (unused)
 * @return always NULL
 */
void *monitor_thread(void *userdata)
{
	int thr_id = -1;

	while (!abort_flag && !opt_quiet)
	{
		// This thread monitors card's power lazily during scans, one at a time...
		thr_id = (thr_id + 1) % opt_n_threads;
		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
		int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id);

		if (hnvml != NULL)
		{
			uint64_t clock = 0, mem_clock = 0;
			uint32_t fanpercent = 0, power = 0;
			double tempC = 0, khs_per_watt = 0;
			uint32_t counter = 0, clock_samples = 0;
			int max_loops = 2000;

			// POSIX requires the mutex to be locked by the caller of
			// pthread_cond_wait(); it is re-acquired before the call returns.
			pthread_mutex_lock(&cgpu->monitor.lock);
			pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock);

			do {
				uint32_t tmp_clock = 0, tmp_memclock = 0;
				// only account clock readings which were really obtained,
				// a failed query would otherwise add undefined values
				if (nvml_get_current_clocks(device_map[thr_id], &tmp_clock, &tmp_memclock) == 0) {
					clock += tmp_clock;
					mem_clock += tmp_memclock;
					clock_samples++;
				}
				tempC += gpu_temp(cgpu);
				fanpercent += gpu_fanpercent(cgpu);
				power += gpu_power(cgpu);
				counter++;

				usleep(50000);
				if (abort_flag) {
					// do not leave the thread with the mutex held
					pthread_mutex_unlock(&cgpu->monitor.lock);
					goto abort;
				}
			} while (cgpu->monitor.sampling_flag && (--max_loops));

			// counter is at least 1 here (do/while), divisions are safe
			cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter);
			cgpu->monitor.gpu_fan = fanpercent/counter;
			cgpu->monitor.gpu_power = power/counter;
			cgpu->monitor.gpu_clock = clock_samples ? (uint32_t) (clock/clock_samples) : 0;
			cgpu->monitor.gpu_memclock = clock_samples ? (uint32_t) (mem_clock/clock_samples) : 0;

			if (power) {
				// todo: handle units
				khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]) / ((double)power / counter);
			}

			// todo: not shown on decred
			if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) {
				gpulog(LOG_INFO, thr_id, "%u MHz %.3f kH/W %uW %uC FAN %u%%",
					cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/,
					khs_per_watt, cgpu->monitor.gpu_power / 1000,
					cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan
				);
				cgpu->monitor.tm_displayed = (uint32_t)time(NULL);
			}

			pthread_mutex_unlock(&cgpu->monitor.lock);
		}
		usleep(500); // safety
	}
abort:
	if (opt_debug_threads)
		applog(LOG_DEBUG, "%s() died", __func__);
	return NULL;
}
#endif

4
nvml.h

@ -17,6 +17,8 @@
#include "miner.h" #include "miner.h"
void *monitor_thread(void *userdata);
typedef void * nvmlDevice_t; typedef void * nvmlDevice_t;
/* our own version of the PCI info struct */ /* our own version of the PCI info struct */
@ -212,6 +214,8 @@ unsigned int gpu_power(struct cgpu_info *gpu);
int gpu_pstate(struct cgpu_info *gpu); int gpu_pstate(struct cgpu_info *gpu);
int gpu_busid(struct cgpu_info *gpu); int gpu_busid(struct cgpu_info *gpu);
void gpu_current_clocks(struct cgpu_info *gpu);
// pid/vid, sn and bios rev // pid/vid, sn and bios rev
int gpu_info(struct cgpu_info *gpu); int gpu_info(struct cgpu_info *gpu);

Loading…
Cancel
Save