Browse Source

nvml: add --plimit and --pstate parameters

--pstate 0 is an alternative to clocks values to set the device in P0
mode, 1 will set appl. clocks to default P1 clocks, 2 to default P2..

--plimit 150W really works on the 9xx and allow to limit/reduce the gpu usage

Note: nvml interface is only available on linux and windows x64

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
2upstream
Tanguy Pruvot 9 years ago
parent
commit
7c5087d01c
  1. 42
      ccminer.cpp
  2. 131
      nvml.cpp
  3. 39
      nvml.h

42
ccminer.cpp

@ -198,6 +198,8 @@ long device_sm[MAX_GPUS] = { 0 }; @@ -198,6 +198,8 @@ long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
uint32_t device_plimit[MAX_GPUS] = { 0 };
int8_t device_pstate[MAX_GPUS] = { -1 };
// un-linked to cmdline scrypt options (useless)
int device_batchsize[MAX_GPUS] = { 0 };
@ -343,10 +345,12 @@ Options:\n\ @@ -343,10 +345,12 @@ Options:\n\
--max-temp=N Only mine if gpu temp is less than specified value\n\
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
--max-diff=N Only mine if net difficulty is less than specified value\n"
#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64))
#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64)) /* via nvml */
"\
--gpu-clock=1150 Set device application clock\n\
--mem-clock=3505 Set the gpu memory clock (require 346.72+ driver)\n"
--mem-clock=3505 Set the gpu memory max clock (346.72+ driver)\n\
--gpu-clock=1150 Set the gpu engine max clock (346.72+ driver)\n\
--pstate=0[,2] Set the gpu power state (352.21+ driver)\n\
--plimit=100W Set the gpu power limit (352.21+ driver)\n"
#endif
#ifdef HAVE_SYSLOG_H
"\
@ -410,6 +414,8 @@ struct option options[] = { @@ -410,6 +414,8 @@ struct option options[] = {
{ "statsavg", 1, NULL, 'N' },
{ "gpu-clock", 1, NULL, 1070 },
{ "mem-clock", 1, NULL, 1071 },
{ "pstate", 1, NULL, 1072 },
{ "plimit", 1, NULL, 1073 },
#ifdef HAVE_SYSLOG_H
{ "syslog", 0, NULL, 'S' },
{ "syslog-prefix", 1, NULL, 1018 },
@ -2731,6 +2737,28 @@ void parse_arg(int key, char *arg) @@ -2731,6 +2737,28 @@ void parse_arg(int key, char *arg)
}
}
break;
case 1072: /* --pstate */
{
char *pch = strtok(arg,",");
int n = 0;
while (pch != NULL && n < MAX_GPUS) {
int dev_id = device_map[n++];
device_pstate[dev_id] = (int8_t) atoi(pch);
pch = strtok(NULL, ",");
}
}
break;
case 1073: /* --plimit */
{
char *pch = strtok(arg,",");
int n = 0;
while (pch != NULL && n < MAX_GPUS) {
int dev_id = device_map[n++];
device_plimit[dev_id] = atoi(pch);
pch = strtok(NULL, ",");
}
}
break;
case 1005:
opt_benchmark = true;
want_longpoll = false;
@ -3049,6 +3077,7 @@ int main(int argc, char *argv[]) @@ -3049,6 +3077,7 @@ int main(int argc, char *argv[])
device_interactive[i] = -1;
device_texturecache[i] = -1;
device_singlememory[i] = -1;
device_pstate[i] = -1;
}
// number of gpus
@ -3231,10 +3260,17 @@ int main(int argc, char *argv[]) @@ -3231,10 +3260,17 @@ int main(int argc, char *argv[])
/* nvml is currently not the best choice on Windows (only in x64) */
hnvml = nvml_create();
if (hnvml) {
bool gpu_reinit = false;
cuda_devicenames(); // refresh gpu vendor name
applog(LOG_INFO, "NVML GPU monitoring enabled.");
for (int n=0; n < opt_n_threads; n++) {
if (nvml_set_pstate(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (nvml_set_plimit(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (nvml_set_clocks(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (gpu_reinit)
cuda_reset_device(n, NULL);
}
}

131
nvml.cpp

@ -37,6 +37,8 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; @@ -37,6 +37,8 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
extern uint32_t device_gpu_clocks[MAX_GPUS];
extern uint32_t device_mem_clocks[MAX_GPUS];
extern uint32_t device_plimit[MAX_GPUS];
extern int8_t device_pstate[MAX_GPUS];
uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
@ -326,12 +328,12 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) @@ -326,12 +328,12 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
uint32_t nclocks = 0, clocks[128] = { 0 };
uint32_t nclocks = 0, clocks[127] = { 0 };
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
nclocks = min(nclocks, 128);
nclocks = min(nclocks, 127);
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
for (uint8_t u=0; u < nclocks; u++) {
// ordered desc, so get first
for (int8_t u=0; u < nclocks; u++) {
// ordered by pstate (so highest is first memory clock - P0)
if (clocks[u] <= mem_clk) {
mem_clk = clocks[u];
break;
@ -340,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) @@ -340,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
nclocks = 0;
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
nclocks = min(nclocks, 128);
nclocks = min(nclocks, 127);
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
for (uint8_t u=0; u < nclocks; u++) {
// ordered desc, so get first
@ -354,7 +356,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) @@ -354,7 +356,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
if (rc == NVML_SUCCESS)
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
else {
applog(LOG_ERR, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
return -1;
}
@ -383,6 +385,123 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) @@ -383,6 +385,123 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
return 1;
}
/**
* Set power state of a device (9xx)
* Code is similar as clocks one, which allow the change of the pstate
*/
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
{
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
if (device_pstate[dev_id] < 0)
return 0;
// prevent double operations on the same gpu... to enhance
if (gpu_clocks_changed[dev_id])
return 0;
if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
return -EPERM;
}
nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
if (rc != NVML_SUCCESS) {
applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
return -EINVAL;
}
// get application config values
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
uint32_t nclocks = 0, clocks[127] = { 0 };
int8_t wanted_pstate = device_pstate[dev_id];
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
nclocks = min(nclocks, 127);
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
for (uint8_t u=0; u < nclocks; u++) {
// ordered by pstate (so high first)
if (u == wanted_pstate) {
mem_clk = clocks[u];
break;
}
}
nclocks = 0;
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
nclocks = min(nclocks, 127);
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
for (uint8_t u=0; u < nclocks; u++) {
// ordered desc, so get first
if (clocks[u] <= gpu_clk) {
gpu_clk = clocks[u];
break;
}
}
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
if (rc != NVML_SUCCESS) {
applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc));
return -1;
}
if (opt_debug)
applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);
gpu_clocks_changed[dev_id] = 1;
return 1;
}
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
{
nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -ENODEV;
if (!device_plimit[dev_id])
return 0; // nothing to do
if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
return -ENOSYS;
uint32_t plimit = device_plimit[dev_id] * 1000U;
uint32_t pmin = 1000, pmax = 0;
if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
if (rc != NVML_SUCCESS) {
if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
return -ENOSYS;
pmax = 100 * 1000; // should not happen...
nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &pmax);
}
plimit = min(plimit, pmax);
plimit = max(plimit, pmin);
rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
if (rc != NVML_SUCCESS) {
applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
return -1;
}
if (opt_debug) {
applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
}
return 0;
}
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->nvml_gpucount;

39
nvml.h

@ -153,43 +153,8 @@ int nvml_destroy(nvml_handle *nvmlh); @@ -153,43 +153,8 @@ int nvml_destroy(nvml_handle *nvmlh);
*/
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
/*
* Query the number of GPUs seen by CUDA
*/
int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount);
/*
* query the name of the GPU model from the CUDA device ID
*
*/
int nvml_get_gpu_name(nvml_handle *nvmlh,
int gpuindex,
char *namebuf,
int bufsize);
/*
* Query the current GPU temperature (Celsius), from the CUDA device ID
*/
int nvml_get_tempC(nvml_handle *nvmlh,
int gpuindex, unsigned int *tempC);
/*
* Query the current GPU fan speed (percent) from the CUDA device ID
*/
int nvml_get_fanpcnt(nvml_handle *nvmlh,
int gpuindex, unsigned int *fanpcnt);
/*
* Query the current GPU power usage in millwatts from the CUDA device ID
*
* This feature is only available on recent GPU generations and may be
* limited in some cases only to Tesla series GPUs.
* If the query is run on an unsupported GPU, this routine will return -1.
*/
int nvml_get_power_usage(nvml_handle *nvmlh,
int gpuindex,
unsigned int *milliwatts);
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id);
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id);

Loading…
Cancel
Save