mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-27 06:54:15 +00:00
nvml: add --plimit and --pstate parameters
--pstate 0 is an alternative to clock values to set the device in P0 mode; 1 will set the application clocks to the default P1 clocks, 2 to the default P2 clocks, and so on. --plimit 150W really works on the 9xx and allows limiting/reducing the gpu usage. Note: the nvml interface is only available on Linux and Windows x64. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
This commit is contained in:
parent
b5b9b1f2db
commit
7c5087d01c
42
ccminer.cpp
42
ccminer.cpp
@ -198,6 +198,8 @@ long device_sm[MAX_GPUS] = { 0 };
|
|||||||
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
|
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
|
||||||
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
|
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
|
||||||
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
|
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
|
||||||
|
uint32_t device_plimit[MAX_GPUS] = { 0 };
|
||||||
|
int8_t device_pstate[MAX_GPUS] = { -1 };
|
||||||
|
|
||||||
// un-linked to cmdline scrypt options (useless)
|
// un-linked to cmdline scrypt options (useless)
|
||||||
int device_batchsize[MAX_GPUS] = { 0 };
|
int device_batchsize[MAX_GPUS] = { 0 };
|
||||||
@ -343,10 +345,12 @@ Options:\n\
|
|||||||
--max-temp=N Only mine if gpu temp is less than specified value\n\
|
--max-temp=N Only mine if gpu temp is less than specified value\n\
|
||||||
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
|
||||||
--max-diff=N Only mine if net difficulty is less than specified value\n"
|
--max-diff=N Only mine if net difficulty is less than specified value\n"
|
||||||
#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64))
|
#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64)) /* via nvml */
|
||||||
"\
|
"\
|
||||||
--gpu-clock=1150 Set device application clock\n\
|
--mem-clock=3505 Set the gpu memory max clock (346.72+ driver)\n\
|
||||||
--mem-clock=3505 Set the gpu memory clock (require 346.72+ driver)\n"
|
--gpu-clock=1150 Set the gpu engine max clock (346.72+ driver)\n\
|
||||||
|
--pstate=0[,2] Set the gpu power state (352.21+ driver)\n\
|
||||||
|
--plimit=100W Set the gpu power limit (352.21+ driver)\n"
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
"\
|
"\
|
||||||
@ -410,6 +414,8 @@ struct option options[] = {
|
|||||||
{ "statsavg", 1, NULL, 'N' },
|
{ "statsavg", 1, NULL, 'N' },
|
||||||
{ "gpu-clock", 1, NULL, 1070 },
|
{ "gpu-clock", 1, NULL, 1070 },
|
||||||
{ "mem-clock", 1, NULL, 1071 },
|
{ "mem-clock", 1, NULL, 1071 },
|
||||||
|
{ "pstate", 1, NULL, 1072 },
|
||||||
|
{ "plimit", 1, NULL, 1073 },
|
||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
{ "syslog", 0, NULL, 'S' },
|
{ "syslog", 0, NULL, 'S' },
|
||||||
{ "syslog-prefix", 1, NULL, 1018 },
|
{ "syslog-prefix", 1, NULL, 1018 },
|
||||||
@ -2731,6 +2737,28 @@ void parse_arg(int key, char *arg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 1072: /* --pstate */
|
||||||
|
{
|
||||||
|
char *pch = strtok(arg,",");
|
||||||
|
int n = 0;
|
||||||
|
while (pch != NULL && n < MAX_GPUS) {
|
||||||
|
int dev_id = device_map[n++];
|
||||||
|
device_pstate[dev_id] = (int8_t) atoi(pch);
|
||||||
|
pch = strtok(NULL, ",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1073: /* --plimit */
|
||||||
|
{
|
||||||
|
char *pch = strtok(arg,",");
|
||||||
|
int n = 0;
|
||||||
|
while (pch != NULL && n < MAX_GPUS) {
|
||||||
|
int dev_id = device_map[n++];
|
||||||
|
device_plimit[dev_id] = atoi(pch);
|
||||||
|
pch = strtok(NULL, ",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 1005:
|
case 1005:
|
||||||
opt_benchmark = true;
|
opt_benchmark = true;
|
||||||
want_longpoll = false;
|
want_longpoll = false;
|
||||||
@ -3049,6 +3077,7 @@ int main(int argc, char *argv[])
|
|||||||
device_interactive[i] = -1;
|
device_interactive[i] = -1;
|
||||||
device_texturecache[i] = -1;
|
device_texturecache[i] = -1;
|
||||||
device_singlememory[i] = -1;
|
device_singlememory[i] = -1;
|
||||||
|
device_pstate[i] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of gpus
|
// number of gpus
|
||||||
@ -3231,10 +3260,17 @@ int main(int argc, char *argv[])
|
|||||||
/* nvml is currently not the best choice on Windows (only in x64) */
|
/* nvml is currently not the best choice on Windows (only in x64) */
|
||||||
hnvml = nvml_create();
|
hnvml = nvml_create();
|
||||||
if (hnvml) {
|
if (hnvml) {
|
||||||
|
bool gpu_reinit = false;
|
||||||
cuda_devicenames(); // refresh gpu vendor name
|
cuda_devicenames(); // refresh gpu vendor name
|
||||||
applog(LOG_INFO, "NVML GPU monitoring enabled.");
|
applog(LOG_INFO, "NVML GPU monitoring enabled.");
|
||||||
for (int n=0; n < opt_n_threads; n++) {
|
for (int n=0; n < opt_n_threads; n++) {
|
||||||
|
if (nvml_set_pstate(hnvml, device_map[n]) == 1)
|
||||||
|
gpu_reinit = true;
|
||||||
|
if (nvml_set_plimit(hnvml, device_map[n]) == 1)
|
||||||
|
gpu_reinit = true;
|
||||||
if (nvml_set_clocks(hnvml, device_map[n]) == 1)
|
if (nvml_set_clocks(hnvml, device_map[n]) == 1)
|
||||||
|
gpu_reinit = true;
|
||||||
|
if (gpu_reinit)
|
||||||
cuda_reset_device(n, NULL);
|
cuda_reset_device(n, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
131
nvml.cpp
131
nvml.cpp
@ -37,6 +37,8 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
|
|||||||
|
|
||||||
extern uint32_t device_gpu_clocks[MAX_GPUS];
|
extern uint32_t device_gpu_clocks[MAX_GPUS];
|
||||||
extern uint32_t device_mem_clocks[MAX_GPUS];
|
extern uint32_t device_mem_clocks[MAX_GPUS];
|
||||||
|
extern uint32_t device_plimit[MAX_GPUS];
|
||||||
|
extern int8_t device_pstate[MAX_GPUS];
|
||||||
|
|
||||||
uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
|
uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
@ -326,12 +328,12 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
|
|||||||
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
|
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
|
||||||
|
|
||||||
// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
|
// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
|
||||||
uint32_t nclocks = 0, clocks[128] = { 0 };
|
uint32_t nclocks = 0, clocks[127] = { 0 };
|
||||||
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
|
||||||
nclocks = min(nclocks, 128);
|
nclocks = min(nclocks, 127);
|
||||||
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
|
||||||
for (uint8_t u=0; u < nclocks; u++) {
|
for (int8_t u=0; u < nclocks; u++) {
|
||||||
// ordered desc, so get first
|
// ordered by pstate (so highest is first memory clock - P0)
|
||||||
if (clocks[u] <= mem_clk) {
|
if (clocks[u] <= mem_clk) {
|
||||||
mem_clk = clocks[u];
|
mem_clk = clocks[u];
|
||||||
break;
|
break;
|
||||||
@ -340,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
|
|||||||
|
|
||||||
nclocks = 0;
|
nclocks = 0;
|
||||||
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
|
||||||
nclocks = min(nclocks, 128);
|
nclocks = min(nclocks, 127);
|
||||||
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
|
||||||
for (uint8_t u=0; u < nclocks; u++) {
|
for (uint8_t u=0; u < nclocks; u++) {
|
||||||
// ordered desc, so get first
|
// ordered desc, so get first
|
||||||
@ -354,7 +356,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
|
|||||||
if (rc == NVML_SUCCESS)
|
if (rc == NVML_SUCCESS)
|
||||||
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
|
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
|
||||||
else {
|
else {
|
||||||
applog(LOG_ERR, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
|
applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,6 +385,123 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set power state of a device (9xx)
|
||||||
|
* Code is similar as clocks one, which allow the change of the pstate
|
||||||
|
*/
|
||||||
|
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
|
||||||
|
{
|
||||||
|
nvmlReturn_t rc;
|
||||||
|
uint32_t gpu_clk = 0, mem_clk = 0;
|
||||||
|
int n = nvmlh->cuda_nvml_device_id[dev_id];
|
||||||
|
if (n < 0 || n >= nvmlh->nvml_gpucount)
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
|
if (device_pstate[dev_id] < 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// prevent double operations on the same gpu... to enhance
|
||||||
|
if (gpu_clocks_changed[dev_id])
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
|
||||||
|
applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
|
||||||
|
return -EPERM;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
|
||||||
|
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
|
||||||
|
if (rc != NVML_SUCCESS) {
|
||||||
|
applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get application config values
|
||||||
|
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
|
||||||
|
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
|
||||||
|
|
||||||
|
// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
|
||||||
|
uint32_t nclocks = 0, clocks[127] = { 0 };
|
||||||
|
int8_t wanted_pstate = device_pstate[dev_id];
|
||||||
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
|
||||||
|
nclocks = min(nclocks, 127);
|
||||||
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
|
||||||
|
for (uint8_t u=0; u < nclocks; u++) {
|
||||||
|
// ordered by pstate (so high first)
|
||||||
|
if (u == wanted_pstate) {
|
||||||
|
mem_clk = clocks[u];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nclocks = 0;
|
||||||
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
|
||||||
|
nclocks = min(nclocks, 127);
|
||||||
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
|
||||||
|
for (uint8_t u=0; u < nclocks; u++) {
|
||||||
|
// ordered desc, so get first
|
||||||
|
if (clocks[u] <= gpu_clk) {
|
||||||
|
gpu_clk = clocks[u];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
|
||||||
|
if (rc != NVML_SUCCESS) {
|
||||||
|
applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opt_debug)
|
||||||
|
applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);
|
||||||
|
|
||||||
|
gpu_clocks_changed[dev_id] = 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Set the power limit of a device, from the --plimit value given in Watts.
 *
 * The wanted value is clamped to the range reported by
 * nvmlDeviceGetPowerManagementLimitConstraints(); when that query is not
 * available or fails, the limit reported by
 * nvmlDeviceGetPowerManagementLimit() is used as the upper bound.
 *
 * @param nvmlh   NVML wrapper handle
 * @param dev_id  CUDA device id (translated via cuda_nvml_device_id)
 * @return 1 if a power limit was applied (same "changed" convention as
 *           nvml_set_pstate, which the caller tests with == 1),
 *         0 if no limit was requested for this device,
 *         -ENODEV / -ENOSYS / -1 on errors.
 */
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
{
	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (!device_plimit[dev_id])
		return 0; // nothing to do

	if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
		return -ENOSYS;

	// NVML works in milliwatts: saturate instead of wrapping on absurd
	// values, the result is clamped to the allowed range below anyway
	const uint32_t max_watts = 0xFFFFFFFFU / 1000U;
	uint32_t plimit = 0xFFFFFFFFU;
	if (device_plimit[dev_id] <= max_watts)
		plimit = device_plimit[dev_id] * 1000U;

	uint32_t pmin = 1000, pmax = 0;
	if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);

	if (rc != NVML_SUCCESS) {
		// no constraints available: use the current limit as maximum
		if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
			return -ENOSYS;
		pmax = 100 * 1000; // should not happen...
		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &pmax);
	}

	plimit = min(plimit, pmax);
	plimit = max(plimit, pmin);
	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
	if (rc != NVML_SUCCESS) {
		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
		return -1;
	}

	if (opt_debug) {
		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
			dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
	}

	// 1 = device settings changed (0 is reserved for "nothing to do")
	return 1;
}
|
||||||
|
|
||||||
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
|
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
|
||||||
{
|
{
|
||||||
*gpucount = nvmlh->nvml_gpucount;
|
*gpucount = nvmlh->nvml_gpucount;
|
||||||
|
39
nvml.h
39
nvml.h
@ -153,43 +153,8 @@ int nvml_destroy(nvml_handle *nvmlh);
|
|||||||
*/
|
*/
|
||||||
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
|
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
|
||||||
|
|
||||||
/*
|
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
|
||||||
* Query the number of GPUs seen by CUDA
|
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id);
|
||||||
*/
|
|
||||||
int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* query the name of the GPU model from the CUDA device ID
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int nvml_get_gpu_name(nvml_handle *nvmlh,
|
|
||||||
int gpuindex,
|
|
||||||
char *namebuf,
|
|
||||||
int bufsize);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Query the current GPU temperature (Celsius), from the CUDA device ID
|
|
||||||
*/
|
|
||||||
int nvml_get_tempC(nvml_handle *nvmlh,
|
|
||||||
int gpuindex, unsigned int *tempC);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Query the current GPU fan speed (percent) from the CUDA device ID
|
|
||||||
*/
|
|
||||||
int nvml_get_fanpcnt(nvml_handle *nvmlh,
|
|
||||||
int gpuindex, unsigned int *fanpcnt);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Query the current GPU power usage in millwatts from the CUDA device ID
|
|
||||||
*
|
|
||||||
* This feature is only available on recent GPU generations and may be
|
|
||||||
* limited in some cases only to Tesla series GPUs.
|
|
||||||
* If the query is run on an unsupported GPU, this routine will return -1.
|
|
||||||
*/
|
|
||||||
int nvml_get_power_usage(nvml_handle *nvmlh,
|
|
||||||
int gpuindex,
|
|
||||||
unsigned int *milliwatts);
|
|
||||||
|
|
||||||
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
|
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
|
||||||
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id);
|
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user