diff --git a/ccminer.cpp b/ccminer.cpp
index 010886a..3d7cf3b 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -198,6 +198,8 @@ long device_sm[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
+uint32_t device_plimit[MAX_GPUS] = { 0 };
+int8_t device_pstate[MAX_GPUS] = { -1 }; // only [0] is -1 here; all entries are re-initialized in main()
 
 // un-linked to cmdline scrypt options (useless)
 int device_batchsize[MAX_GPUS] = { 0 };
@@ -343,10 +345,12 @@ Options:\n\
   --max-temp=N      Only mine if gpu temp is less than specified value\n\
   --max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
   --max-diff=N      Only mine if net difficulty is less than specified value\n"
-#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64))
+#if defined(USE_WRAPNVML) && (defined(__linux) || defined(_WIN64)) /* via nvml */
 "\
-  --gpu-clock=1150  Set device application clock\n\
-  --mem-clock=3505  Set the gpu memory clock (require 346.72+ driver)\n"
+  --mem-clock=3505  Set the gpu memory max clock (346.72+ driver)\n\
+  --gpu-clock=1150  Set the gpu engine max clock (346.72+ driver)\n\
+  --pstate=0[,2]    Set the gpu power state (352.21+ driver)\n\
+  --plimit=100W     Set the gpu power limit (352.21+ driver)\n"
 #endif
 #ifdef HAVE_SYSLOG_H
 "\
@@ -410,6 +414,8 @@ struct option options[] = {
 	{ "statsavg", 1, NULL, 'N' },
 	{ "gpu-clock", 1, NULL, 1070 },
 	{ "mem-clock", 1, NULL, 1071 },
+	{ "pstate", 1, NULL, 1072 },
+	{ "plimit", 1, NULL, 1073 },
 #ifdef HAVE_SYSLOG_H
 	{ "syslog", 0, NULL, 'S' },
 	{ "syslog-prefix", 1, NULL, 1018 },
@@ -2731,6 +2737,28 @@ void parse_arg(int key, char *arg)
 		}
 	}
 	break;
+	case 1072: /* --pstate */
+	{
+		char *pch = strtok(arg,",");
+		int n = 0;
+		while (pch != NULL && n < MAX_GPUS) {
+			int dev_id = device_map[n++];
+			device_pstate[dev_id] = (int8_t) atoi(pch);
+			pch = strtok(NULL, ",");
+		}
+	}
+	break;
+	case 1073: /* --plimit */
+	{
+		char *pch = strtok(arg,",");
+		int n = 0;
+		while (pch != NULL && n < MAX_GPUS) {
+			int dev_id = device_map[n++];
+			device_plimit[dev_id] = atoi(pch);
+			pch = strtok(NULL, ",");
+		}
+	}
+	break;
 	case 1005:
 		opt_benchmark = true;
 		want_longpoll = false;
@@ -3049,6 +3077,7 @@ int main(int argc, char *argv[])
 		device_interactive[i] = -1;
 		device_texturecache[i] = -1;
 		device_singlememory[i] = -1;
+		device_pstate[i] = -1;
 	}
 
 	// number of gpus
@@ -3231,10 +3260,17 @@ int main(int argc, char *argv[])
 	/* nvml is currently not the best choice on Windows (only in x64) */
 	hnvml = nvml_create();
 	if (hnvml) {
 		cuda_devicenames(); // refresh gpu vendor name
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for (int n=0; n < opt_n_threads; n++) {
+			bool gpu_reinit = false; // reset per device, so only changed gpus are re-initialized
+			if (nvml_set_pstate(hnvml, device_map[n]) == 1)
+				gpu_reinit = true;
+			if (nvml_set_plimit(hnvml, device_map[n]) == 1)
+				gpu_reinit = true;
 			if (nvml_set_clocks(hnvml, device_map[n]) == 1)
+				gpu_reinit = true;
+			if (gpu_reinit)
 				cuda_reset_device(n, NULL);
 		}
 	}
diff --git a/nvml.cpp b/nvml.cpp
index daa56b4..becc4c5 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -37,6 +37,8 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 
 extern uint32_t device_gpu_clocks[MAX_GPUS];
 extern uint32_t device_mem_clocks[MAX_GPUS];
+extern uint32_t device_plimit[MAX_GPUS];
+extern int8_t device_pstate[MAX_GPUS];
 
 uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
 
@@ -326,12 +328,12 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
 
 	// these functions works for the 960 and the 970 (346.72+), not for the 750 Ti
-	uint32_t nclocks = 0, clocks[128] = { 0 };
+	uint32_t nclocks = 0, clocks[127] = { 0 };
 	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
-	nclocks = min(nclocks, 128);
+	nclocks = min(nclocks, 127);
 	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
-	for (uint8_t u=0; u < nclocks; u++) {
-		// ordered desc, so get first
+	for (int8_t u=0; u < nclocks; u++) {
+		// ordered by pstate (so highest is first memory clock - P0)
 		if (clocks[u] <= mem_clk) {
 			mem_clk = clocks[u];
 			break;
@@ -340,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 
 	nclocks = 0;
 	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
-	nclocks = min(nclocks, 128);
+	nclocks = min(nclocks, 127);
 	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
 	for (uint8_t u=0; u < nclocks; u++) {
 		// ordered desc, so get first
@@ -354,7 +356,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (rc == NVML_SUCCESS)
 		applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
 	else {
-		applog(LOG_ERR, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+		applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
 		return -1;
 	}
 
@@ -383,6 +385,122 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
 	return 1;
 }
 
+
+/**
+ * Set power state of a device (9xx)
+ * Code is similar to the clocks one, which allows the pstate to be changed
+ */
+int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
+{
+	nvmlReturn_t rc;
+	uint32_t gpu_clk = 0, mem_clk = 0;
+	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= nvmlh->nvml_gpucount)
+		return -ENODEV;
+
+	if (device_pstate[dev_id] < 0)
+		return 0;
+
+	// prevent double operations on the same gpu... to enhance
+	if (gpu_clocks_changed[dev_id])
+		return 0;
+
+	if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
+		applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
+		return -EPERM;
+	}
+
+	nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
+	rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
+		return -EINVAL;
+	}
+
+	// get application config values
+	if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
+	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
+
+	// these functions work for the 960 and the 970 (346.72+), not for the 750 Ti
+	uint32_t nclocks = 0, clocks[127] = { 0 };
+	int8_t wanted_pstate = device_pstate[dev_id];
+	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
+	nclocks = min(nclocks, 127);
+	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
+	for (uint8_t u=0; u < nclocks; u++) {
+		// ordered by pstate (so high first)
+		if (u == wanted_pstate) {
+			mem_clk = clocks[u];
+			break;
+		}
+	}
+
+	nclocks = 0;
+	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
+	nclocks = min(nclocks, 127);
+	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
+	for (uint8_t u=0; u < nclocks; u++) {
+		// ordered desc, so get first
+		if (clocks[u] <= gpu_clk) {
+			gpu_clk = clocks[u];
+			break;
+		}
+	}
+
+	rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+
+	if (opt_debug)
+		applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);
+
+	gpu_clocks_changed[dev_id] = 1;
+	return 1;
+}
+
+int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
+{
+	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
+	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= nvmlh->nvml_gpucount)
+		return -ENODEV;
+
+	if (!device_plimit[dev_id])
+		return 0; // nothing to do
+
+	if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
+		return -ENOSYS;
+
+	uint32_t plimit = device_plimit[dev_id] * 1000U;
+	uint32_t pmin = 1000, pmax = 0;
+	if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
+		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
+
+	if (rc != NVML_SUCCESS) {
+		if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
+			return -ENOSYS;
+		pmax = 100 * 1000; // should not happen...
+		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &pmax);
+	}
+
+	plimit = min(plimit, pmax);
+	plimit = max(plimit, pmin);
+	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+
+	if (opt_debug) {
+		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
+			dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
+	}
+
+	return 0;
+}
+
 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
 {
 	*gpucount = nvmlh->nvml_gpucount;
diff --git a/nvml.h b/nvml.h
index 1397b78..4e1df9f 100644
--- a/nvml.h
+++ b/nvml.h
@@ -153,43 +153,8 @@ int nvml_destroy(nvml_handle *nvmlh);
  */
 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
 
-/*
- * Query the number of GPUs seen by CUDA
- */
-int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount);
-
-
-/*
- * query the name of the GPU model from the CUDA device ID
- *
- */
-int nvml_get_gpu_name(nvml_handle *nvmlh,
-                      int gpuindex,
-                      char *namebuf,
-                      int bufsize);
-
-/*
- * Query the current GPU temperature (Celsius), from the CUDA device ID
- */
-int nvml_get_tempC(nvml_handle *nvmlh,
-                   int gpuindex, unsigned int *tempC);
-
-/*
- * Query the current GPU fan speed (percent) from the CUDA device ID
- */
-int nvml_get_fanpcnt(nvml_handle *nvmlh,
-                     int gpuindex, unsigned int *fanpcnt);
-
-/*
- * Query the current GPU power usage in millwatts from the CUDA device ID
- *
- * This feature is only available on recent GPU generations and may be
- * limited in some cases only to Tesla series GPUs.
- * If the query is run on an unsupported GPU, this routine will return -1.
- */
-int nvml_get_power_usage(nvml_handle *nvmlh,
-                         int gpuindex,
-                         unsigned int *milliwatts);
+int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
+int nvml_set_pstate(nvml_handle *nvmlh, int dev_id);
 
 int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
 int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id);