@@ -37,6 +37,8 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 extern uint32_t device_gpu_clocks[MAX_GPUS];
 extern uint32_t device_mem_clocks[MAX_GPUS];
+extern uint32_t device_plimit[MAX_GPUS];
+extern int8_t device_pstate[MAX_GPUS];

 uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
@@ -326,12 +328,12 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];

 	// these functions work for the 960 and the 970 (346.72+), not for the 750 Ti
-	uint32_t nclocks = 0, clocks[128] = { 0 };
+	uint32_t nclocks = 0, clocks[127] = { 0 };
 	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
-	nclocks = min(nclocks, 128);
+	nclocks = min(nclocks, 127);
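+	// note: the clamp stays in sync with the smaller clocks[] array above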
 	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
 	for (uint8_t u = 0; u < nclocks; u++) {
-		// ordered desc, so get first
+		// ordered by pstate (the highest memory clock, P0, comes first)
 		if (clocks[u] <= mem_clk) {
 			mem_clk = clocks[u];
 			break;
@@ -340,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	nclocks = 0;
 	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
-	nclocks = min(nclocks, 128);
+	nclocks = min(nclocks, 127);
 	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
 	for (uint8_t u = 0; u < nclocks; u++) {
 		// ordered desc, so get first
@@ -354,7 +356,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (rc == NVML_SUCCESS)
 		applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
 	else {
-		applog(LOG_ERR, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+		applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
 		return -1;
 	}
@@ -383,6 +385,123 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
 	return 1;
 }

+/**
+ * Set the power state of a device (9xx)
+ * The code is similar to the clocks one, which allows changing the pstate
+ */
+int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
+{
+	nvmlReturn_t rc;
+	uint32_t gpu_clk = 0, mem_clk = 0;
+	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= nvmlh->nvml_gpucount)
+		return -ENODEV;
+
+	if (device_pstate[dev_id] < 0)
+		return 0;
+
+	// prevent double operations on the same gpu (to be improved)
+	if (gpu_clocks_changed[dev_id])
+		return 0;
+
+	if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
+		applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
+		return -EPERM;
+	}
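+	// if the feature above is restricted, it can usually be unlocked with
+	// `nvidia-smi -acp UNRESTRICTED` (assumption: recent driver, root rights)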
+
+	nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
+	rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
+		return -EINVAL;
+	}
+
+	// get the application config values
+	if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
+	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];
+
+	// these functions work for the 960 and the 970 (346.72+), not for the 750 Ti
+	uint32_t nclocks = 0, clocks[127] = { 0 };
+	int8_t wanted_pstate = device_pstate[dev_id];
+	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
+	nclocks = min(nclocks, 127);
+	nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks);
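+	// assumption: the u-th supported memory clock belongs to pstate P<u>,
+	// since nvml reports the highest clocks (P0) first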
+	for (uint8_t u = 0; u < nclocks; u++) {
+		// ordered by pstate (so high first)
+		if (u == wanted_pstate) {
+			mem_clk = clocks[u];
+			break;
+		}
+	}
+
+	nclocks = 0;
+	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
+	nclocks = min(nclocks, 127);
+	nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks);
+	for (uint8_t u = 0; u < nclocks; u++) {
+		// ordered desc, so get first
+		if (clocks[u] <= gpu_clk) {
+			gpu_clk = clocks[u];
+			break;
+		}
+	}
+
+	rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+
+	if (opt_debug)
+		applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);
+
+	gpu_clocks_changed[dev_id] = 1;
+	return 1;
+}
+
+int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
+{
+	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
+	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= nvmlh->nvml_gpucount)
+		return -ENODEV;
+
+	if (!device_plimit[dev_id])
+		return 0; // nothing to do
+
+	if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
+		return -ENOSYS;
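+
+	// device_plimit[] is in watts, while the NVML API works in milliwatts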
+	uint32_t plimit = device_plimit[dev_id] * 1000U;
+	uint32_t pmin = 1000, pmax = 0;
+	if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
+		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
+
+	if (rc != NVML_SUCCESS) {
+		if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
+			return -ENOSYS;
+		pmax = 100 * 1000; // should not happen...
+		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &pmax);
+	}
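+
+	// clamp the requested limit to the range allowed by the board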
+	plimit = min(plimit, pmax);
+	plimit = max(plimit, pmin);
+
+	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+
+	if (opt_debug) {
+		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
+			dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
+	}
+	return 0;
+}
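+
+/*
+ * Typical call order (a sketch, not part of this change: `hnvml` and the
+ * gpu thread init path are assumptions): apply the power cap first, then
+ * the wanted pstate or the application clocks.
+ *
+ *   nvml_set_plimit(hnvml, dev_id);
+ *   nvml_set_pstate(hnvml, dev_id); // or nvml_set_clocks(hnvml, dev_id)
+ */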

 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
 {
 	*gpucount = nvmlh->nvml_gpucount;