From c9d110e543db8c25cbc61ee8875e55323de5090b Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Wed, 27 May 2015 14:11:38 +0200
Subject: [PATCH] linux: allow to set application clocks

Since Linux driver 346.72, nvidia-smi allows querying the GPU/memory clocks.

Tested OK on the Asus Strix 970, but it fails on the Gigabyte 750 Ti.

The system may first need persistence mode enabled and application clocks unlocked:
    nvidia-smi -pm 1
    nvidia-smi -acp 0

Supported values are displayed by:
    nvidia-smi -q -d SUPPORTED_CLOCKS

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
---
 ccminer.cpp       | 39 ++++++++++++++++++++++++-
 configure.ac      |  2 +-
 cpuminer-config.h |  6 ++--
 nvml.cpp          | 72 ++++++++++++++++++++++++++++++++++++-----------
 nvml.h            |  2 ++
 5 files changed, 100 insertions(+), 21 deletions(-)

diff --git a/ccminer.cpp b/ccminer.cpp
index 7946653..a667796 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -196,6 +196,8 @@ char * device_name[MAX_GPUS];
 short device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
+uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
+uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
 
 // un-linked to cmdline scrypt options (useless)
 int device_batchsize[MAX_GPUS] = { 0 };
@@ -352,6 +354,11 @@ Options:\n\
       --max-temp=N      Only mine if gpu temp is less than specified value\n\
       --max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
       --max-diff=N      Only mine if net difficulty is less than specified value\n"
+#if defined(USE_WRAPNVML) && defined(__linux)
+"\
+      --gpu-clock=1150  Set device application clock\n\
+      --mem-clock=3505  Set the gpu memory clock (require 346.72 linux driver)\n"
+#endif
 #ifdef HAVE_SYSLOG_H
 "\
   -S, --syslog          use system log for output messages\n\
@@ -412,6 +419,8 @@ static struct option const options[] = {
 	{ "retry-pause", 1, NULL, 'R' },
 	{ "scantime", 1, NULL, 's' },
 	{ "statsavg", 1, NULL, 'N' },
+	{ "gpu-clock", 1, NULL, 1070 },
+	{ "mem-clock", 1, NULL, 1071 },
 #ifdef HAVE_SYSLOG_H
 	{ "syslog", 0, NULL, 'S' },
 	{ "syslog-prefix", 1, NULL, 1018 },
@@ -2895,6 +2904,30 @@ void parse_arg(int key, char *arg)
 				device_interactive[n++] = last;
 		}
 		break;
+	case 1070: /* --gpu-clock */
+		{
+			char *pch = strtok(arg,",");
+			int n = 0;
+			// comma separated list: value i fills slot i, extras beyond MAX_GPUS are ignored
+			while (pch != NULL && n < MAX_GPUS) {
+				device_gpu_clocks[n++] = atoi(pch);
+				pch = strtok(NULL, ",");
+			}
+			// slots without a value are left untouched (no clock requested)
+		}
+		break;
+	case 1071: /* --mem-clock */
+		{
+			char *pch = strtok(arg,",");
+			int n = 0;
+			// comma separated list: value i fills slot i, extras beyond MAX_GPUS are ignored
+			while (pch != NULL && n < MAX_GPUS) {
+				device_mem_clocks[n++] = atoi(pch);
+				pch = strtok(NULL, ",");
+			}
+			// slots without a value are left untouched (no clock requested)
+		}
+		break;
 	case 1005:
 		opt_benchmark = true;
 		want_longpoll = false;
@@ -3448,8 +3481,12 @@ int main(int argc, char *argv[])
 #ifndef WIN32
 	/* nvml is currently not the best choice on Windows (only in x64) */
 	hnvml = nvml_create();
-	if (hnvml)
+	if (hnvml) {
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
+		for (int n=0; n < opt_n_threads; n++) {
+			nvml_set_clocks(hnvml, device_map[n]);
+		}
+	}
 #else
 	if (nvapi_init() == 0)
 		applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
diff --git a/configure.ac b/configure.ac
index 549247f..4c9bc38 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [1.6.4])
+AC_INIT([ccminer], [1.6.5-git])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpuminer-config.h b/cpuminer-config.h
index 4e3bdbc..4ede926 100644
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@@ -159,7 +159,7 @@
 #define PACKAGE_NAME "ccminer"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 1.6.4"
+#define PACKAGE_STRING "ccminer 1.6.5-git"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@@ -168,7 +168,7 @@
 #define PACKAGE_URL "http://github.com/tpruvot/ccminer"
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.6.4"
+#define PACKAGE_VERSION "1.6.5-git"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be
@@ -182,7 +182,7 @@
 #define STDC_HEADERS 1
 
 /* Version number of package */
-#define VERSION "1.6.4"
+#define VERSION "1.6.5-git"
 
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
diff --git a/nvml.cpp b/nvml.cpp
index 4868444..03d4b8a 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -33,6 +33,11 @@ extern char driver_version[32];
 
 static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 
+extern uint32_t device_gpu_clocks[MAX_GPUS];
+extern uint32_t device_mem_clocks[MAX_GPUS];
+
+uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
+
 /*
  * Wrappers to emulate dlopen() on other systems like Windows
  */
@@ -218,21 +223,6 @@ nvml_handle * nvml_create()
 		if (nvmlh->nvmlDeviceGetAPIRestriction) {
 			nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
 				&nvmlh->app_clocks[i]);
-			if (nvmlh->app_clocks[i] == NVML_FEATURE_ENABLED && opt_debug) {
-				applog(LOG_DEBUG, "NVML application clock feature is allowed");
-#if 0
-				uint32_t mem;
-				nvmlReturn_t rc;
-				rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[i], NVML_CLOCK_MEM, &mem);
-				if (rc == NVML_SUCCESS)
-					applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: mem %u", mem);
-				else
-					applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: %s", nvmlh->nvmlErrorString(rc));
-				rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[i], mem, 1228000);
-				if (rc != NVML_SUCCESS)
-					applog(LOG_DEBUG, "nvmlDeviceSetApplicationsClocks: %s", nvmlh->nvmlErrorString(rc));
-#endif
-			}
 		}
 	}
 
@@ -251,7 +241,7 @@ nvml_handle * nvml_create()
 				    (nvmlh->nvml_pci_bus_id[j]    == (uint32_t) props.pciBusID) &&
 				    (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
 					if (opt_debug)
-						applog(LOG_DEBUG, "CUDA GPU#%d matches NVML GPU %d by busId %u",
+						applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u",
 							i, j, (uint32_t) props.pciBusID);
 					nvmlh->nvml_cuda_device_id[j] = i;
 					nvmlh->cuda_nvml_device_id[i] = j;
@@ -263,6 +253,56 @@ nvml_handle * nvml_create()
 	return nvmlh;
 }
 
+/* Apply the --gpu-clock/--mem-clock values to the gpu behind cuda device
+ * dev_id. Returns 0 if applied or nothing was requested, -1 on failure. */
+int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
+{
+	nvmlReturn_t rc;
+	uint32_t gpu_clk = 0, mem_clk = 0;
+	// dev_id indexes MAX_GPUS-sized tables, reject it before any lookup
+	if (dev_id < 0 || dev_id >= MAX_GPUS)
+		return -1;
+	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= nvmlh->nvml_gpucount)
+		return -1;
+	// main() calls this once per thread, never program the same gpu twice
+	if (gpu_clocks_changed[dev_id])
+		return 0;
+	int c = nvmlh->nvml_cuda_device_id[n];
+	if (c < 0 || c >= MAX_GPUS)
+		return -1;
+	if (!device_gpu_clocks[c] && !device_mem_clocks[c])
+		return 0; // nothing to do
+
+	if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
+		applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", c);
+		return -1;
+	}
+	if (opt_debug)
+		applog(LOG_DEBUG, "GPU #%d: NVML application clock feature is allowed", c);
+
+	// a clock left at 0 by the user falls back to the default, so both queries must succeed
+	rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
+	if (rc == NVML_SUCCESS)
+		rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_ERR, "GPU #%d: unable to query default clocks - %s", c, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+	if (opt_debug)
+		applog(LOG_DEBUG, "GPU #%d: default clocks are %u/%u", c, mem_clk, gpu_clk);
+	if (device_mem_clocks[c]) mem_clk = device_mem_clocks[c];
+	if (device_gpu_clocks[c]) gpu_clk = device_gpu_clocks[c];
+	rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
+	if (rc != NVML_SUCCESS) {
+		applog(LOG_ERR, "GPU #%d: %u/%u - %s", c, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+		return -1;
+	}
+	applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", c, mem_clk, gpu_clk);
+	gpu_clocks_changed[dev_id] = 1;
+	return 0;
+}
+
 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
 {
 	*gpucount = nvmlh->nvml_gpucount;
diff --git a/nvml.h b/nvml.h
index d9fa5e4..224daf4 100644
--- a/nvml.h
+++ b/nvml.h
@@ -156,6 +156,8 @@ int nvml_get_power_usage(nvml_handle *nvmlh,
                               int gpuindex,
                               unsigned int *milliwatts);
 
+int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
+
 /* api functions */
 
 unsigned int gpu_fanpercent(struct cgpu_info *gpu);