From bcadca2c45d342e6ba353381db6947f403f76424 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Wed, 7 Jun 2017 18:31:38 +0200
Subject: [PATCH] linux: prepare nvidia-settings queries

Note: this method is slower than the native APIs and requires a running,
correctly configured X session.

sample usage: -d 0,1 --mem-clock=+200,-200
---
 Makefile.am             |   1 +
 ccminer.cpp             |  26 ++++-
 ccminer.vcxproj         |   3 +-
 ccminer.vcxproj.filters |   5 +-
 nvml.cpp                |   4 +
 nvml.h                  |  12 ++
 nvsettings.cpp          | 247 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 293 insertions(+), 5 deletions(-)
 create mode 100644 nvsettings.cpp
diff --git a/Makefile.am b/Makefile.am
index 683e21d..c409f01 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,6 +21,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  crc32.c hefty1.c \
 			  ccminer.cpp pools.cpp util.cpp bench.cpp bignum.cpp \
 			  api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
+			  nvsettings.cpp \
 			  equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \
 			  equi/equihash.cpp equi/cuda_equi.cu \
 			  heavy/heavy.cu \
diff --git a/ccminer.cpp b/ccminer.cpp
index 85ec68e..cc33d61 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -126,6 +126,7 @@ bool opt_trust_pool = false;
 uint16_t opt_vote = 9999;
 int num_cpus;
 int active_gpus;
+bool need_nvsettings = false;
 char * device_name[MAX_GPUS];
 short device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
@@ -133,6 +134,7 @@ short device_mpcount[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
+int32_t device_mem_offsets[MAX_GPUS] = { 0 };
 uint32_t device_plimit[MAX_GPUS] = { 0 };
 uint8_t device_tlimit[MAX_GPUS] = { 0 };
 int8_t device_pstate[MAX_GPUS] = { -1, -1 };
@@ -2058,8 +2060,10 @@ static void *miner_thread(void *userdata)
 		}
 
 		/* conditional mining */
-		if (!wanna_mine(thr_id)) {
-
+		if (!wanna_mine(thr_id))
+		{
+			// reset default mem offset before idle..
+			if (need_nvsettings) nvs_reset_clocks(dev_id);
 			// free gpu resources
 			algo_free_all(thr_id);
 			// clear any free error (algo switch)
@@ -2084,7 +2088,11 @@ static void *miner_thread(void *userdata)
 			sleep(5);
 			if (!thr_id) pools[cur_pooln].wait_time += 5;
 			continue;
+		} else {
+			// reapply mem offset if needed
+			if (need_nvsettings) nvs_set_clocks(dev_id);
 		}
+
 		pool_on_hold = false;
 
 		work_restart[thr_id].restart = 0;
@@ -3164,6 +3172,7 @@ void parse_arg(int key, char *arg)
 		nvapi_init_settings();
 		#endif
 		#endif
+		nvs_init();
 		cuda_print_devices();
 		proper_exit(EXIT_CODE_OK);
 		break;
@@ -3389,7 +3398,11 @@ void parse_arg(int key, char *arg)
 			int n = 0;
 			while (pch != NULL && n < MAX_GPUS) {
 				int dev_id = device_map[n++];
-				device_mem_clocks[dev_id] = atoi(pch);
+				if (*pch == '+' || *pch == '-')
+					device_mem_offsets[dev_id] = atoi(pch);
+				else
+					device_mem_clocks[dev_id] = atoi(pch);
+				need_nvsettings = true;
 				pch = strtok(NULL, ",");
 			}
 		}
@@ -4061,6 +4074,13 @@ int main(int argc, char *argv[])
 		tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}
 
+#ifdef __linux__
+	if (need_nvsettings) {
+		if (nvs_init() < 0)
+			need_nvsettings = false;
+	}
+#endif
+
 #ifdef USE_WRAPNVML
 #if defined(__linux__) || defined(_WIN64)
 	/* nvml is currently not the best choice on Windows (only in x64) */
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 1316052..6715336 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -243,6 +243,7 @@
     <ClCompile Include="equi\equi.cpp" />
     <ClCompile Include="equi\equihash.cpp" />
     <ClCompile Include="nvapi.cpp" />
+    <ClCompile Include="nvsettings.cpp" />
     <ClCompile Include="pools.cpp" />
     <ClCompile Include="util.cpp" />
     <ClCompile Include="bench.cpp" />
@@ -597,4 +598,4 @@
   <Target Name="AfterClean">
     <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
   </Target>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index a6b39c4..12c0072 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -297,6 +297,9 @@
     <ClCompile Include="nvapi.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="nvsettings.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="sia\sia-rpc.cpp">
       <Filter>Source Files\sia</Filter>
     </ClCompile>
@@ -935,4 +938,4 @@
       <Filter>Ressources</Filter>
     </Text>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/nvml.cpp b/nvml.cpp
index 8063d29..daa570c 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -310,6 +310,8 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	nvmlReturn_t rc;
 	uint32_t gpu_clk = 0, mem_clk = 0;
 	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	//if (need_nvsettings) /* prefer later than init time */
+	//	nvs_set_clocks(dev_id);
 	if (n < 0 || n >= nvmlh->nvml_gpucount)
 		return -ENODEV;
 
@@ -395,6 +397,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
 	nvmlReturn_t rc;
 	uint32_t gpu_clk = 0, mem_clk = 0;
 	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	if (need_nvsettings)
+		nvs_reset_clocks(dev_id);
 	if (n < 0 || n >= nvmlh->nvml_gpucount)
 		return -ENODEV;
 
diff --git a/nvml.h b/nvml.h
index bb311a5..de5d16d 100644
--- a/nvml.h
+++ b/nvml.h
@@ -250,3 +250,15 @@ void gpu_led_off(int dev_id);
 #define LED_MODE_SHARES 1
 #define LED_MODE_MINING 2
 
+/* ------ nvidia-settings stuff for linux -------------------- */
+
+int nvs_init();
+int nvs_set_clocks(int dev_id);
+void nvs_reset_clocks(int dev_id);
+
+// nvidia-settings (X) devNum from dev_id (cuda GPU #N)
+int8_t nvs_devnum(int dev_id);
+int nvs_devid(int8_t devNum);
+
+extern bool need_nvsettings;
+
diff --git a/nvsettings.cpp b/nvsettings.cpp
new file mode 100644
index 0000000..61c90d1
--- /dev/null
+++ b/nvsettings.cpp
@@ -0,0 +1,247 @@
+/**
+ * nvidia-settings command line interface for linux - tpruvot 2017
+ *
+ * Notes: need X setup and running, with an opened X session.
+ *        init speed could be improved, running multiple threads
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h> // pid_t
+
+#include "miner.h"
+#include "nvml.h"
+#include "cuda_runtime.h"
+
+#ifdef __linux__
+
+#define NVS_PATH "/usr/bin/nvidia-settings"
+
+static int8_t nvs_dev_map[MAX_GPUS] = { 0 }; // indexed by CUDA dev_id: X (nvidia-settings) gpu index
+static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 }; // indexed by X gpu index: low 8 bits of its PCIBus value
+static int32_t nvs_clocks_set[MAX_GPUS] = { 0 }; // indexed by X gpu index: offset recorded by nvs_set_clocks()
+
+extern int32_t device_mem_offsets[MAX_GPUS];
+
+#if 0 /* complicated exec way and not better in fine */
+int nvs_query_fork_int(int nvs_id, const char* field) // disabled (#if 0): fork/exec based integer query
+{
+	pid_t pid;
+	int pipes[2] = { 0 }; // [0] is read by the parent, [1] becomes the child's stdout
+	if (pipe(pipes) < 0)
+		return -1;
+
+	if ((pid = fork()) == -1) {
+		close(pipes[0]);
+		close(pipes[1]);
+		return -1;
+	} else if (pid == 0) {
+		// child: run nvidia-settings with its stdout redirected into the pipe
+		char gpu_field[128] = { 0 };
+		sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field); // NOTE(review): unbounded write into gpu_field[128]
+		dup2(pipes[1], STDOUT_FILENO);
+		close(pipes[0]);
+		//close(pipes[1]);
+
+		if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) {
+			exit(-1);
+		}
+	} else {
+		// parent: read one integer from the child's output
+		int intval = -1;
+		FILE *p = fdopen(pipes[0], "r");
+		close(pipes[1]);
+		if (!p) {
+			applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
+			return -1; // NOTE(review): pipes[0] is not closed on this path
+		}
+		int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42
+		if (rc > 0) {
+			//applog(LOG_BLUE, "%s res=%d", field, intval);
+		}
+		fclose(p); // also closes the underlying descriptor pipes[0]
+		close(pipes[0]); // NOTE(review): second close of the same fd; no waitpid() on the child is visible here
+		return intval;
+	}
+	return -1;
+}
+#endif
+
+// Run `nvidia-settings -t -q '[gpu:N]/field'` and parse its output as an integer.
+// Returns the value, or -1 on failure; with showerr set, an ERROR reply is logged.
+int nvs_query_int(int nvs_id, const char* field, int showerr)
+{
+	char command[256] = { 0 };
+	int n = snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	// a truncated command would be a malformed shell line: refuse to run it
+	if (n < 0 || n >= (int) sizeof(command)) return -1;
+	FILE *fp = popen(command, "r");
+	if (!fp) return -1;
+
+	int intval = -1;
+	if (!showerr) {
+		fscanf(fp, "%d", &intval);
+	} else {
+		char msg[512] = { 0 };
+		char buf[64] = { 0 };
+		ssize_t len = 0, maxlen = sizeof(msg)-1;
+		// %63s keeps each token within buf (a bare %s could overflow it)
+		while (len < maxlen && fscanf(fp, "%63s", buf) > 0)
+			len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+		if (strstr(msg, "ERROR")) {
+			char *xtra = strstr(msg, "; please run");
+			if (xtra) *xtra = '\0'; // strip noise
+			applog(LOG_INFO, "%s", msg);
+		} else {
+			sscanf(msg, "%d", &intval);
+		}
+	}
+	pclose(fp);
+	return intval;
+}
+
+// Query '[gpu:N]/field'; reply tokens are joined with spaces into output (maxlen bytes, NUL included).
+// Returns the length stored (counted before an ERROR reply is blanked), or -1 on failure.
+int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
+{
+	char command[256] = { 0 };
+	if (!output || !maxlen) return -1;
+	*output = '\0';
+	int n = snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	if (n < 0 || n >= (int) sizeof(command)) return -1; // truncated command: do not run it
+	FILE *fp = popen(command, "r");
+	if (!fp) return -1;
+	char buf[256] = { 0 };
+	size_t len = 0;
+	while (len + 1 < maxlen && fscanf(fp, "%255s", buf) > 0) { // %255s bounds each token to buf
+		n = snprintf(&output[len], maxlen-len, "%s ", buf);
+		if (n < 0) break;
+		len = ((size_t) n >= maxlen-len) ? maxlen-1 : len + n; // clamp when the token was truncated
+	}
+	pclose(fp);
+	if (strstr(output, "ERROR")) {
+		char *xtra = strstr(output, "; please run");
+		if (xtra) *xtra = '\0'; // strip noise
+		applog(LOG_INFO, "%s", output);
+		*output='\0';
+	}
+	return (int) len;
+}
+
+// Assign '[gpu:N]/field=value' through `nvidia-settings -a`.
+// Returns 0 when the reply contains no "ERROR", -1 otherwise (an ERROR reply is logged).
+int nvs_set_int(int nvs_id, const char* field, int value)
+{
+	char command[256] = { 0 };
+	int n = snprintf(command, sizeof(command), "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
+	if (n < 0 || n >= (int) sizeof(command)) return -1; // truncated command: do not run it
+	FILE *fp = popen(command, "r");
+	if (!fp) return -1;
+
+	char msg[512] = { 0 };
+	char buf[64] = { 0 };
+	ssize_t len = 0, maxlen = sizeof(msg)-1;
+	// %63s keeps each token within buf (a bare %s could overflow it)
+	while (len < maxlen && fscanf(fp, "%63s", buf) > 0)
+		len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+	pclose(fp);
+	if (strstr(msg, "ERROR")) {
+		char *xtra = strstr(msg, "; please run");
+		if (xtra) *xtra = '\0'; // strip noise
+		applog(LOG_INFO, "%s", msg);
+		return -1;
+	}
+	return 0;
+}
+
+// X (nvidia-settings) gpu index mapped to CUDA device dev_id; -1 when dev_id is out of range.
+int8_t nvs_devnum(int dev_id) {
+	return (dev_id >= 0 && dev_id < MAX_GPUS) ? nvs_dev_map[dev_id] : (int8_t) -1;
+}
+
+// Reverse of nvs_devnum() over the mining threads' devices; 0 when none matches.
+int nvs_devid(int8_t nvs_id)
+{
+	for (int thr = 0; thr < opt_n_threads; ++thr) {
+		const int cuda_id = device_map[thr % MAX_GPUS];
+		if (nvs_dev_map[cuda_id] == nvs_id) return cuda_id;
+	}
+	return 0;
+}
+
+// Probe the X GPUs known to nvidia-settings and map each CUDA device in use to
+// its X gpu index by PCI bus id. Devices without a match are mapped to -1.
+// Returns 0 on success, -ENOENT if NVS_PATH is missing, -ENODEV if no X gpu answers.
+int nvs_init()
+{
+	struct stat info;
+	struct timeval tv_start, tv_end, diff;
+	int x_devices = 0;
+	int n_threads = opt_n_threads;
+	// entries are int8_t: fill with -1 (no X gpu) so the callers' "d < 0" guards work
+	memset(nvs_dev_map, -1, sizeof(nvs_dev_map));
+	if (stat(NVS_PATH, &info)) return -ENOENT;
+
+	gettimeofday(&tv_start, NULL);
+	for (int d = 0; d < MAX_GPUS; d++) {
+		// this part can be "slow" (100-200ms per device)
+		int res = nvs_query_int(d, "PCIBus", 1);
+		if (res < 0) break;
+		nvs_bus_ids[d] = 0xFFu & res;
+		x_devices++;
+	}
+
+	if (opt_debug) {
+		gettimeofday(&tv_end, NULL);
+		timeval_subtract(&diff, &tv_end, &tv_start);
+		applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
+			(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
+	}
+
+	if (!x_devices) return -ENODEV;
+	if (!n_threads) n_threads = cuda_num_devices();
+	for (int i = 0; i < n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		cudaDeviceProp props;
+		if (cudaGetDeviceProperties(&props, dev_id) != cudaSuccess) continue;
+		for (int8_t d = 0; d < x_devices; d++) {
+			if (nvs_bus_ids[d] != (uint8_t) props.pciBusID) continue;
+			gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
+				(int) d, (uint) nvs_bus_ids[d]);
+			nvs_dev_map[dev_id] = d;
+			/* char buf[1024] = { 0 };
+			nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
+			gpulog(LOG_DEBUG, d, "%s", buf); */
+			break;
+		}
+	}
+	return 0;
+}
+
+// Apply the configured memory offset (x2) of dev_id via nvidia-settings, once per X gpu.
+int nvs_set_clocks(int dev_id)
+{
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0) return -ENODEV;
+	if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
+	int res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2);
+	if (!res) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2; // 0 == success; a failure is retried on the next call
+	return res;
+}
+
+void nvs_reset_clocks(int dev_id)
+{
+	const int8_t x_gpu = nvs_devnum(dev_id); // X gpu index of this CUDA device
+	if (x_gpu < 0 || nvs_clocks_set[x_gpu] == 0) return; // no offset recorded
+	nvs_set_int(x_gpu, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
+	nvs_clocks_set[x_gpu] = 0; // forget it so the next nvs_set_clocks() re-applies
+}
+
+#else
+int nvs_init() { return -ENOSYS; } // non-linux build: always reports -ENOSYS
+int nvs_set_clocks(int dev_id) { return -ENOSYS; } // non-linux build: nothing is applied
+void nvs_reset_clocks(int dev_id) { } // non-linux build: no-op
+#endif