linux: prepare nvidia-settings queries

Note: this method is slower than the native APIs and requires a running, correctly configured X session. Sample usage: -d 0,1 --mem-clock=+200,-200
8 years ago · bcadca2c45
7 changed files with 293 additions and 5 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -21,6 +21,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  crc32.c hefty1.c \
 			  ccminer.cpp pools.cpp util.cpp bench.cpp bignum.cpp \
 			  api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
 			  nvsettings.cpp \
 			  equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \
 			  equi/equihash.cpp equi/cuda_equi.cu \
 			  heavy/heavy.cu \
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -126,6 +126,7 @@ bool opt_trust_pool = false;
 uint16_t opt_vote = 9999;
 int num_cpus;
 int active_gpus;
 bool need_nvsettings = false;
 char * device_name[MAX_GPUS];
 short device_map[MAX_GPUS] = { 0 };
 long  device_sm[MAX_GPUS] = { 0 };
@ -133,6 +134,7 @@ short device_mpcount[MAX_GPUS] = { 0 };
 uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
 int32_t device_mem_offsets[MAX_GPUS] = { 0 };
 uint32_t device_plimit[MAX_GPUS] = { 0 };
 uint8_t device_tlimit[MAX_GPUS] = { 0 };
 int8_t device_pstate[MAX_GPUS] = { -1, -1 };
@ -2058,8 +2060,10 @@ static void *miner_thread(void *userdata)
 		}
 		/* conditional mining */
-		if (!wanna_mine(thr_id)) {
+		if (!wanna_mine(thr_id))
-
+		{
 			// reset default mem offset before idle..
 			if (need_nvsettings) nvs_reset_clocks(dev_id);
 			// free gpu resources
 			algo_free_all(thr_id);
 			// clear any free error (algo switch)
@ -2084,7 +2088,11 @@ static void *miner_thread(void *userdata)
 			sleep(5);
 			if (!thr_id) pools[cur_pooln].wait_time += 5;
 			continue;
 		} else {
 			// reapply mem offset if needed
 			if (need_nvsettings) nvs_set_clocks(dev_id);
 		}
 		pool_on_hold = false;
 		work_restart[thr_id].restart = 0;
@ -3164,6 +3172,7 @@ void parse_arg(int key, char *arg)
 		nvapi_init_settings();
 		#endif
 		#endif
 		nvs_init();
 		cuda_print_devices();
 		proper_exit(EXIT_CODE_OK);
 		break;
@ -3389,7 +3398,11 @@ void parse_arg(int key, char *arg)
 			int n = 0;
 			while (pch != NULL && n < MAX_GPUS) {
 				int dev_id = device_map[n++];
-				device_mem_clocks[dev_id] = atoi(pch);
+				if (*pch == '+' || *pch == '-')
 					device_mem_offsets[dev_id] = atoi(pch);
 				else
 					device_mem_clocks[dev_id] = atoi(pch);
 				need_nvsettings = true;
 				pch = strtok(NULL, ",");
 			}
 		}
@ -4061,6 +4074,13 @@ int main(int argc, char *argv[])
 		tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}
 #ifdef __linux__
 	if (need_nvsettings) {
 		if (nvs_init() < 0)
 			need_nvsettings = false;
 	}
 #endif
 #ifdef USE_WRAPNVML
 #if defined(__linux__) || defined(_WIN64)
 	/* nvml is currently not the best choice on Windows (only in x64) */
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -243,6 +243,7 @@
    <ClCompile Include="equi\equi.cpp" />
    <ClCompile Include="equi\equihash.cpp" />
    <ClCompile Include="nvapi.cpp" />
    <ClCompile Include="nvsettings.cpp" />
    <ClCompile Include="pools.cpp" />
    <ClCompile Include="util.cpp" />
    <ClCompile Include="bench.cpp" />
@ -597,4 +598,4 @@
  <Target Name="AfterClean">
    <Delete Files="@(FilesToCopy->'$(OutDir)%(Filename)%(Extension)')" TreatErrorsAsWarnings="true" />
  </Target>
-</Project>
+</Project>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -297,6 +297,9 @@
    <ClCompile Include="nvapi.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="nvsettings.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="sia\sia-rpc.cpp">
      <Filter>Source Files\sia</Filter>
    </ClCompile>
@ -935,4 +938,4 @@
      <Filter>Ressources</Filter>
    </Text>
  </ItemGroup>
-</Project>
+</Project>
--- a/nvml.cpp
+++ b/nvml.cpp
@ -310,6 +310,8 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	nvmlReturn_t rc;
 	uint32_t gpu_clk = 0, mem_clk = 0;
 	int n = nvmlh->cuda_nvml_device_id[dev_id];
 	//if (need_nvsettings) /* prefer later than init time */
 	//	nvs_set_clocks(dev_id);
 	if (n < 0 || n >= nvmlh->nvml_gpucount)
 		return -ENODEV;
@ -395,6 +397,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
 	nvmlReturn_t rc;
 	uint32_t gpu_clk = 0, mem_clk = 0;
 	int n = nvmlh->cuda_nvml_device_id[dev_id];
 	if (need_nvsettings)
 		nvs_reset_clocks(dev_id);
 	if (n < 0 || n >= nvmlh->nvml_gpucount)
 		return -ENODEV;
--- a/nvml.h
+++ b/nvml.h
@ -250,3 +250,15 @@ void gpu_led_off(int dev_id);
 #define LED_MODE_SHARES 1
 #define LED_MODE_MINING 2
 /* ------ nvidia-settings stuff for linux -------------------- */
 // Queries the X gpus through the nvidia-settings binary and maps them to
 // the cuda devices. Returns 0 on success or a negative errno value
 // (-ENOENT, -ENODEV, or -ENOSYS on non-linux builds).
 int nvs_init();
 // Applies the memory offset requested for dev_id (device_mem_offsets).
 // Returns 0 when applied or when there is nothing to do, negative on error.
 int nvs_set_clocks(int dev_id);
 // Sets the memory offset of dev_id back to 0 if one was recorded as applied.
 void nvs_reset_clocks(int dev_id);
 // nvidia-settings (X) devNum from dev_id (cuda GPU #N)
 int8_t nvs_devnum(int dev_id);
 // cuda dev_id from the nvidia-settings (X) devNum, 0 when none matches
 int nvs_devid(int8_t devNum);
 // Defined in ccminer.cpp; gates the nvs_set_clocks()/nvs_reset_clocks()
 // calls and is cleared in main() when nvs_init() fails.
 extern bool need_nvsettings;
--- a/nvsettings.cpp
+++ b/nvsettings.cpp
@ -0,0 +1,247 @@
 /**
 * nvidia-settings command line interface for linux - tpruvot 2017
 *
 * Notes: need X setup and running, with an opened X session.
 *        init speed could be improved, running multiple threads
 */
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/types.h> // pid_t
 #include <sys/wait.h> // waitpid
 #include "miner.h"
 #include "nvml.h"
 #include "cuda_runtime.h"
 #ifdef __linux__
 // absolute path of the nvidia-settings binary, checked with stat() in nvs_init()
 #define NVS_PATH "/usr/bin/nvidia-settings"
 // cuda dev_id -> X gpu number, written by nvs_init()
 static int8_t nvs_dev_map[MAX_GPUS] = { 0 };
 // low 8 bits of the PCIBus value reported for each X gpu (indexed by X gpu number)
 static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 };
 // per X gpu value recorded by nvs_set_clocks(), zeroed by nvs_reset_clocks();
 // a non-zero entry makes nvs_set_clocks() skip the device
 static int32_t nvs_clocks_set[MAX_GPUS] = { 0 };
 // requested memory offsets per cuda dev_id, defined in ccminer.cpp
 extern int32_t device_mem_offsets[MAX_GPUS];
 #if 0 /* complicated exec way and not better in fine */
 /**
  * Query an integer attribute of an X gpu with fork() + execl() instead of
  * popen(). Currently compiled out (#if 0).
  *
  * @param nvs_id X gpu number used in the "[gpu:N]/field" query
  * @param field  nvidia-settings attribute name
  * @return the first integer printed by nvidia-settings, or -1 on any failure
  */
 int nvs_query_fork_int(int nvs_id, const char* field)
 {
 	pid_t pid;
 	int pipes[2] = { 0 };
 	if (pipe(pipes) < 0)
 		return -1;

 	pid = fork();
 	if (pid == -1) {
 		close(pipes[0]);
 		close(pipes[1]);
 		return -1;
 	}

 	if (pid == 0) {
 		// child: redirect stdout to the pipe and exec nvidia-settings
 		char gpu_field[128] = { 0 };
 		snprintf(gpu_field, sizeof(gpu_field), "[gpu:%d]/%s", nvs_id, field);
 		dup2(pipes[1], STDOUT_FILENO);
 		close(pipes[0]);
 		if (pipes[1] != STDOUT_FILENO)
 			close(pipes[1]);
 		execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", (char*) NULL);
 		// only reached when execl() failed; _exit() avoids running the
 		// parent's atexit handlers and flushing its stdio buffers twice
 		_exit(127);
 	}

 	// parent: read the answer from the pipe
 	int intval = -1;
 	close(pipes[1]);
 	FILE *p = fdopen(pipes[0], "r");
 	if (!p) {
 		applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
 		close(pipes[0]); // not owned by a FILE, close it ourselves
 	} else {
 		if (fscanf(p, "%d", &intval) != 1) // BUS 0000:2a:00.0 is read 42
 			intval = -1;
 		fclose(p); // also closes pipes[0], do not close() it again
 	}
 	// reap the child so it does not stay as a zombie
 	waitpid(pid, NULL, 0);
 	return intval;
 }
 #endif
 /**
  * Query an integer attribute of an X gpu through the nvidia-settings binary.
  *
  * Runs "nvidia-settings -t -q '[gpu:N]/field'" with popen() (stderr merged
  * into stdout) and parses the output.
  *
  * @param nvs_id  X gpu number used in the "[gpu:N]/field" query
  * @param field   nvidia-settings attribute name
  * @param showerr when non-zero, the whole output is collected so that a
  *                message containing "ERROR" can be logged
  * @return the parsed integer, or -1 when the command could not be started,
  *         no integer could be parsed, or (showerr) an error was reported
  */
 int nvs_query_int(int nvs_id, const char* field, int showerr)
 {
 	FILE *fp;
 	char command[256] = { 0 };
 	int intval = -1;

 	// bounded write: never overflow the command buffer
 	snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
 	fp = popen(command, "r");
 	if (!fp)
 		return -1;

 	if (!showerr) {
 		if (fscanf(fp, "%d", &intval) != 1)
 			intval = -1;
 	} else {
 		char msg[512] = { 0 };
 		char buf[64] = { 0 };
 		size_t len = 0;
 		const size_t maxlen = sizeof(msg) - 1;
 		// %63s keeps each token inside buf (a bare %s is unbounded)
 		while (len < maxlen && fscanf(fp, "%63s", buf) == 1) {
 			int n = snprintf(&msg[len], maxlen - len, "%s ", buf);
 			if (n < 0) break;
 			// snprintf returns the untruncated length, the loop
 			// condition stops us once msg is full
 			len += (size_t) n;
 		}
 		if (strstr(msg, "ERROR")) {
 			char *xtra = strstr(msg, "; please run");
 			if (xtra) *xtra = '\0'; // strip noise
 			applog(LOG_INFO, "%s", msg);
 			intval = -1;
 		} else {
 			sscanf(msg, "%d", &intval);
 		}
 	}
 	pclose(fp);
 	return intval;
 }
 /**
  * Query a string attribute of an X gpu through the nvidia-settings binary.
  *
  * The whitespace separated tokens printed by the command are copied to
  * output, each followed by a single space.
  *
  * @param nvs_id X gpu number used in the "[gpu:N]/field" query
  * @param field  nvidia-settings attribute name
  * @param output destination buffer, always NUL terminated on return
  * @param maxlen usable size of output in bytes
  * @return number of characters collected in output (also when the text is
  *         then cleared because it contained "ERROR"), or -1 when output is
  *         unusable or the command could not be started
  */
 int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
 {
 	FILE *fp;
 	char command[256] = { 0 };
 	char buf[256] = { 0 };
 	size_t len = 0;

 	if (!output || !maxlen)
 		return -1;
 	*output = '\0';

 	snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
 	fp = popen(command, "r");
 	if (!fp)
 		return -1;

 	// %255s keeps each token inside buf (a bare %s is unbounded)
 	while (len < maxlen && fscanf(fp, "%255s", buf) == 1) {
 		int n = snprintf(&output[len], maxlen - len, "%s ", buf);
 		if (n < 0) break;
 		len += (size_t) n;
 	}
 	pclose(fp);

 	// snprintf returns the untruncated length: report what is really stored
 	if (len >= maxlen)
 		len = maxlen - 1;

 	if (strstr(output, "ERROR")) {
 		char *xtra = strstr(output, "; please run");
 		if (xtra) *xtra = '\0'; // strip noise
 		applog(LOG_INFO, "%s", output);
 		*output = '\0';
 	}
 	return (int) len;
 }
 /**
  * Assign an integer attribute of an X gpu through the nvidia-settings binary.
  *
  * Runs "nvidia-settings -a '[gpu:N]/field=value'" with popen() and scans the
  * output (stderr included) for the word "ERROR".
  *
  * @param nvs_id X gpu number used in the "[gpu:N]/field" assignment
  * @param field  nvidia-settings attribute name
  * @param value  value to assign
  * @return 0 when no error was reported, -1 when the command could not be
  *         started or its output contained "ERROR" (the message is logged)
  */
 int nvs_set_int(int nvs_id, const char* field, int value)
 {
 	FILE *fp;
 	char command[256] = { 0 };
 	char msg[512] = { 0 };
 	char buf[64] = { 0 };
 	size_t len = 0;
 	const size_t maxlen = sizeof(msg) - 1;

 	snprintf(command, sizeof(command), "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
 	fp = popen(command, "r");
 	if (!fp)
 		return -1;

 	// %63s keeps each token inside buf (a bare %s is unbounded)
 	while (len < maxlen && fscanf(fp, "%63s", buf) == 1) {
 		int n = snprintf(&msg[len], maxlen - len, "%s ", buf);
 		if (n < 0) break;
 		len += (size_t) n;
 	}
 	pclose(fp);

 	if (strstr(msg, "ERROR")) {
 		char *xtra = strstr(msg, "; please run");
 		if (xtra) *xtra = '\0'; // strip noise
 		applog(LOG_INFO, "%s", msg);
 		return -1;
 	}
 	return 0;
 }
 int8_t nvs_devnum(int dev_id)
 {
 	return nvs_dev_map[dev_id];
 }
 /**
  * Reverse lookup: cuda device id mapped to the given X gpu number.
  *
  * Walks the devices used by the opt_n_threads mining threads and returns the
  * first one whose mapping equals nvs_id; 0 is returned when none matches.
  */
 int nvs_devid(int8_t nvs_id)
 {
 	int thr = 0;
 	while (thr < opt_n_threads) {
 		const int cuda_id = device_map[thr % MAX_GPUS];
 		if (nvs_id == nvs_dev_map[cuda_id])
 			return cuda_id;
 		thr++;
 	}
 	return 0;
 }
 /**
  * Discover the X gpus through nvidia-settings and map them to cuda devices.
  *
  * Each X gpu is asked for its "PCIBus" attribute until a query fails; every
  * cuda device in use is then matched to the X gpu with the same PCI bus id.
  * Devices without a match keep the -1 mapping, so nvs_set_clocks() and
  * nvs_reset_clocks() ignore them instead of touching X gpu 0.
  *
  * @return 0 on success, -ENOENT when the nvidia-settings binary is missing,
  *         -ENODEV when no X gpu answered the query
  */
 int nvs_init()
 {
 	struct stat info;
 	struct timeval tv_start, tv_end, diff;
 	int x_devices = 0;
 	int n_threads = opt_n_threads;

 	// start from "not mapped": a zero filled table would alias every
 	// unmatched cuda device to X gpu 0
 	memset(nvs_dev_map, -1, sizeof(nvs_dev_map));

 	if (stat(NVS_PATH, &info))
 		return -ENOENT;

 	gettimeofday(&tv_start, NULL);
 	for (int d = 0; d < MAX_GPUS; d++) {
 		// this part can be "slow" (100-200ms per device)
 		int res = nvs_query_int(d, "PCIBus", 1);
 		if (res < 0) break;
 		nvs_bus_ids[d] = 0xFFu & res;
 		x_devices++;
 	}
 	if (opt_debug) {
 		gettimeofday(&tv_end, NULL);
 		timeval_subtract(&diff, &tv_end, &tv_start);
 		applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
 			(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
 	}

 	if (!x_devices)
 		return -ENODEV;

 	// no thread count yet (e.g. device listing): consider every cuda device
 	if (!n_threads) n_threads = cuda_num_devices();

 	for (int i = 0; i < n_threads; i++) {
 		int dev_id = device_map[i % MAX_GPUS];
 		cudaDeviceProp props;
 		if (cudaGetDeviceProperties(&props, dev_id) != cudaSuccess)
 			continue;
 		for (int8_t d = 0; d < x_devices; d++) {
 			if (nvs_bus_ids[d] == (uint8_t) props.pciBusID) {
 				gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
 					(int) d, (uint) nvs_bus_ids[d]);
 				nvs_dev_map[dev_id] = d;
 				/* char buf[1024] = { 0 };
 				nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
 				gpulog(LOG_DEBUG, d, "%s", buf); */
 				break;
 			}
 		}
 	}
 	return 0;
 }
 /**
  * Apply the memory offset requested for a cuda device through nvidia-settings.
  *
  * Nothing is done when no offset was requested for dev_id or when one is
  * already recorded as applied for the matching X gpu.
  *
  * @param dev_id cuda device id
  * @return 0 when the offset was applied or there was nothing to do,
  *         -ENODEV when the device has no X gpu mapping, otherwise the
  *         error returned by nvs_set_int()
  */
 int nvs_set_clocks(int dev_id)
 {
 	int res;
 	int8_t d = nvs_devnum(dev_id);
 	if (d < 0) return -ENODEV;
 	if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
 	// the requested offset is doubled for this attribute
 	// NOTE(review): presumably because it is expressed as a transfer rate - confirm
 	res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2);
 	// nvs_set_int() returns 0 on success: remember the applied value only
 	// then, so the call is not repeated and nvs_reset_clocks() can undo it
 	if (res == 0) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2;
 	return res;
 }
 /**
  * Put the memory offset of a cuda device back to 0, but only when a value
  * is recorded as applied for the matching X gpu.
  */
 void nvs_reset_clocks(int dev_id)
 {
 	const int8_t xgpu = nvs_devnum(dev_id);
 	if (xgpu >= 0 && nvs_clocks_set[xgpu]) {
 		nvs_set_int(xgpu, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
 		nvs_clocks_set[xgpu] = 0;
 	}
 }
 #else
 // nvidia-settings is only driven on linux: every entry point declared in
 // nvml.h gets a stub here so that other platforms still link.
 int nvs_init() { return -ENOSYS; }
 int nvs_set_clocks(int dev_id) { return -ENOSYS; }
 void nvs_reset_clocks(int dev_id) { }
 // no X gpu mapping exists: report "no device"
 int8_t nvs_devnum(int dev_id) { return -1; }
 // same "not found" value as the linux implementation
 int nvs_devid(int8_t devNum) { return 0; }
 #endif