From bcadca2c45d342e6ba353381db6947f403f76424 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 7 Jun 2017 18:31:38 +0200 Subject: [PATCH] linux: prepare nvidia-settings queries Note: this method is slower than real apis and requires X opened and configured correctly. sample usage: -d 0,1 --mem-clock=+200,-200 --- Makefile.am | 1 + ccminer.cpp | 26 ++++- ccminer.vcxproj | 3 +- ccminer.vcxproj.filters | 5 +- nvml.cpp | 4 + nvml.h | 12 ++ nvsettings.cpp | 247 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 293 insertions(+), 5 deletions(-) create mode 100644 nvsettings.cpp diff --git a/Makefile.am b/Makefile.am index 683e21d..c409f01 100644 --- a/Makefile.am +++ b/Makefile.am @@ -21,6 +21,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ crc32.c hefty1.c \ ccminer.cpp pools.cpp util.cpp bench.cpp bignum.cpp \ api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \ + nvsettings.cpp \ equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \ equi/equihash.cpp equi/cuda_equi.cu \ heavy/heavy.cu \ diff --git a/ccminer.cpp b/ccminer.cpp index 85ec68e..cc33d61 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -126,6 +126,7 @@ bool opt_trust_pool = false; uint16_t opt_vote = 9999; int num_cpus; int active_gpus; +bool need_nvsettings = false; char * device_name[MAX_GPUS]; short device_map[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 }; @@ -133,6 +134,7 @@ short device_mpcount[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = { 0 }; uint32_t device_gpu_clocks[MAX_GPUS] = { 0 }; uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; +int32_t device_mem_offsets[MAX_GPUS] = { 0 }; uint32_t device_plimit[MAX_GPUS] = { 0 }; uint8_t device_tlimit[MAX_GPUS] = { 0 }; int8_t device_pstate[MAX_GPUS] = { -1, -1 }; @@ -2058,8 +2060,10 @@ static void *miner_thread(void *userdata) } /* conditional mining */ - if (!wanna_mine(thr_id)) { - + if (!wanna_mine(thr_id)) + { + // reset default mem offset before idle.. 
+ if (need_nvsettings) nvs_reset_clocks(dev_id); // free gpu resources algo_free_all(thr_id); // clear any free error (algo switch) @@ -2084,7 +2088,11 @@ static void *miner_thread(void *userdata) sleep(5); if (!thr_id) pools[cur_pooln].wait_time += 5; continue; + } else { + // reapply mem offset if needed + if (need_nvsettings) nvs_set_clocks(dev_id); } + pool_on_hold = false; work_restart[thr_id].restart = 0; @@ -3164,6 +3172,7 @@ void parse_arg(int key, char *arg) nvapi_init_settings(); #endif #endif + nvs_init(); cuda_print_devices(); proper_exit(EXIT_CODE_OK); break; @@ -3389,7 +3398,11 @@ void parse_arg(int key, char *arg) int n = 0; while (pch != NULL && n < MAX_GPUS) { int dev_id = device_map[n++]; - device_mem_clocks[dev_id] = atoi(pch); + if (*pch == '+' || *pch == '-') + device_mem_offsets[dev_id] = atoi(pch); + else + device_mem_clocks[dev_id] = atoi(pch); + need_nvsettings = true; pch = strtok(NULL, ","); } } @@ -4061,6 +4074,13 @@ int main(int argc, char *argv[]) tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url)); } +#ifdef __linux__ + if (need_nvsettings) { + if (nvs_init() < 0) + need_nvsettings = false; + } +#endif + #ifdef USE_WRAPNVML #if defined(__linux__) || defined(_WIN64) /* nvml is currently not the best choice on Windows (only in x64) */ diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 1316052..6715336 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -243,6 +243,7 @@ + @@ -597,4 +598,4 @@ - + \ No newline at end of file diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index a6b39c4..12c0072 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -297,6 +297,9 @@ Source Files + + Source Files + Source Files\sia @@ -935,4 +938,4 @@ Ressources - \ No newline at end of file + diff --git a/nvml.cpp b/nvml.cpp index 8063d29..daa570c 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -310,6 +310,8 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = 
nvmlh->cuda_nvml_device_id[dev_id]; + //if (need_nvsettings) /* prefer later than init time */ + // nvs_set_clocks(dev_id); if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; @@ -395,6 +397,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; + if (need_nvsettings) + nvs_reset_clocks(dev_id); if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; diff --git a/nvml.h b/nvml.h index bb311a5..de5d16d 100644 --- a/nvml.h +++ b/nvml.h @@ -250,3 +250,15 @@ void gpu_led_off(int dev_id); #define LED_MODE_SHARES 1 #define LED_MODE_MINING 2 +/* ------ nvidia-settings stuff for linux -------------------- */ + +int nvs_init(); +int nvs_set_clocks(int dev_id); +void nvs_reset_clocks(int dev_id); + +// nvidia-settings (X) devNum from dev_id (cuda GPU #N) +int8_t nvs_devnum(int dev_id); +int nvs_devid(int8_t devNum); + +extern bool need_nvsettings; + diff --git a/nvsettings.cpp b/nvsettings.cpp new file mode 100644 index 0000000..61c90d1 --- /dev/null +++ b/nvsettings.cpp @@ -0,0 +1,247 @@ +/** + * nvidia-settings command line interface for linux - tpruvot 2017 + * + * Notes: need X setup and running, with an opened X session. 
+ * init speed could be improved, running multiple threads
+ */
+
+/* NOTE(review): the system header names were missing from this copy of the
+ * patch; the list below is rebuilt from the calls made in this file and
+ * should be confirmed against the upstream commit. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h> // pid_t
+
+#include "miner.h"
+#include "nvml.h"
+#include "cuda_runtime.h"
+
+#ifdef __linux__
+
+#define NVS_PATH "/usr/bin/nvidia-settings"
+
+/* X "gpu:N" index for each cuda dev_id; nvs_init() sets unmatched entries to -1 */
+static int8_t nvs_dev_map[MAX_GPUS] = { 0 };
+/* low 8 bits of the PCI bus id reported by nvidia-settings for each X gpu */
+static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 };
+/* value last written by nvs_set_clocks() per X gpu, 0 when nothing is applied */
+static int32_t nvs_clocks_set[MAX_GPUS] = { 0 };
+
+extern int32_t device_mem_offsets[MAX_GPUS];
+
+#if 0 /* complicated exec way and not better in fine */
+int nvs_query_fork_int(int nvs_id, const char* field)
+{
+	pid_t pid;
+	int pipes[2] = { 0 };
+	if (pipe(pipes) < 0)
+		return -1;
+
+	if ((pid = fork()) == -1) {
+		close(pipes[0]);
+		close(pipes[1]);
+		return -1;
+	} else if (pid == 0) {
+		char gpu_field[128] = { 0 };
+		sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field);
+
+		dup2(pipes[1], STDOUT_FILENO);
+		close(pipes[0]);
+		//close(pipes[1]);
+
+		if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) {
+			exit(-1);
+		}
+	} else {
+		int intval = -1;
+		FILE *p = fdopen(pipes[0], "r");
+		close(pipes[1]);
+		if (!p) {
+			applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
+			return -1;
+		}
+		int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42
+		if (rc > 0) {
+			//applog(LOG_BLUE, "%s res=%d", field, intval);
+		}
+		fclose(p);
+		close(pipes[0]);
+		return intval;
+	}
+	return -1;
+}
+#endif
+
+/**
+ * Read whitespace separated words from fp and store them in out, each one
+ * followed by a single space. Reading stops at end of stream or when out is
+ * full; out is always NUL terminated.
+ *
+ * @param fp     stream to read from
+ * @param out    destination buffer
+ * @param outsz  size of out in bytes
+ * @return number of characters stored (without the terminating NUL)
+ */
+static size_t nvs_read_words(FILE *fp, char *out, size_t outsz)
+{
+	char word[256];
+	size_t len = 0;
+
+	if (!out || !outsz)
+		return 0;
+	out[0] = '\0';
+
+	// the field width keeps a long token from overrunning word[]
+	while (len + 1 < outsz && fscanf(fp, "%255s", word) == 1) {
+		int n = snprintf(&out[len], outsz - len, "%s ", word);
+		if (n < 0)
+			break;
+		len += (size_t) n;
+		if (len >= outsz) {
+			// snprintf returns the untruncated length, clamp to what was stored
+			len = outsz - 1;
+			break;
+		}
+	}
+	return len;
+}
+
+/**
+ * Log msg when it contains "ERROR", after cutting it at "; please run".
+ *
+ * @param msg  collected command output, may be shortened in place
+ * @return true when msg contained "ERROR"
+ */
+static bool nvs_report_error(char *msg)
+{
+	if (!strstr(msg, "ERROR"))
+		return false;
+	char *xtra = strstr(msg, "; please run");
+	if (xtra) *xtra = '\0'; // strip noise
+	applog(LOG_INFO, "%s", msg);
+	return true;
+}
+
+/**
+ * Query an integer attribute of X gpu nvs_id with "nvidia-settings -t -q".
+ *
+ * @param nvs_id   X gpu index, used as [gpu:N]
+ * @param field    attribute name
+ * @param showerr  when non zero, output containing "ERROR" is logged
+ * @return the parsed value, or -1 when the command could not be run or its
+ *         output does not start with an integer
+ */
+int nvs_query_int(int nvs_id, const char* field, int showerr)
+{
+	char command[256] = { 0 };
+	int intval = -1;
+	int n = snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	if (n < 0 || n >= (int) sizeof(command))
+		return -1; // a truncated command line must not be executed
+
+	FILE *fp = popen(command, "r");
+	if (!fp)
+		return -1;
+
+	if (!showerr) {
+		if (fscanf(fp, "%d", &intval) != 1)
+			intval = -1;
+	} else {
+		char msg[512] = { 0 };
+		nvs_read_words(fp, msg, sizeof(msg));
+		if (nvs_report_error(msg) || sscanf(msg, "%d", &intval) != 1)
+			intval = -1;
+	}
+	pclose(fp);
+	return intval;
+}
+
+/**
+ * Query an attribute of X gpu nvs_id as text. Words of the command output
+ * are stored in output separated by single spaces. When the output contains
+ * "ERROR" it is logged and output is emptied.
+ *
+ * @param nvs_id  X gpu index, used as [gpu:N]
+ * @param field   attribute name
+ * @param output  destination buffer
+ * @param maxlen  size of output in bytes
+ * @return number of characters read into output (also returned when the
+ *         text was an error and output was emptied), -1 on failure
+ */
+int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
+{
+	char command[256] = { 0 };
+	if (!output || !maxlen)
+		return -1;
+	*output = '\0';
+
+	int n = snprintf(command, sizeof(command), "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	if (n < 0 || n >= (int) sizeof(command))
+		return -1;
+
+	FILE *fp = popen(command, "r");
+	if (!fp)
+		return -1;
+
+	size_t len = nvs_read_words(fp, output, maxlen);
+	pclose(fp);
+	if (nvs_report_error(output))
+		*output = '\0';
+	return (int) len;
+}
+
+/**
+ * Assign an integer attribute of X gpu nvs_id with "nvidia-settings -a".
+ *
+ * @param nvs_id  X gpu index, used as [gpu:N]
+ * @param field   attribute name
+ * @param value   value to assign
+ * @return 0 when the command ran and printed no "ERROR", -1 otherwise
+ */
+int nvs_set_int(int nvs_id, const char* field, int value)
+{
+	char command[256] = { 0 };
+	int n = snprintf(command, sizeof(command), "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
+	if (n < 0 || n >= (int) sizeof(command))
+		return -1;
+
+	FILE *fp = popen(command, "r");
+	if (!fp)
+		return -1;
+
+	char msg[512] = { 0 };
+	nvs_read_words(fp, msg, sizeof(msg));
+	pclose(fp);
+	return nvs_report_error(msg) ? -1 : 0;
+}
+
+/**
+ * X gpu index mapped to a cuda device.
+ *
+ * @param dev_id  cuda device id
+ * @return the X gpu index, or -1 when dev_id is out of range or nvs_init()
+ *         found no X gpu with the same PCI bus id
+ */
+int8_t nvs_devnum(int dev_id)
+{
+	if (dev_id < 0 || dev_id >= MAX_GPUS)
+		return -1;
+	return nvs_dev_map[dev_id];
+}
+
+/**
+ * Reverse lookup of nvs_devnum() over the devices used by the mining threads.
+ *
+ * @param nvs_id  X gpu index
+ * @return the cuda device id mapped to nvs_id; 0 is also returned when no
+ *         device matches, so the result is ambiguous for device 0
+ */
+int nvs_devid(int8_t nvs_id)
+{
+	for (int i=0; i < opt_n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		if (nvs_dev_map[dev_id] == nvs_id)
+			return dev_id;
+	}
+	return 0;
+}
+
+/**
+ * Build the cuda device -> X gpu map by comparing PCI bus ids.
+ *
+ * Each X gpu is asked for its "PCIBus" attribute until a query fails, then
+ * every cuda device in use is matched against the collected bus ids.
+ *
+ * @return 0 on success, -ENOENT when NVS_PATH does not exist, -ENODEV when
+ *         no X gpu answered
+ */
+int nvs_init()
+{
+	struct stat info;
+	struct timeval tv_start, tv_end, diff;
+	int x_devices = 0;
+	int n_threads = opt_n_threads;
+
+	// Mark every device as unmapped first: with the zero default an unmatched
+	// device would silently be treated as X gpu:0 by nvs_set_clocks().
+	for (int i = 0; i < MAX_GPUS; i++)
+		nvs_dev_map[i] = -1;
+
+	if (stat(NVS_PATH, &info))
+		return -ENOENT;
+
+	gettimeofday(&tv_start, NULL);
+
+	for (int d = 0; d < MAX_GPUS; d++) {
+		// this part can be "slow" (100-200ms per device)
+		int res = nvs_query_int(d, "PCIBus", 1);
+		if (res < 0) break;
+		nvs_bus_ids[d] = 0xFFu & res;
+		x_devices++;
+	}
+
+	if (opt_debug) {
+		gettimeofday(&tv_end, NULL);
+		timeval_subtract(&diff, &tv_end, &tv_start);
+		applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
+			(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
+	}
+
+	if (!x_devices)
+		return -ENODEV;
+	if (!n_threads) n_threads = cuda_num_devices();
+	for (int i = 0; i < n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		cudaDeviceProp props;
+		if (cudaGetDeviceProperties(&props, dev_id) != cudaSuccess)
+			continue;
+		for (int8_t d = 0; d < x_devices; d++) {
+			if (nvs_bus_ids[d] == (uint8_t) props.pciBusID) {
+				gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
+					(int) d, (unsigned int) nvs_bus_ids[d]);
+				nvs_dev_map[dev_id] = d;
+				/* char buf[1024] = { 0 };
+				nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
+				gpulog(LOG_DEBUG, d, "%s", buf); */
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * Apply the memory offset requested for dev_id, unless one is already applied.
+ *
+ * @param dev_id  cuda device id
+ * @return 0 when the offset was applied or nothing had to be done, -ENODEV
+ *         when the device has no X gpu, -1 when nvidia-settings failed
+ */
+int nvs_set_clocks(int dev_id)
+{
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0) return -ENODEV;
+	if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
+
+	// the attribute receives twice the requested offset
+	// NOTE(review): presumably transfer rate = 2x memory clock - confirm
+	int32_t offset = device_mem_offsets[dev_id]*2;
+	int res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", offset);
+	// nvs_set_int() returns 0 on success: only then remember the offset, so
+	// that it is not applied again and nvs_reset_clocks() knows to undo it
+	if (res == 0) nvs_clocks_set[d] = offset;
+	return res;
+}
+
+/**
+ * Set the memory offset of dev_id back to 0 if nvs_set_clocks() applied one.
+ *
+ * @param dev_id  cuda device id
+ */
+void nvs_reset_clocks(int dev_id)
+{
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0 || !nvs_clocks_set[d]) return;
+	nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
+	nvs_clocks_set[d] = 0;
+}
+
+#else
+/* nvidia-settings is only driven on linux, other platforms get inert stubs
+ * for every function declared in nvml.h */
+int nvs_init() { return -ENOSYS; }
+int nvs_set_clocks(int dev_id) { return -ENOSYS; }
+void nvs_reset_clocks(int dev_id) { }
+int8_t nvs_devnum(int dev_id) { return -1; }
+int nvs_devid(int8_t devNum) { return 0; }
+#endif