From c9d110e543db8c25cbc61ee8875e55323de5090b Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 27 May 2015 14:11:38 +0200 Subject: [PATCH] linux: allow to set application clocks Since linux driver 346.72, nvidia-smi allows querying gpu/mem clocks. Tested ok on the Asus Strix 970, but fails on the Gigabyte 750 Ti. The system may first require persistence mode and app clock unlock: nvidia-smi -pm 1 nvidia-smi -acp 0 Supported values are displayed by nvidia-smi -q -d SUPPORTED_CLOCKS Signed-off-by: Tanguy Pruvot --- ccminer.cpp | 39 ++++++++++++++++++++++++- configure.ac | 2 +- cpuminer-config.h | 6 ++-- nvml.cpp | 72 ++++++++++++++++++++++++++++++++++++----------- nvml.h | 2 ++ 5 files changed, 100 insertions(+), 21 deletions(-) diff --git a/ccminer.cpp b/ccminer.cpp index 7946653..a667796 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -196,6 +196,8 @@ char * device_name[MAX_GPUS]; short device_map[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = { 0 }; +uint32_t device_gpu_clocks[MAX_GPUS] = { 0 }; +uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; // un-linked to cmdline scrypt options (useless) int device_batchsize[MAX_GPUS] = { 0 }; @@ -352,6 +354,11 @@ Options:\n\ --max-temp=N Only mine if gpu temp is less than specified value\n\ --max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\ --max-diff=N Only mine if net difficulty is less than specified value\n" +#if defined(USE_WRAPNVML) && defined(__linux) +"\ + --gpu-clock=1150 Set device application clock\n\ + --mem-clock=3505 Set the gpu memory clock (require 346.72 linux driver)\n" +#endif #ifdef HAVE_SYSLOG_H "\ -S, --syslog use system log for output messages\n\ @@ -412,6 +419,8 @@ static struct option const options[] = { { "retry-pause", 1, NULL, 'R' }, { "scantime", 1, NULL, 's' }, { "statsavg", 1, NULL, 'N' }, + { "gpu-clock", 1, NULL, 1070 }, + { "mem-clock", 1, NULL, 1071 }, #ifdef HAVE_SYSLOG_H { "syslog", 0, NULL, 'S' }, { "syslog-prefix", 1, 
NULL, 1018 }, @@ -2895,6 +2904,30 @@ void parse_arg(int key, char *arg) device_interactive[n++] = last; } break; + case 1070: /* --gpu-clock */ + { + char *pch = strtok(arg,","); + int n = 0, last = atoi(arg); + while (pch != NULL) { + device_gpu_clocks[n++] = last = atoi(pch); + pch = strtok(NULL, ","); + } + //while (n < MAX_GPUS) + // device_gpu_clocks[n++] = last; + } + break; + case 1071: /* --mem-clock */ + { + char *pch = strtok(arg,","); + int n = 0, last = atoi(arg); + while (pch != NULL) { + device_mem_clocks[n++] = last = atoi(pch); + pch = strtok(NULL, ","); + } + //while (n < MAX_GPUS) + // device_mem_clocks[n++] = last; + } + break; case 1005: opt_benchmark = true; want_longpoll = false; @@ -3448,8 +3481,12 @@ int main(int argc, char *argv[]) #ifndef WIN32 /* nvml is currently not the best choice on Windows (only in x64) */ hnvml = nvml_create(); - if (hnvml) + if (hnvml) { applog(LOG_INFO, "NVML GPU monitoring enabled."); + for (int n=0; n < opt_n_threads; n++) { + nvml_set_clocks(hnvml, device_map[n]); + } + } #else if (nvapi_init() == 0) applog(LOG_INFO, "NVAPI GPU monitoring enabled."); diff --git a/configure.ac b/configure.ac index 549247f..4c9bc38 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [1.6.4]) +AC_INIT([ccminer], [1.6.5-git]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpuminer-config.h b/cpuminer-config.h index 4e3bdbc..4ede926 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -159,7 +159,7 @@ #define PACKAGE_NAME "ccminer" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ccminer 1.6.4" +#define PACKAGE_STRING "ccminer 1.6.5-git" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "ccminer" @@ -168,7 +168,7 @@ #define PACKAGE_URL "http://github.com/tpruvot/ccminer" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.6.4" +#define PACKAGE_VERSION "1.6.5-git" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -182,7 +182,7 @@ #define STDC_HEADERS 1 /* Version number of package */ -#define VERSION "1.6.4" +#define VERSION "1.6.5-git" /* Define curl_free() as free() if our version of curl lacks curl_free. */ /* #undef curl_free */ diff --git a/nvml.cpp b/nvml.cpp index 4868444..03d4b8a 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -33,6 +33,11 @@ extern char driver_version[32]; static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; +extern uint32_t device_gpu_clocks[MAX_GPUS]; +extern uint32_t device_mem_clocks[MAX_GPUS]; + +uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 }; + /* * Wrappers to emulate dlopen() on other systems like Windows */ @@ -218,21 +223,6 @@ nvml_handle * nvml_create() if (nvmlh->nvmlDeviceGetAPIRestriction) { nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &nvmlh->app_clocks[i]); - if (nvmlh->app_clocks[i] == NVML_FEATURE_ENABLED && opt_debug) { - applog(LOG_DEBUG, "NVML application clock feature is allowed"); -#if 0 - uint32_t mem; - nvmlReturn_t rc; - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[i], NVML_CLOCK_MEM, &mem); - if (rc == NVML_SUCCESS) - applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: mem %u", mem); - else - applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: %s", nvmlh->nvmlErrorString(rc)); - rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[i], mem, 1228000); - if (rc != NVML_SUCCESS) - applog(LOG_DEBUG, "nvmlDeviceSetApplicationsClocks: %s", nvmlh->nvmlErrorString(rc)); -#endif - } } } @@ -251,7 +241,7 @@ nvml_handle * nvml_create() (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { if (opt_debug) - applog(LOG_DEBUG, "CUDA GPU#%d matches NVML GPU %d by busId %u", + applog(LOG_DEBUG, 
"CUDA GPU %d matches NVML GPU %d by busId %u", i, j, (uint32_t) props.pciBusID); nvmlh->nvml_cuda_device_id[j] = i; nvmlh->cuda_nvml_device_id[i] = j; @@ -263,6 +253,56 @@ nvml_handle * nvml_create() return nvmlh; } +/* apply config clocks to an used device */ +int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) +{ + nvmlReturn_t rc; + uint32_t gpu_clk = 0, mem_clk = 0; + int n = nvmlh->cuda_nvml_device_id[dev_id]; + if (n < 0 || n >= nvmlh->nvml_gpucount) + return -1; + + // prevent double operations on the same gpu... to enhance + if (gpu_clocks_changed[dev_id]) + return 0; + + int c = nvmlh->nvml_cuda_device_id[n]; + if (!device_gpu_clocks[c] && !device_mem_clocks[c]) + return 0; // nothing to do + + // applog(LOG_DEBUG, "device %d cuda %d nvml %d", dev_id, c, n); + if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { + applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", c); + return -1; + } + + if (opt_debug) + applog(LOG_DEBUG, "GPU #%d: NVML application clock feature is allowed", c); + + nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); + rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); + if (rc != NVML_SUCCESS) + return -1; + + if (opt_debug) + applog(LOG_DEBUG, "GPU #%d: default clocks are %u/%u", c, mem_clk, gpu_clk); + + // get application config values + if (device_mem_clocks[c]) mem_clk = device_mem_clocks[c]; + if (device_gpu_clocks[c]) gpu_clk = device_gpu_clocks[c]; + + rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); + if (rc == NVML_SUCCESS) + applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", c, mem_clk, gpu_clk); + else { + applog(LOG_ERR, "GPU #%d: %u/%u - %s", c, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); + return -1; + } + + gpu_clocks_changed[dev_id] = 1; + return 0; +} + int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount) { *gpucount = nvmlh->nvml_gpucount; diff --git a/nvml.h 
b/nvml.h index d9fa5e4..224daf4 100644 --- a/nvml.h +++ b/nvml.h @@ -156,6 +156,8 @@ int nvml_get_power_usage(nvml_handle *nvmlh, int gpuindex, unsigned int *milliwatts); +int nvml_set_clocks(nvml_handle *nvmlh, int dev_id); + /* api functions */ unsigned int gpu_fanpercent(struct cgpu_info *gpu);