
linux: allow setting application clocks

Since Linux driver 346.72, nvidia-smi allows querying the GPU/memory application clocks.

Tested OK on the Asus Strix 970, but it fails on the Gigabyte 750 Ti.

The system may first require persistence mode and the application clock permission to be unlocked:
    nvidia-smi -pm 1
    nvidia-smi -acp 0

Supported values are displayed by:
    nvidia-smi -q -d SUPPORTED_CLOCKS
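
An illustrative run using the new options (the algorithm, pool and credentials are placeholders; the clock values are the examples from the new --help text):
    ccminer -a x11 -o stratum+tcp://example.pool:3333 -u wallet -p x --gpu-clock=1150 --mem-clock=3505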

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Tanguy Pruvot committed 10 years ago
commit c9d110e543
Changed files:
  1. ccminer.cpp (39)
  2. configure.ac (2)
  3. cpuminer-config.h (6)
  4. nvml.cpp (72)
  5. nvml.h (2)

ccminer.cpp (39 changed lines)

@@ -196,6 +196,8 @@ char * device_name[MAX_GPUS];
short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
// not linked to the cmdline scrypt options (unused)
int device_batchsize[MAX_GPUS] = { 0 };
@@ -352,6 +354,11 @@ Options:\n\
--max-temp=N Only mine if gpu temp is less than specified value\n\
--max-rate=N[KMG] Only mine if net hashrate is less than specified value\n\
--max-diff=N Only mine if net difficulty is less than specified value\n"
#if defined(USE_WRAPNVML) && defined(__linux)
"\
--gpu-clock=1150 Set device application clock\n\
--mem-clock=3505 Set the gpu memory clock (requires 346.72 linux driver)\n"
#endif
#ifdef HAVE_SYSLOG_H
"\
-S, --syslog use system log for output messages\n\
@@ -412,6 +419,8 @@ static struct option const options[] = {
{ "retry-pause", 1, NULL, 'R' },
{ "scantime", 1, NULL, 's' },
{ "statsavg", 1, NULL, 'N' },
{ "gpu-clock", 1, NULL, 1070 },
{ "mem-clock", 1, NULL, 1071 },
#ifdef HAVE_SYSLOG_H
{ "syslog", 0, NULL, 'S' },
{ "syslog-prefix", 1, NULL, 1018 },
@@ -2895,6 +2904,30 @@ void parse_arg(int key, char *arg)
device_interactive[n++] = last;
}
break;
case 1070: /* --gpu-clock */
{
char *pch = strtok(arg,",");
int n = 0, last = atoi(arg);
while (pch != NULL) {
device_gpu_clocks[n++] = last = atoi(pch);
pch = strtok(NULL, ",");
}
//while (n < MAX_GPUS)
// device_gpu_clocks[n++] = last;
}
break;
case 1071: /* --mem-clock */
{
char *pch = strtok(arg,",");
int n = 0, last = atoi(arg);
while (pch != NULL) {
device_mem_clocks[n++] = last = atoi(pch);
pch = strtok(NULL, ",");
}
//while (n < MAX_GPUS)
// device_mem_clocks[n++] = last;
}
break;
case 1005:
opt_benchmark = true;
want_longpoll = false;
@@ -3448,8 +3481,12 @@ int main(int argc, char *argv[])
#ifndef WIN32
/* nvml is currently not the best choice on Windows (only in x64) */
hnvml = nvml_create();
if (hnvml)
if (hnvml) {
applog(LOG_INFO, "NVML GPU monitoring enabled.");
for (int n=0; n < opt_n_threads; n++) {
nvml_set_clocks(hnvml, device_map[n]);
}
}
#else
if (nvapi_init() == 0)
applog(LOG_INFO, "NVAPI GPU monitoring enabled.");

configure.ac (2 changed lines)

@@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.6.4])
AC_INIT([ccminer], [1.6.5-git])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

cpuminer-config.h (6 changed lines)

@@ -159,7 +159,7 @@
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 1.6.4"
#define PACKAGE_STRING "ccminer 1.6.5-git"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer"
@@ -168,7 +168,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.6.4"
#define PACKAGE_VERSION "1.6.5-git"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
@@ -182,7 +182,7 @@
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "1.6.4"
#define VERSION "1.6.5-git"
/* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */

nvml.cpp (72 changed lines)

@@ -33,6 +33,11 @@ extern char driver_version[32];
static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
extern uint32_t device_gpu_clocks[MAX_GPUS];
extern uint32_t device_mem_clocks[MAX_GPUS];
uint8_t gpu_clocks_changed[MAX_GPUS] = { 0 };
/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
@@ -218,21 +223,6 @@ nvml_handle * nvml_create()
if (nvmlh->nvmlDeviceGetAPIRestriction) {
nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
&nvmlh->app_clocks[i]);
if (nvmlh->app_clocks[i] == NVML_FEATURE_ENABLED && opt_debug) {
applog(LOG_DEBUG, "NVML application clock feature is allowed");
#if 0
uint32_t mem;
nvmlReturn_t rc;
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[i], NVML_CLOCK_MEM, &mem);
if (rc == NVML_SUCCESS)
applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: mem %u", mem);
else
applog(LOG_DEBUG, "nvmlDeviceGetDefaultApplicationsClock: %s", nvmlh->nvmlErrorString(rc));
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[i], mem, 1228000);
if (rc != NVML_SUCCESS)
applog(LOG_DEBUG, "nvmlDeviceSetApplicationsClocks: %s", nvmlh->nvmlErrorString(rc));
#endif
}
}
}
@@ -251,7 +241,7 @@ nvml_handle * nvml_create()
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
if (opt_debug)
applog(LOG_DEBUG, "CUDA GPU#%d matches NVML GPU %d by busId %u",
applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u",
i, j, (uint32_t) props.pciBusID);
nvmlh->nvml_cuda_device_id[j] = i;
nvmlh->cuda_nvml_device_id[i] = j;
@@ -263,6 +253,56 @@ nvml_handle * nvml_create()
return nvmlh;
}
/* apply configured clocks to a used device */
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
{
nvmlReturn_t rc;
uint32_t gpu_clk = 0, mem_clk = 0;
int n = nvmlh->cuda_nvml_device_id[dev_id];
if (n < 0 || n >= nvmlh->nvml_gpucount)
return -1;
// prevent repeated operations on the same gpu (could be improved)
if (gpu_clocks_changed[dev_id])
return 0;
int c = nvmlh->nvml_cuda_device_id[n];
if (!device_gpu_clocks[c] && !device_mem_clocks[c])
return 0; // nothing to do
// applog(LOG_DEBUG, "device %d cuda %d nvml %d", dev_id, c, n);
if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", c);
return -1;
}
if (opt_debug)
applog(LOG_DEBUG, "GPU #%d: NVML application clock feature is allowed", c);
nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
if (rc != NVML_SUCCESS)
return -1;
if (opt_debug)
applog(LOG_DEBUG, "GPU #%d: default clocks are %u/%u", c, mem_clk, gpu_clk);
// get application config values
if (device_mem_clocks[c]) mem_clk = device_mem_clocks[c];
if (device_gpu_clocks[c]) gpu_clk = device_gpu_clocks[c];
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
if (rc == NVML_SUCCESS)
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", c, mem_clk, gpu_clk);
else {
applog(LOG_ERR, "GPU #%d: %u/%u - %s", c, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
return -1;
}
gpu_clocks_changed[dev_id] = 1;
return 0;
}
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->nvml_gpucount;
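
For reference, a minimal standalone sketch of the NVML call sequence that the new nvml_set_clocks() relies on (not part of this commit; it assumes the NVML headers and library are installed, and error handling is trimmed):

    #include <stdio.h>
    #include <nvml.h>

    int main(void)
    {
        unsigned int mem_clk = 3505, gpu_clk = 1150;  /* example values from the --help text */
        nvmlDevice_t dev;

        if (nvmlInit() != NVML_SUCCESS)
            return 1;
        if (nvmlDeviceGetHandleByIndex(0, &dev) == NVML_SUCCESS) {
            /* memory clock first, then graphics clock, both in MHz */
            nvmlReturn_t rc = nvmlDeviceSetApplicationsClocks(dev, mem_clk, gpu_clk);
            if (rc != NVML_SUCCESS)
                fprintf(stderr, "set clocks failed: %s\n", nvmlErrorString(rc));
        }
        nvmlShutdown();
        return 0;
    }

Build with something like: gcc setclk.c -o setclk -lnvidia-ml (the library name may vary by distribution; root or persistence mode may be needed, as noted in the commit message).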

nvml.h (2 changed lines)

@@ -156,6 +156,8 @@ int nvml_get_power_usage(nvml_handle *nvmlh,
int gpuindex,
unsigned int *milliwatts);
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id);
/* api functions */
unsigned int gpu_fanpercent(struct cgpu_info *gpu);
