
nvapi: link clocks and tlimit to command line

Boost clocks and the thermal limit are shared with Afterburner,
so be careful with your settings: they are not as safe as application clocks!

Note: both NVAPI and NVML are now used on Windows x64.
Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Branch: master
Author: Tanguy Pruvot, 9 years ago
Commit: bc6ac3a3ab
7 changed files (additions + deletions):

  1. README.txt (2)
  2. ccminer.cpp (49)
  3. compat/nvapi/nvapi_ccminer.h (57)
  4. cuda.cpp (14)
  5. nvapi.cpp (56)
  6. nvml.cpp (265)
  7. nvml.h (10)
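The options this commit wires up are combined on the ccminer command line: --gpu-clock and --mem-clock take boost clocks in MHz, --plimit a percentage, and --tlimit a temperature in degrees; the --tlimit parser added below accepts a comma-separated list applied per configured GPU. A hypothetical invocation (algo, pool and credentials are placeholders, not part of this commit):

    ccminer -a x11 -o stratum+tcp://pool.example.com:3333 -u WALLET -p x \
        --gpu-clock=1150 --mem-clock=3505 --plimit=80 --tlimit=80,75

As the commit message warns, these boost settings are shared with Afterburner and are less safe than NVML application clocks.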

README.txt (2)

@@ -243,6 +243,8 @@ features.
 June 2016 v1.8.0
   Pascal support with cuda 8
   x11evo algo (XRE)
+  Lyra2v2 and Decred hashrate improvements
+  Enhance windows NVAPI clock and power limits
 May 18th 2016 v1.7.6
   Decred vote support

ccminer.cpp (49)

@@ -128,7 +128,8 @@ uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
 uint32_t device_plimit[MAX_GPUS] = { 0 };
-int8_t device_pstate[MAX_GPUS] = { -1 };
+uint8_t device_tlimit[MAX_GPUS] = { 0 };
+int8_t device_pstate[MAX_GPUS] = { -1, -1 };
 int opt_cudaschedule = -1;
 static bool opt_keep_clocks = false;

@@ -303,7 +304,10 @@ Options:\n\
 	--plimit=100W Set the gpu power limit (352.21+ driver)\n"
 #else /* via nvapi.dll */
 	"\
-	--plimit=100 Set the gpu power limit in percentage\n"
+	--mem-clock=3505 Set the gpu memory boost clock\n\
+	--gpu-clock=1150 Set the gpu engine boost clock\n\
+	--plimit=100 Set the gpu power limit in percentage\n\
+	--tlimit=80 Set the gpu thermal limit in degrees\n"
 #endif
 #ifdef HAVE_SYSLOG_H
 	"\

@@ -380,6 +384,7 @@ struct option options[] = {
 	{ "pstate", 1, NULL, 1072 },
 	{ "plimit", 1, NULL, 1073 },
 	{ "keep-clocks", 0, NULL, 1074 },
+	{ "tlimit", 1, NULL, 1075 },
 #ifdef HAVE_SYSLOG_H
 	{ "syslog", 0, NULL, 'S' },
 	{ "syslog-prefix", 1, NULL, 1018 },

@@ -2687,7 +2692,8 @@ void parse_arg(int key, char *arg)
 #ifdef USE_WRAPNVML
 		hnvml = nvml_create();
 #ifdef WIN32
-		if (!hnvml) nvapi_init();
+		nvapi_init();
+		nvapi_init_settings();
 #endif
 #endif
 		cuda_print_devices();

@@ -2931,6 +2937,17 @@ void parse_arg(int key, char *arg)
 	case 1074: /* --keep-clocks */
 		opt_keep_clocks = true;
 		break;
+	case 1075: /* --tlimit */
+		{
+			char *pch = strtok(arg,",");
+			int n = 0;
+			while (pch != NULL && n < MAX_GPUS) {
+				int dev_id = device_map[n++];
+				device_tlimit[dev_id] = (uint8_t) atoi(pch);
+				pch = strtok(NULL, ",");
+			}
+		}
+		break;
 	case 1005:
 		opt_benchmark = true;
 		want_longpoll = false;

@@ -3504,13 +3521,14 @@ int main(int argc, char *argv[])
 	if (hnvml) {
 		bool gpu_reinit = (opt_cudaschedule >= 0); //false
 		cuda_devicenames(); // refresh gpu vendor name
-		applog(LOG_INFO, "NVML GPU monitoring enabled.");
+		if (!opt_quiet)
+			applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for (int n=0; n < active_gpus; n++) {
 			if (nvml_set_pstate(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if (nvml_set_plimit(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
-			if (nvml_set_clocks(hnvml, device_map[n]) == 1)
+			if (!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if (gpu_reinit) {
 				cuda_reset_device(n, NULL);

@@ -3518,20 +3536,25 @@ int main(int argc, char *argv[])
 		}
 	}
 #endif
+#ifdef WIN32
+	if (nvapi_init() == 0) {
+		if (!opt_quiet)
+			applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
+		if (!hnvml) {
+			cuda_devicenames(); // refresh gpu vendor name
+		}
+		nvapi_init_settings();
+	}
+#endif
+	else if (!hnvml && !opt_quiet)
+		applog(LOG_INFO, "GPU monitoring is not available.");
 	// force reinit to set default device flags
 	if (opt_cudaschedule >= 0 && !hnvml) {
 		for (int n=0; n < active_gpus; n++) {
 			cuda_reset_device(n, NULL);
 		}
 	}
-#ifdef WIN32
-	if (!hnvml && nvapi_init() == 0) {
-		applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
-		cuda_devicenames(); // refresh gpu vendor name
-	}
-#endif
-	else if (!hnvml)
-		applog(LOG_INFO, "GPU monitoring is not available.");
 #endif
 	if (opt_api_listen) {

compat/nvapi/nvapi_ccminer.h (57)

@@ -33,6 +33,18 @@ typedef struct {
 } NVAPI_GPU_POWER_STATUS;
 #define NVAPI_GPU_POWER_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_STATUS, 1)
+typedef struct {
+	NvU32 version;
+	NvU32 count;
+	struct {
+		NvU32 unknown1;
+		NvU32 unknown2;
+		NvU32 power; // unsure ?? 85536 to 95055 on 1080, 104825+ on 970
+		NvU32 unknown4;
+	} entries[4];
+} NVAPI_GPU_POWER_TOPO;
+#define NVAPI_GPU_POWER_TOPO_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_TOPO, 1)
 typedef struct {
 	NvU32 version;
 	NvU32 flags;

@@ -134,11 +146,48 @@ typedef struct {
 } NVAPI_CLOCK_TABLE; // 9248 bytes
 #define NVAPI_CLOCK_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_CLOCK_TABLE, 1)
+typedef struct {
+	NvU32 version;
+	NvU32 mask[4]; // 80 bits mask
+	NvU32 buf0[12];
+	struct {
+		NvU32 a; // 0
+		NvU32 freq_kHz;
+		NvU32 volt_uV;
+		NvU32 d;
+		NvU32 e;
+		NvU32 f;
+		NvU32 g;
+	} gpuEntries[80];
+	struct {
+		NvU32 a; // 1 for idle values ?
+		NvU32 freq_kHz;
+		NvU32 volt_uV;
+		NvU32 d;
+		NvU32 e;
+		NvU32 f;
+		NvU32 g;
+	} memEntries[23];
+	NvU32 buf1[1064];
+} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
+#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
+typedef struct {
+	NvU32 version;
+	NvU32 flags;
+	NvU32 count; // unsure
+	NvU32 unknown;
+	NvU32 value_uV;
+	NvU32 buf1[30];
+} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
+#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
 NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
+NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_TOPO*); // EDCF624E 1-0048
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_INFO*);
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetLimit(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_LIMIT*);

@@ -146,17 +195,21 @@ NvAPI_Status NvAPI_DLL_ClientThermalPoliciesSetLimit(NvPhysicalGpuHandle hPhysic
 NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS*);
-// to dig...
+// Pascal GTX only
 NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCKS_RANGE*);
 NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_MASKS*); // 0x507B4B59
 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
+NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
+// Maxwell ?
+NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
 NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
 NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortString serial);
-NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo);
+NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
+NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
 NvAPI_Status NvAPI_DLL_Unload();

cuda.cpp (14)

@@ -199,9 +199,19 @@ void cuda_reset_device(int thr_id, bool *init)
 int cuda_available_memory(int thr_id)
 {
 	int dev_id = device_map[thr_id % MAX_GPUS];
-	size_t mtotal, mfree = 0;
+	size_t mtotal = 0, mfree = 0;
+	cudaDeviceProp props;
 	cudaSetDevice(dev_id);
-	cudaMemGetInfo(&mfree, &mtotal);
+	cudaDeviceSynchronize();
+	if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
+#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
+		if (!strstr(props.name, "GTX 10"))
+			// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
+			cudaMemGetInfo(&mfree, &mtotal);
+#else
+		cudaMemGetInfo(&mfree, &mtotal);
+#endif
+	}
 	return (int) (mfree / (1024 * 1024));
 }

nvapi.cpp (56)

@@ -119,6 +119,16 @@ NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle handle,
 	return (*pointer)(handle, pPolicies);
 }
+#define NVAPI_ID_POWERTOPO_GET 0xEDCF624E
+NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle handle, NVAPI_GPU_POWER_TOPO* topo) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*))nvidia_handle->query(NVAPI_ID_POWERTOPO_GET);
+	}
+	return (*pointer)(handle, topo);
+}
 #define NVAPI_ID_THERMAL_INFO 0x0D258BB5
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle handle, NVAPI_GPU_THERMAL_INFO* pInfo) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_THERMAL_INFO*) = NULL;

@@ -169,7 +179,7 @@ NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVIDI
 	return (*pointer)(handle, status);
 }
-#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A
+#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLOCKS_RANGE* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCKS_RANGE*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;

@@ -179,7 +189,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLO
 	return (*pointer)(handle, range);
 }
-#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59
+#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59 // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK_MASKS* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_MASKS*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;

@@ -189,7 +199,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK
 	return (*pointer)(handle, range);
 }
-#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133
+#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133 // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOCK_TABLE* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_TABLE*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;

@@ -199,8 +209,25 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
 	return (*pointer)(handle, range);
 }
-#define NVAPI_ID_CLK_BOOST_CURVE 0x0700004A //??
+#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
+NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*))nvidia_handle->query(NVAPI_ID_VFP_CURVE_GET);
+	}
+	return (*pointer)(handle, curve);
+}
+#define NVAPI_ID_VOLT_STATUS_GET 0xC16C7E2C
+NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVAPI_VOLT_STATUS* data) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*))nvidia_handle->query(NVAPI_ID_VOLT_STATUS_GET);
+	}
+	return (*pointer)(handle, data);
+}
 #define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
 NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){

@@ -212,14 +239,25 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
 	return (*pointer)(handle, pFreqs);
 }
-#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // NOT SUPPORTED
-NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo) {
-	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*) = NULL;
+#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported)
+// allow to set gpu/mem core freq delta
+NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
+	}
+	return (*pointer)(handle, pSet);
+}
+// allow to set gpu core voltage delta
+NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
 	if(!pointer) {
-		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
 	}
-	return (*pointer)(handle, pPerfPstatesInfo);
+	return (*pointer)(handle, pSet);
 }
 #define NVAPI_ID_UNLOAD 0xD22BDD7E
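Each NvAPI_DLL_* wrapper added in this file follows the same pattern: the private NVAPI entry point is resolved once from its hashed 32-bit ID through the driver's nvapi_QueryInterface export (wrapped here by nvidia_handle->query) and cached in a static function pointer. A minimal stand-alone sketch of that resolution step, assuming a 64-bit Windows build; the ID value is NVAPI_ID_VFP_CURVE_GET from this commit, everything else is illustrative and not ccminer code:

    #include <windows.h>
    #include <stdio.h>

    /* nvapi64.dll exposes a single useful export, nvapi_QueryInterface,
       which maps hashed 32-bit IDs to internal function pointers. */
    typedef void* (__cdecl *nvapi_query_t)(unsigned int id);

    int main(void)
    {
        HMODULE dll = LoadLibraryA("nvapi64.dll"); /* nvapi.dll on 32-bit builds */
        if (!dll) return 1;

        nvapi_query_t query = (nvapi_query_t) GetProcAddress(dll, "nvapi_QueryInterface");
        if (!query) return 1;

        /* 0x21537AD4 is NVAPI_ID_VFP_CURVE_GET in nvapi.cpp; ccminer casts the
           returned pointer to the NvAPI_DLL_GetVFPCurve signature and caches it. */
        void *fn = query(0x21537AD4);
        printf("VFP curve getter resolved at %p\n", fn);
        return 0;
    }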

nvml.cpp (265)

@@ -35,6 +35,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 extern uint32_t device_gpu_clocks[MAX_GPUS];
 extern uint32_t device_mem_clocks[MAX_GPUS];
 extern uint32_t device_plimit[MAX_GPUS];
+extern uint8_t device_tlimit[MAX_GPUS];
 extern int8_t device_pstate[MAX_GPUS];
 uint32_t clock_prev[MAX_GPUS] = { 0 };

@@ -371,7 +372,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (rc == NVML_SUCCESS)
 		applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
 	else {
-		applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+		applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
 		return -1;
 	}

@@ -953,8 +954,6 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
 	return 0;
 }
-#define FREQ_GETVAL(clk) (clk.typeId == 0 ? clk.data.single.freq_kHz : clk.data.range.maxFreq_kHz)
 int nvapi_pstateinfo(unsigned int devNum)
 {
 	uint32_t n;

@@ -964,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
 	// useless on init but...
 	nvapi_getpstate(devNum, &current);
+#if 0
+	// Unsure of the meaning of these values
+	NVAPI_GPU_POWER_TOPO topo = { 0 };
+	topo.version = NVAPI_GPU_POWER_TOPO_VER;
+	if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
+		if (topo.count)
+			applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
+				(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);
+
+	// Ok on 970, not pascal
+	NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
+	pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
+	pset2.ov.numVoltages = 1;
+	pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv;
+	ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
+#endif
 	NV_GPU_PERF_PSTATES20_INFO info = { 0 };
 	info.version = NV_GPU_PERF_PSTATES20_INFO_VER;
 	if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], &info)) != NVAPI_OK) {

@@ -973,46 +989,59 @@ int nvapi_pstateinfo(unsigned int devNum)
 		applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
 		return -1;
 	}
+	applog(LOG_RAW, "%u P-states with %u clocks %s",
+		info.numPstates, info.numClocks, info.numBaseVoltages ? "and voltage":"");
 	for (n=0; n < info.numPstates; n++) {
 		NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info.pstates[n].clocks;
-		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %3u-%4u MHz%s %4u mV%s \x7F %d/%d",
+		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
 			info.pstates[n].pstateId == current ? ">":" ", info.pstates[n].pstateId,
-			FREQ_GETVAL(clocks[1])/1000, clocks[1].bIsEditable ? "*":" ",
-			clocks[0].data.range.minFreq_kHz/1000, FREQ_GETVAL(clocks[0])/1000, clocks[0].bIsEditable ? "*":" ",
+			clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
+			(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
 			info.pstates[n].baseVoltages[0].volt_uV/1000, info.pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
 			info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
 			info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
+		if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
+			applog(LOG_RAW, " OC %4d MHz %6.1f MHz",
+				clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
+		}
 	}
-	// boost over volting (GTX 9xx) ?
+	// boost over volting (GTX 9xx only ?)
 	for (n=0; n < info.ov.numVoltages; n++) {
-		applog(LOG_RAW, " OV: %u mV%s + %d/%d",
-			info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
+		applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
+			info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
 			info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
 	}
 	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
 	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz >Current",
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Base Clocks",
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Boost Clocks",
+	applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
+#if 1 // Maxwell only
+	NVAPI_VOLT_STATUS pvdom = { 0 };
+	pvdom.version = NVAPI_VOLT_STATUS_VER;
+	if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &pvdom)) == NVAPI_OK) {
+		if (pvdom.value_uV)
+			applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
+	}
+	uint8_t plim = nvapi_get_plimit(devNum);
+	applog(LOG_RAW, " Power limit is set to %u%%", (uint32_t) plim);
 	NV_GPU_THERMAL_SETTINGS tset = { 0 };
 	NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
 	NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };

@@ -1025,16 +1054,9 @@ int nvapi_pstateinfo(unsigned int devNum)
 		applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]",
 			tlim.entries[0].value >> 8, tset.sensor[0].currentTemp,
 			tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
-		// ok
-		//tlim.entries[0].value = 80 << 8;
-		//tlim.flags = 1;
-		//ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
 	}
-	uint8_t plim = nvapi_getplimit(devNum);
-	applog(LOG_RAW, " Power limit coef. is set to %u%%", (uint32_t) plim);
-#if 1
+#endif
+#if 0
 	// seems empty..
 	NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
 	volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;

@@ -1055,32 +1077,61 @@
 		if (boost.clocks[n].gpuDelta) gpuClocks++;
 	}
-	if (gpuClocks || memClocks) {
-		applog(LOG_RAW, "Boost table contains %d gpu clocks and %d mem clocks.", gpuClocks, memClocks);
+	// PASCAL GTX ONLY
+	//if (gpuClocks || memClocks) {
 		NVAPI_CLOCK_TABLE table = { 0 };
 		table.version = NVAPI_CLOCK_TABLE_VER;
 		memcpy(table.mask, boost.mask, 12);
 		ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], &table);
+		gpuClocks = 0, memClocks = 0;
 		for (n=0; n < 12; n++) {
 			if (table.buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table.buf0[n]);
 		}
 		for (n=0; n < 80; n++) {
-			if (table.gpuDeltas[n].freqDelta)
-				applog(LOG_RAW, "boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/1000);
+			if (table.gpuDeltas[n].freqDelta) {
+				// note: gpu delta value seems to be x2, not the memory
+				//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/2000);
+				gpuClocks++;
+			}
 		}
 		for (n=0; n < 23; n++) {
-			if (table.memFilled[n])
-				applog(LOG_RAW, "boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
+			if (table.memFilled[n]) {
+				//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
+				memClocks++;
+			}
 		}
 		for (n=0; n < 1529; n++) {
 			if (table.buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table.buf1[n]);
 		}
-	}
+		applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+		NVAPI_VFP_CURVE curve = { 0 };
+		curve.version = NVAPI_VFP_CURVE_VER;
+		memcpy(curve.mask, boost.mask, 12);
+		ret = NvAPI_DLL_GetVFPCurve(phys[devNum], &curve);
+		gpuClocks = 0, memClocks = 0;
+		for (n=0; n < 80; n++) {
+			if (curve.gpuEntries[n].freq_kHz || curve.gpuEntries[n].volt_uV) {
+				// applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve.gpuEntries[n].freq_kHz/1000, curve.gpuEntries[n].volt_uV/1000);
+				gpuClocks++;
+			}
+		}
+		for (n=0; n < 23; n++) {
+			if (curve.memEntries[n].freq_kHz || curve.memEntries[n].volt_uV) {
+				// applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve.memEntries[n].freq_kHz/1000, curve.memEntries[n].volt_uV/1000);
+				memClocks++;
+			}
		}
+		for (n=0; n < 1064; n++) {
+			if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
+		}
+		applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+	//}
 #endif
 	return 0;
 }
-uint8_t nvapi_getplimit(unsigned int devNum)
+uint8_t nvapi_get_plimit(unsigned int devNum)
 {
 	NvAPI_Status ret = NVAPI_OK;
 	NVAPI_GPU_POWER_STATUS pol = { 0 };

@@ -1095,7 +1146,7 @@ uint8_t nvapi_getplimit(unsigned int devNum)
 	return (uint8_t) (pol.entries[0].power / 1000); // in percent
 }
-int nvapi_setplimit(unsigned int devNum, uint16_t percent)
+int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
 {
 	NvAPI_Status ret = NVAPI_OK;
 	uint32_t val = percent * 1000;

@@ -1126,6 +1177,98 @@ int nvapi_setplimit(unsigned int devNum, uint16_t percent)
 	return ret;
 }
+
+int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
+{
+	NvAPI_Status ret;
+	uint32_t val = limit;
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+	NV_GPU_THERMAL_SETTINGS tset = { 0 };
+	NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
+	NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
+	tset.version = NV_GPU_THERMAL_SETTINGS_VER;
+	NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset);
+	tnfo.version = NVAPI_GPU_THERMAL_INFO_VER;
+	NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo);
+	tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER;
+	if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) {
+		tlim.entries[0].value = val << 8;
+		tlim.flags = 1;
+		ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
+		if (ret == NVAPI_OK) {
+			applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]",
+				devNum, val, tset.sensor[0].currentTemp,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		} else {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		}
+	}
+	return (int) ret;
+}
+
+int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
+	if (ret) return ret;
+	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	// Ok on both 1080 and 970
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
+	}
+	return ret;
+}
+
+int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
+	if (ret) return ret;
+	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	// Memory boost clock seems only ok on pascal with this api
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
+	}
+	return ret;
+}
+
 int nvapi_init()
 {
 	int num_gpus = cuda_num_devices();

@@ -1191,23 +1334,49 @@ int nvapi_init()
 		sprintf(driver_version,"%d.%02d", udv / 100, udv % 100);
 	}
+	return 0;
+}
+
+int nvapi_init_settings()
+{
 	// nvapi.dll
-	ret = nvapi_dll_init();
-	if (ret == NVAPI_OK) {
-		for (int n=0; n < opt_n_threads; n++) {
-			int dev_id = device_map[n % MAX_GPUS];
-			if (device_plimit[dev_id]) {
-				nvapi_setplimit(nvapi_dev_map[dev_id], device_plimit[dev_id]); // 0=default
-				uint32_t res = nvapi_getplimit(nvapi_dev_map[dev_id]);
-				gpulog(LOG_INFO, n, "NVAPI power limit is set to %u%%", res);
-			}
-			if (device_pstate[dev_id]) {
-				// todo...
-			}
-		}
-	}
-	return 0;
+	int ret = nvapi_dll_init();
+	if (ret != NVAPI_OK)
+		return ret;
+
+	for (int n=0; n < opt_n_threads; n++) {
+		int dev_id = device_map[n % MAX_GPUS];
+		if (device_plimit[dev_id]) {
+			if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
+				uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
+				gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
+			}
+		}
+		if (device_tlimit[dev_id]) {
+			nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
+		}
+		if (device_gpu_clocks[dev_id]) {
+			ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "Boost gpu clock %s", string);
+			}
+		}
+		if (device_mem_clocks[dev_id]) {
+			ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "Boost mem clock %s", string);
+			}
+		}
+		if (device_pstate[dev_id]) {
+			// dunno how via nvapi or/and pascal
+		}
+	}
+	return ret;
 }
 #endif

@@ -1306,7 +1475,7 @@ unsigned int gpu_power(struct cgpu_info *gpu)
 	if (support == -1) {
 		unsigned int pct = 0;
 		nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
-		pct *= nvapi_getplimit(nvapi_dev_map[gpu->gpu_id]);
+		pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
 		pct /= 100;
 		mw = pct; // to fix
 	}
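One detail worth noting in nvapi_set_tlimit() and the matching read in nvapi_pstateinfo(): the thermal limit entries use an 8.8 fixed-point encoding, which is why the code shifts the target left by 8 when writing (val << 8) and right by 8 when reading (value >> 8). A tiny self-contained round trip of that encoding; the helper names are illustrative and not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* 8.8 fixed point: the upper bits hold whole degrees Celsius,
       the low 8 bits a fractional part (unused by this patch). */
    static uint32_t tlimit_to_raw(uint8_t celsius) { return (uint32_t) celsius << 8; }
    static uint8_t  raw_to_tlimit(uint32_t raw)    { return (uint8_t) (raw >> 8); }

    int main(void)
    {
        uint32_t raw = tlimit_to_raw(80); /* what --tlimit=80 ends up writing */
        printf("raw=0x%04x -> %u C\n", raw, raw_to_tlimit(raw)); /* raw=0x5000 -> 80 C */
        return 0;
    }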

nvml.h (10)

@@ -217,13 +217,15 @@ int gpu_info(struct cgpu_info *gpu);
 int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
-// to debug clocks..
-int nvapi_pstateinfo(unsigned int devNum);
-uint8_t nvapi_getplimit(unsigned int devNum);
 /* nvapi functions */
 #ifdef WIN32
 int nvapi_init();
+int nvapi_init_settings();
+// to debug nvapi..
+int nvapi_pstateinfo(unsigned int devNum);
+uint8_t nvapi_get_plimit(unsigned int devNum);
 #endif
 #endif /* USE_WRAPNVML */
