Browse Source

nvapi: link clocks and tlimit to command line

boost clocks and the thermal limit are shared with afterburner
beware with your settings, not as safe as application clocks!

Note: both nvapi and nvml are now used on windows x64
Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
2upstream
Tanguy Pruvot 9 years ago
parent
commit
bc6ac3a3ab
  1. 2
      README.txt
  2. 49
      ccminer.cpp
  3. 57
      compat/nvapi/nvapi_ccminer.h
  4. 14
      cuda.cpp
  5. 56
      nvapi.cpp
  6. 265
      nvml.cpp
  7. 10
      nvml.h

2
README.txt

@ -243,6 +243,8 @@ features. @@ -243,6 +243,8 @@ features.
June 2016 v1.8.0
Pascal support with cuda 8
x11evo algo (XRE)
Lyra2v2 and Decred hashrate improvements
Enhance windows NVAPI clock and power limits
May 18th 2016 v1.7.6
Decred vote support

49
ccminer.cpp

@ -128,7 +128,8 @@ uint32_t gpus_intensity[MAX_GPUS] = { 0 }; @@ -128,7 +128,8 @@ uint32_t gpus_intensity[MAX_GPUS] = { 0 };
uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
uint32_t device_plimit[MAX_GPUS] = { 0 };
int8_t device_pstate[MAX_GPUS] = { -1 };
uint8_t device_tlimit[MAX_GPUS] = { 0 };
int8_t device_pstate[MAX_GPUS] = { -1, -1 };
int opt_cudaschedule = -1;
static bool opt_keep_clocks = false;
@ -303,7 +304,10 @@ Options:\n\ @@ -303,7 +304,10 @@ Options:\n\
--plimit=100W Set the gpu power limit (352.21+ driver)\n"
#else /* via nvapi.dll */
"\
--plimit=100 Set the gpu power limit in percentage\n"
--mem-clock=3505 Set the gpu memory boost clock\n\
--gpu-clock=1150 Set the gpu engine boost clock\n\
--plimit=100 Set the gpu power limit in percentage\n\
--tlimit=80 Set the gpu thermal limit in degrees\n"
#endif
#ifdef HAVE_SYSLOG_H
"\
@ -380,6 +384,7 @@ struct option options[] = { @@ -380,6 +384,7 @@ struct option options[] = {
{ "pstate", 1, NULL, 1072 },
{ "plimit", 1, NULL, 1073 },
{ "keep-clocks", 0, NULL, 1074 },
{ "tlimit", 1, NULL, 1075 },
#ifdef HAVE_SYSLOG_H
{ "syslog", 0, NULL, 'S' },
{ "syslog-prefix", 1, NULL, 1018 },
@ -2687,7 +2692,8 @@ void parse_arg(int key, char *arg) @@ -2687,7 +2692,8 @@ void parse_arg(int key, char *arg)
#ifdef USE_WRAPNVML
hnvml = nvml_create();
#ifdef WIN32
if (!hnvml) nvapi_init();
nvapi_init();
nvapi_init_settings();
#endif
#endif
cuda_print_devices();
@ -2931,6 +2937,17 @@ void parse_arg(int key, char *arg) @@ -2931,6 +2937,17 @@ void parse_arg(int key, char *arg)
case 1074: /* --keep-clocks */
opt_keep_clocks = true;
break;
case 1075: /* --tlimit */
{
char *pch = strtok(arg,",");
int n = 0;
while (pch != NULL && n < MAX_GPUS) {
int dev_id = device_map[n++];
device_tlimit[dev_id] = (uint8_t) atoi(pch);
pch = strtok(NULL, ",");
}
}
break;
case 1005:
opt_benchmark = true;
want_longpoll = false;
@ -3504,13 +3521,14 @@ int main(int argc, char *argv[]) @@ -3504,13 +3521,14 @@ int main(int argc, char *argv[])
if (hnvml) {
bool gpu_reinit = (opt_cudaschedule >= 0); //false
cuda_devicenames(); // refresh gpu vendor name
applog(LOG_INFO, "NVML GPU monitoring enabled.");
if (!opt_quiet)
applog(LOG_INFO, "NVML GPU monitoring enabled.");
for (int n=0; n < active_gpus; n++) {
if (nvml_set_pstate(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (nvml_set_plimit(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (nvml_set_clocks(hnvml, device_map[n]) == 1)
if (!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
gpu_reinit = true;
if (gpu_reinit) {
cuda_reset_device(n, NULL);
@ -3518,20 +3536,25 @@ int main(int argc, char *argv[]) @@ -3518,20 +3536,25 @@ int main(int argc, char *argv[])
}
}
#endif
#ifdef WIN32
if (nvapi_init() == 0) {
if (!opt_quiet)
applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
if (!hnvml) {
cuda_devicenames(); // refresh gpu vendor name
}
nvapi_init_settings();
}
#endif
else if (!hnvml && !opt_quiet)
applog(LOG_INFO, "GPU monitoring is not available.");
// force reinit to set default device flags
if (opt_cudaschedule >= 0 && !hnvml) {
for (int n=0; n < active_gpus; n++) {
cuda_reset_device(n, NULL);
}
}
#ifdef WIN32
if (!hnvml && nvapi_init() == 0) {
applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
cuda_devicenames(); // refresh gpu vendor name
}
#endif
else if (!hnvml)
applog(LOG_INFO, "GPU monitoring is not available.");
#endif
if (opt_api_listen) {

57
compat/nvapi/nvapi_ccminer.h

@ -33,6 +33,18 @@ typedef struct { @@ -33,6 +33,18 @@ typedef struct {
} NVAPI_GPU_POWER_STATUS;
#define NVAPI_GPU_POWER_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_STATUS, 1)
typedef struct {
NvU32 version;
NvU32 count;
struct {
NvU32 unknown1;
NvU32 unknown2;
NvU32 power; // unsure ?? 85536 to 95055 on 1080, 104825+ on 970
NvU32 unknown4;
} entries[4];
} NVAPI_GPU_POWER_TOPO;
#define NVAPI_GPU_POWER_TOPO_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_TOPO, 1)
typedef struct {
NvU32 version;
NvU32 flags;
@ -134,11 +146,48 @@ typedef struct { @@ -134,11 +146,48 @@ typedef struct {
} NVAPI_CLOCK_TABLE; // 9248 bytes
#define NVAPI_CLOCK_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_CLOCK_TABLE, 1)
typedef struct {
NvU32 version;
NvU32 mask[4]; // 80 bits mask
NvU32 buf0[12];
struct {
NvU32 a; // 0
NvU32 freq_kHz;
NvU32 volt_uV;
NvU32 d;
NvU32 e;
NvU32 f;
NvU32 g;
} gpuEntries[80];
struct {
NvU32 a; // 1 for idle values ?
NvU32 freq_kHz;
NvU32 volt_uV;
NvU32 d;
NvU32 e;
NvU32 f;
NvU32 g;
} memEntries[23];
NvU32 buf1[1064];
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
typedef struct {
NvU32 version;
NvU32 flags;
NvU32 count; // unsure
NvU32 unknown;
NvU32 value_uV;
NvU32 buf1[30];
} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_TOPO*); // EDCF624E 1-0048
NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_INFO*);
NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetLimit(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_LIMIT*);
@ -146,17 +195,21 @@ NvAPI_Status NvAPI_DLL_ClientThermalPoliciesSetLimit(NvPhysicalGpuHandle hPhysic @@ -146,17 +195,21 @@ NvAPI_Status NvAPI_DLL_ClientThermalPoliciesSetLimit(NvPhysicalGpuHandle hPhysic
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS*);
// to dig...
// Pascal GTX only
NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCKS_RANGE*);
NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_MASKS*); // 0x507B4B59
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
// Maxwell ?
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortString serial);
NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo);
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
NvAPI_Status NvAPI_DLL_Unload();

14
cuda.cpp

@ -199,9 +199,19 @@ void cuda_reset_device(int thr_id, bool *init) @@ -199,9 +199,19 @@ void cuda_reset_device(int thr_id, bool *init)
int cuda_available_memory(int thr_id)
{
int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal, mfree = 0;
size_t mtotal = 0, mfree = 0;
cudaDeviceProp props;
cudaSetDevice(dev_id);
cudaMemGetInfo(&mfree, &mtotal);
cudaDeviceSynchronize();
if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
if (!strstr(props.name, "GTX 10"))
// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
cudaMemGetInfo(&mfree, &mtotal);
#else
cudaMemGetInfo(&mfree, &mtotal);
#endif
}
return (int) (mfree / (1024 * 1024));
}

56
nvapi.cpp

@ -119,6 +119,16 @@ NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle handle, @@ -119,6 +119,16 @@ NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle handle,
return (*pointer)(handle, pPolicies);
}
#define NVAPI_ID_POWERTOPO_GET 0xEDCF624E
NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle handle, NVAPI_GPU_POWER_TOPO* topo) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*))nvidia_handle->query(NVAPI_ID_POWERTOPO_GET);
}
return (*pointer)(handle, topo);
}
#define NVAPI_ID_THERMAL_INFO 0x0D258BB5
NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle handle, NVAPI_GPU_THERMAL_INFO* pInfo) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_THERMAL_INFO*) = NULL;
@ -169,7 +179,7 @@ NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVIDI @@ -169,7 +179,7 @@ NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVIDI
return (*pointer)(handle, status);
}
#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A
#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A // Pascal
NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLOCKS_RANGE* range) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCKS_RANGE*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@ -179,7 +189,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLO @@ -179,7 +189,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLO
return (*pointer)(handle, range);
}
#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59
#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59 // Pascal
NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK_MASKS* range) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_MASKS*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@ -189,7 +199,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK @@ -189,7 +199,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK
return (*pointer)(handle, range);
}
#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133
#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133 // Pascal
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOCK_TABLE* range) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_TABLE*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@ -199,8 +209,25 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC @@ -199,8 +209,25 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
return (*pointer)(handle, range);
}
#define NVAPI_ID_CLK_BOOST_CURVE 0x0700004A //??
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*))nvidia_handle->query(NVAPI_ID_VFP_CURVE_GET);
}
return (*pointer)(handle, curve);
}
#define NVAPI_ID_VOLT_STATUS_GET 0xC16C7E2C
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVAPI_VOLT_STATUS* data) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*))nvidia_handle->query(NVAPI_ID_VOLT_STATUS_GET);
}
return (*pointer)(handle, data);
}
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
@ -212,14 +239,25 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ @@ -212,14 +239,25 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
return (*pointer)(handle, pFreqs);
}
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // NOT SUPPORTED
NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*) = NULL;
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported)
// allow to set gpu/mem core freq delta
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
}
return (*pointer)(handle, pSet);
}
// allow to set gpu core voltage delta
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
}
return (*pointer)(handle, pPerfPstatesInfo);
return (*pointer)(handle, pSet);
}
#define NVAPI_ID_UNLOAD 0xD22BDD7E

265
nvml.cpp

@ -35,6 +35,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; @@ -35,6 +35,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
extern uint32_t device_gpu_clocks[MAX_GPUS];
extern uint32_t device_mem_clocks[MAX_GPUS];
extern uint32_t device_plimit[MAX_GPUS];
extern uint8_t device_tlimit[MAX_GPUS];
extern int8_t device_pstate[MAX_GPUS];
uint32_t clock_prev[MAX_GPUS] = { 0 };
@ -371,7 +372,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) @@ -371,7 +372,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
if (rc == NVML_SUCCESS)
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
else {
applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
return -1;
}
@ -953,8 +954,6 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) @@ -953,8 +954,6 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
return 0;
}
#define FREQ_GETVAL(clk) (clk.typeId == 0 ? clk.data.single.freq_kHz : clk.data.range.maxFreq_kHz)
int nvapi_pstateinfo(unsigned int devNum)
{
uint32_t n;
@ -964,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -964,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
// useless on init but...
nvapi_getpstate(devNum, &current);
#if 0
// Unsure of the meaning of these values
NVAPI_GPU_POWER_TOPO topo = { 0 };
topo.version = NVAPI_GPU_POWER_TOPO_VER;
if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
if (topo.count)
applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);
// Ok on 970, not pascal
NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
pset2.ov.numVoltages = 1;
pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv;
ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
#endif
NV_GPU_PERF_PSTATES20_INFO info = { 0 };
info.version = NV_GPU_PERF_PSTATES20_INFO_VER;
if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], &info)) != NVAPI_OK) {
@ -973,46 +989,59 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -973,46 +989,59 @@ int nvapi_pstateinfo(unsigned int devNum)
applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
return -1;
}
applog(LOG_RAW, "%u P-states with %u clocks %s",
info.numPstates, info.numClocks, info.numBaseVoltages ? "and voltage":"");
for (n=0; n < info.numPstates; n++) {
NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info.pstates[n].clocks;
applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %3u-%4u MHz%s %4u mV%s \x7F %d/%d",
applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
info.pstates[n].pstateId == current ? ">":" ", info.pstates[n].pstateId,
FREQ_GETVAL(clocks[1])/1000, clocks[1].bIsEditable ? "*":" ",
clocks[0].data.range.minFreq_kHz/1000, FREQ_GETVAL(clocks[0])/1000, clocks[0].bIsEditable ? "*":" ",
clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
info.pstates[n].baseVoltages[0].volt_uV/1000, info.pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
applog(LOG_RAW, " OC %4d MHz %6.1f MHz",
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
}
}
// boost over volting (GTX 9xx) ?
// boost over volting (GTX 9xx only ?)
for (n=0; n < info.ov.numVoltages; n++) {
applog(LOG_RAW, " OV: %u mV%s + %d/%d",
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
}
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz >Current",
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks",
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Base Clocks",
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks",
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Boost Clocks",
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current",
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
#if 1
// Maxwell only
NVAPI_VOLT_STATUS pvdom = { 0 };
pvdom.version = NVAPI_VOLT_STATUS_VER;
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &pvdom)) == NVAPI_OK) {
if (pvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
}
uint8_t plim = nvapi_get_plimit(devNum);
applog(LOG_RAW, " Power limit is set to %u%%", (uint32_t) plim);
NV_GPU_THERMAL_SETTINGS tset = { 0 };
NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
@ -1025,16 +1054,9 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1025,16 +1054,9 @@ int nvapi_pstateinfo(unsigned int devNum)
applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]",
tlim.entries[0].value >> 8, tset.sensor[0].currentTemp,
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
// ok
//tlim.entries[0].value = 80 << 8;
//tlim.flags = 1;
//ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
}
#endif
uint8_t plim = nvapi_getplimit(devNum);
applog(LOG_RAW, " Power limit coef. is set to %u%%", (uint32_t) plim);
#if 1
#if 0
// seems empty..
NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
@ -1055,32 +1077,61 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1055,32 +1077,61 @@ int nvapi_pstateinfo(unsigned int devNum)
if (boost.clocks[n].gpuDelta) gpuClocks++;
}
if (gpuClocks || memClocks) {
applog(LOG_RAW, "Boost table contains %d gpu clocks and %d mem clocks.", gpuClocks, memClocks);
// PASCAL GTX ONLY
//if (gpuClocks || memClocks) {
NVAPI_CLOCK_TABLE table = { 0 };
table.version = NVAPI_CLOCK_TABLE_VER;
memcpy(table.mask, boost.mask, 12);
ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], &table);
gpuClocks = 0, memClocks = 0;
for (n=0; n < 12; n++) {
if (table.buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table.buf0[n]);
}
for (n=0; n < 80; n++) {
if (table.gpuDeltas[n].freqDelta)
applog(LOG_RAW, "boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/1000);
if (table.gpuDeltas[n].freqDelta) {
// note: gpu delta value seems to be x2, not the memory
//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/2000);
gpuClocks++;
}
}
for (n=0; n < 23; n++) {
if (table.memFilled[n])
applog(LOG_RAW, "boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
if (table.memFilled[n]) {
//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
memClocks++;
}
}
for (n=0; n < 1529; n++) {
if (table.buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table.buf1[n]);
}
}
applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
NVAPI_VFP_CURVE curve = { 0 };
curve.version = NVAPI_VFP_CURVE_VER;
memcpy(curve.mask, boost.mask, 12);
ret = NvAPI_DLL_GetVFPCurve(phys[devNum], &curve);
gpuClocks = 0, memClocks = 0;
for (n=0; n < 80; n++) {
if (curve.gpuEntries[n].freq_kHz || curve.gpuEntries[n].volt_uV) {
// applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve.gpuEntries[n].freq_kHz/1000, curve.gpuEntries[n].volt_uV/1000);
gpuClocks++;
}
}
for (n=0; n < 23; n++) {
if (curve.memEntries[n].freq_kHz || curve.memEntries[n].volt_uV) {
// applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve.memEntries[n].freq_kHz/1000, curve.memEntries[n].volt_uV/1000);
memClocks++;
}
}
for (n=0; n < 1064; n++) {
if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
}
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
//}
#endif
return 0;
}
uint8_t nvapi_getplimit(unsigned int devNum)
uint8_t nvapi_get_plimit(unsigned int devNum)
{
NvAPI_Status ret = NVAPI_OK;
NVAPI_GPU_POWER_STATUS pol = { 0 };
@ -1095,7 +1146,7 @@ uint8_t nvapi_getplimit(unsigned int devNum) @@ -1095,7 +1146,7 @@ uint8_t nvapi_getplimit(unsigned int devNum)
return (uint8_t) (pol.entries[0].power / 1000); // in percent
}
int nvapi_setplimit(unsigned int devNum, uint16_t percent)
int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
{
NvAPI_Status ret = NVAPI_OK;
uint32_t val = percent * 1000;
@ -1126,6 +1177,98 @@ int nvapi_setplimit(unsigned int devNum, uint16_t percent) @@ -1126,6 +1177,98 @@ int nvapi_setplimit(unsigned int devNum, uint16_t percent)
return ret;
}
int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
{
NvAPI_Status ret;
uint32_t val = limit;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_THERMAL_SETTINGS tset = { 0 };
NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
tset.version = NV_GPU_THERMAL_SETTINGS_VER;
NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset);
tnfo.version = NVAPI_GPU_THERMAL_INFO_VER;
NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo);
tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER;
if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) {
tlim.entries[0].value = val << 8;
tlim.flags = 1;
ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
if (ret == NVAPI_OK) {
applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]",
devNum, val, tset.sensor[0].currentTemp,
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
} else {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string,
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
}
}
return (int) ret;
}
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
if (ret) return ret;
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
pset1.numPstates = 1;
pset1.numClocks = 1;
// Ok on both 1080 and 970
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
if (ret == NVAPI_OK) {
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
}
return ret;
}
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
if (ret) return ret;
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
pset1.numPstates = 1;
pset1.numClocks = 1;
// Memory boost clock seems only ok on pascal with this api
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
if (ret == NVAPI_OK) {
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
}
return ret;
}
int nvapi_init()
{
int num_gpus = cuda_num_devices();
@ -1191,23 +1334,49 @@ int nvapi_init() @@ -1191,23 +1334,49 @@ int nvapi_init()
sprintf(driver_version,"%d.%02d", udv / 100, udv % 100);
}
return 0;
}
int nvapi_init_settings()
{
// nvapi.dll
ret = nvapi_dll_init();
if (ret == NVAPI_OK) {
for (int n=0; n < opt_n_threads; n++) {
int dev_id = device_map[n % MAX_GPUS];
if (device_plimit[dev_id]) {
nvapi_setplimit(nvapi_dev_map[dev_id], device_plimit[dev_id]); // 0=default
uint32_t res = nvapi_getplimit(nvapi_dev_map[dev_id]);
gpulog(LOG_INFO, n, "NVAPI power limit is set to %u%%", res);
int ret = nvapi_dll_init();
if (ret != NVAPI_OK)
return ret;
for (int n=0; n < opt_n_threads; n++) {
int dev_id = device_map[n % MAX_GPUS];
if (device_plimit[dev_id]) {
if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
}
}
if (device_tlimit[dev_id]) {
nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
}
if (device_gpu_clocks[dev_id]) {
ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
if (ret) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
gpulog(LOG_WARNING, n, "Boost gpu clock %s", string);
}
if (device_pstate[dev_id]) {
// todo...
}
if (device_mem_clocks[dev_id]) {
ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
if (ret) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
gpulog(LOG_WARNING, n, "Boost mem clock %s", string);
}
}
if (device_pstate[dev_id]) {
// dunno how via nvapi or/and pascal
}
}
return 0;
return ret;
}
#endif
@ -1306,7 +1475,7 @@ unsigned int gpu_power(struct cgpu_info *gpu) @@ -1306,7 +1475,7 @@ unsigned int gpu_power(struct cgpu_info *gpu)
if (support == -1) {
unsigned int pct = 0;
nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
pct *= nvapi_getplimit(nvapi_dev_map[gpu->gpu_id]);
pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
pct /= 100;
mw = pct; // to fix
}

10
nvml.h

@ -217,13 +217,15 @@ int gpu_info(struct cgpu_info *gpu); @@ -217,13 +217,15 @@ int gpu_info(struct cgpu_info *gpu);
int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
// to debug clocks..
int nvapi_pstateinfo(unsigned int devNum);
uint8_t nvapi_getplimit(unsigned int devNum);
/* nvapi functions */
#ifdef WIN32
int nvapi_init();
int nvapi_init_settings();
// to debug nvapi..
int nvapi_pstateinfo(unsigned int devNum);
uint8_t nvapi_get_plimit(unsigned int devNum);
#endif
#endif /* USE_WRAPNVML */

Loading…
Cancel
Save