Browse Source

nvapi: base memclock was wrong on maxwell

2upstream
Tanguy Pruvot 8 years ago
parent
commit
7ff179abe9
  1. 17
      compat/nvapi/nvapi_ccminer.h
  2. 13
      nvapi.cpp
  3. 104
      nvml.cpp

17
compat/nvapi/nvapi_ccminer.h

@ -182,6 +182,19 @@ typedef struct { @@ -182,6 +182,19 @@ typedef struct {
} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
// Buffer exchanged with NvAPI_DLL_GetVoltages() (voltage table, used on the Maxwell path).
// The layout must not change: 3 + 128*2 + 3888 NvU32 fields, which gives the
// 16588 bytes (0x40cc) noted below when NvU32 is 4 bytes wide.
typedef struct {
// set by the caller to NVAPI_VOLTAGES_TABLE_VER before the call
NvU32 version;
NvU32 flags;
NvU32 filled; // 1
// one slot per voltage level; callers treat a zero volt_uV as an unused slot
struct {
NvU32 volt_uV; // presumably microvolts, going by the field name
NvU32 unknown;
} entries[128];
// some empty tables then...
NvU32 buf1[3888];
} NVAPI_VOLTAGES_TABLE; // 16588 bytes (1-40cc)
#define NVAPI_VOLTAGES_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_VOLTAGES_TABLE, 1)
NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI @@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
// Maxwell ?
// Maxwell only
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE*); // 7D656244 1-40CC
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt @@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
NvAPI_Status NvAPI_DLL_Unload();
// Evaluate the NvAPI call x once and return its status from the enclosing
// function when it is not NVAPI_OK.
// - x is parenthesized so any expression can be passed safely.
// - the temporary has a macro-specific name, so an argument that mentions the
//   caller's own `ret` variable is no longer captured by the macro's local.
// The expansion stays a plain braced block so existing call sites, with or
// without a trailing semicolon, keep compiling.
#define NV_ASSERT(x) { NvAPI_Status nv_assert_status_ = (x); if(nv_assert_status_ != NVAPI_OK) return nv_assert_status_; }

13
nvapi.cpp

@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ @@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
return (*pointer)(handle, pFreqs);
}
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 seems unsupported)
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B
// allow to set gpu/mem core freq delta
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS @@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS
return (*pointer)(handle, pSet);
}
// maxwell voltage table
#define NVAPI_ID_VOLTAGES 0x7D656244 // 1-40cc
/**
 * Fetch the voltage table through the private NVAPI entry point
 * NVAPI_ID_VOLTAGES.
 *
 * The entry point is looked up on first use and cached in a function-local
 * static pointer.
 *
 * handle: physical GPU handle forwarded to the driver call
 * pInfo:  table buffer forwarded to the driver call
 *
 * Returns NVAPI_API_NOT_INITIALIZED when the nvapi DLL is not loaded or when
 * the entry point could not be obtained, otherwise the status returned by the
 * driver call.
 */
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE *pInfo) {
	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*) = NULL;
	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
	if(!pointer) {
		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*))nvidia_handle->query(NVAPI_ID_VOLTAGES);
	}
	// Never call through a null pointer: the lookup result was previously
	// used unchecked.
	// NOTE(review): NVAPI_NO_IMPLEMENTATION may be the more precise status here
	// if the nvapi headers in use define it - confirm before switching.
	if(!pointer) return NVAPI_API_NOT_INITIALIZED;
	return (*pointer)(handle, pInfo);
}
#define NVAPI_ID_UNLOAD 0xD22BDD7E
NvAPI_Status NvAPI_DLL_Unload() {
static NvAPI_Status (*pointer)() = NULL;

104
nvml.cpp

@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
// useless on init but...
nvapi_getpstate(devNum, &current);
#if 0
// try :p
uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
for (int i=8; i < 0x8000 && buf; i+=4) {
buf[0] = 0x10000 + i;
if ((ret = NvAPI_DLL_XXX(phys[devNum], buf)) != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
for (int n=0; n < i/32; n++)
applog_hex(&buf[n*(32/4)], 80);
break;
}
}
free(buf);
#endif
#if 0
// Unsure of the meaning of these values
NVAPI_GPU_POWER_TOPO topo = { 0 };
@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum)
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
}
#if 0
// seems empty..
NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &volts);
#endif
#if 1
// Read pascal Clocks Table, Empty on 9xx
NVAPI_CLOCKS_RANGE ranges = { 0 };
@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum)
}
// PASCAL GTX ONLY
//if (gpuClocks || memClocks) {
if (gpuClocks || memClocks) {
NVAPI_CLOCK_TABLE table = { 0 };
table.version = NVAPI_CLOCK_TABLE_VER;
memcpy(table.mask, boost.mask, 12);
@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum)
if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
}
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
//}
}
// Maxwell
else {
NVAPI_VOLTAGES_TABLE volts = { 0 };
volts.version = NVAPI_VOLTAGES_TABLE_VER;
int entries = 0;
ret = NvAPI_DLL_GetVoltages(phys[devNum], &volts);
for (n=0; n < 128; n++) {
if (volts.entries[n].volt_uV)
entries++;
}
applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
}
#endif
return 0;
}
@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) @@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
{
NvAPI_Status ret;
NvS32 delta = 0;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
#if 0
// wrong api to get default base clock when modified, cuda props seems fine
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
if (ret) return ret;
if (ret == NVAPI_OK) {
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
}
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
if (ret == NVAPI_OK) {
if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
}
#endif
cudaDeviceProp props = { 0 };
NvU32 busId = 0xFFFF;
ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
for (int d=0; d<nvapi_dev_cnt; d++) {
// unsure about devNum, so be safe
cudaGetDeviceProperties(&props, d);
if (props.pciBusID == busId) {
delta = (clock * 1000) - props.clockRate;
break;
}
}
if (delta == (clock * 1000))
return ret;
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
@ -1232,10 +1281,10 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) @@ -1232,10 +1281,10 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
pset1.numClocks = 1;
// Ok on both 1080 and 970
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
if (ret == NVAPI_OK) {
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
}
return ret;
}
@ -1243,28 +1292,43 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) @@ -1243,28 +1292,43 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
/**
 * Request a memory clock on one NVAPI device by programming a frequency
 * delta for the memory domain through NvAPI_DLL_SetPstates20v1().
 *
 * The delta is (clock * 1000) minus a reference clock. Two sources for the
 * reference are tried, and the later successful one wins:
 *   1. the base clock from NvAPI_GPU_GetAllClockFrequencies()
 *   2. the first pstate clock entry from NvAPI_GPU_GetPstatesInfoEx(), used
 *      only when that entry belongs to the memory domain
 *
 * devNum: index into phys[]; must be below nvapi_dev_cnt
 * clock:  wanted memory clock; it is scaled by 1000 before being compared
 *         with the kHz delta field, so presumably expressed in MHz
 *
 * Returns -ENODEV for an out-of-range device, otherwise the status of the
 * last NVAPI call that was made.
 */
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
{
	NvAPI_Status ret;
	NvS32 delta = 0;

	if (devNum >= nvapi_dev_cnt)
		return -ENODEV;

	// Base clock query. The original notes say this is the wrong reference on
	// maxwell once the clock has been modified (same value as the cuda props),
	// so a failure here is tolerated and the pstates query below may override
	// the result. The query is issued once only.
	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
	if (ret == NVAPI_OK) {
		delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
	}

	// seems ok on maxwell and pascal for the mem clocks
	NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
	deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
	ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1);
	if (ret == NVAPI_OK) {
		if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
			delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
	}

	// A delta equal to the full target means the reference clock read as 0:
	// nothing sensible can be programmed, so hand back the last status.
	// NOTE(review): when neither query updates delta it stays 0 and a zero
	// delta is still applied below - confirm this is intended.
	if (delta == (clock * 1000))
		return ret;

	// todo: bounds check with GetPstates20

	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
	pset1.numPstates = 1;
	pset1.numClocks = 1;
	// Memory boost clock seems only ok on pascal with this api
	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
	pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
	if (ret == NVAPI_OK) {
		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
	}
	return ret;
}

Loading…
Cancel
Save