nvapi: base memclock was wrong on maxwell
This commit is contained in:
parent
bc6ac3a3ab
commit
7ff179abe9
@ -182,6 +182,19 @@ typedef struct {
|
||||
} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
|
||||
#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
|
||||
|
||||
typedef struct {
|
||||
NvU32 version;
|
||||
NvU32 flags;
|
||||
NvU32 filled; // 1
|
||||
struct {
|
||||
NvU32 volt_uV;
|
||||
NvU32 unknown;
|
||||
} entries[128];
|
||||
// some empty tables then...
|
||||
NvU32 buf1[3888];
|
||||
} NVAPI_VOLTAGES_TABLE; // 16588 bytes (1-40cc)
|
||||
#define NVAPI_VOLTAGES_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_VOLTAGES_TABLE, 1)
|
||||
|
||||
NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
|
||||
|
||||
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
|
||||
@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI
|
||||
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
|
||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
||||
|
||||
// Maxwell ?
|
||||
// Maxwell only
|
||||
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
||||
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE*); // 7D656244 1-40CC
|
||||
|
||||
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
|
||||
|
||||
@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt
|
||||
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
|
||||
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
|
||||
|
||||
|
||||
NvAPI_Status NvAPI_DLL_Unload();
|
||||
|
||||
// Evaluates x once, stores it in a block-local NvAPI_Status named `ret`, and
// makes the enclosing function return that status when it is not NVAPI_OK.
// Expands to a plain braced block (not do/while), and the local `ret` shadows
// any `ret` of the caller inside that block.
#define NV_ASSERT(x) { NvAPI_Status ret = x; if(ret != NVAPI_OK) return ret; }
|
||||
|
13
nvapi.cpp
13
nvapi.cpp
@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
|
||||
return (*pointer)(handle, pFreqs);
|
||||
}
|
||||
|
||||
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 seems unsupported)
|
||||
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B
|
||||
// allow to set gpu/mem core freq delta
|
||||
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
|
||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
|
||||
@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS
|
||||
return (*pointer)(handle, pSet);
|
||||
}
|
||||
|
||||
// maxwell voltage table
|
||||
#define NVAPI_ID_VOLTAGES 0x7D656244 // 1-40cc
|
||||
/**
 * Fetch the voltage table through the NvAPI entry point identified by
 * NVAPI_ID_VOLTAGES.
 *
 * The entry point is looked up with nvidia_handle->query() on the first
 * successful call and cached in a function-local static for later calls.
 *
 * @param handle  physical GPU handle, forwarded unchanged to the entry point
 * @param pInfo   table buffer, forwarded unchanged to the entry point
 * @return NVAPI_API_NOT_INITIALIZED when the nvapi dll is not loaded or the
 *         entry point could not be resolved; otherwise the status returned
 *         by the resolved entry point.
 */
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE *pInfo) {
	typedef NvAPI_Status (*GetVoltagesFn)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*);
	static GetVoltagesFn pointer = NULL;

	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;

	if(!pointer) {
		pointer = (GetVoltagesFn) nvidia_handle->query(NVAPI_ID_VOLTAGES);
		// Never call through a null pointer: if the lookup yields nothing,
		// report failure and retry the lookup on the next call.
		// NOTE(review): NVAPI_NO_IMPLEMENTATION would be a more precise status
		// if the NvAPI header in use declares it - confirm and switch.
		if(!pointer) return NVAPI_API_NOT_INITIALIZED;
	}

	return (*pointer)(handle, pInfo);
}
|
||||
|
||||
#define NVAPI_ID_UNLOAD 0xD22BDD7E
|
||||
NvAPI_Status NvAPI_DLL_Unload() {
|
||||
static NvAPI_Status (*pointer)() = NULL;
|
||||
|
104
nvml.cpp
104
nvml.cpp
@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
// useless on init but...
|
||||
nvapi_getpstate(devNum, &current);
|
||||
|
||||
#if 0
|
||||
// try :p
|
||||
uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
|
||||
for (int i=8; i < 0x8000 && buf; i+=4) {
|
||||
buf[0] = 0x10000 + i;
|
||||
if ((ret = NvAPI_DLL_XXX(phys[devNum], buf)) != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
|
||||
NvAPI_ShortString string;
|
||||
NvAPI_GetErrorMessage(ret, string);
|
||||
applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
|
||||
for (int n=0; n < i/32; n++)
|
||||
applog_hex(&buf[n*(32/4)], 80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// Unsure of the meaning of these values
|
||||
NVAPI_GPU_POWER_TOPO topo = { 0 };
|
||||
@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
|
||||
}
|
||||
|
||||
#if 0
|
||||
// seems empty..
|
||||
NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
|
||||
volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
|
||||
ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &volts);
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
// Read pascal Clocks Table, Empty on 9xx
|
||||
NVAPI_CLOCKS_RANGE ranges = { 0 };
|
||||
@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
}
|
||||
|
||||
// PASCAL GTX ONLY
|
||||
//if (gpuClocks || memClocks) {
|
||||
if (gpuClocks || memClocks) {
|
||||
NVAPI_CLOCK_TABLE table = { 0 };
|
||||
table.version = NVAPI_CLOCK_TABLE_VER;
|
||||
memcpy(table.mask, boost.mask, 12);
|
||||
@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
|
||||
}
|
||||
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
|
||||
//}
|
||||
}
|
||||
|
||||
// Maxwell
|
||||
else {
|
||||
NVAPI_VOLTAGES_TABLE volts = { 0 };
|
||||
volts.version = NVAPI_VOLTAGES_TABLE_VER;
|
||||
int entries = 0;
|
||||
ret = NvAPI_DLL_GetVoltages(phys[devNum], &volts);
|
||||
for (n=0; n < 128; n++) {
|
||||
if (volts.entries[n].volt_uV)
|
||||
entries++;
|
||||
}
|
||||
applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
|
||||
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
||||
{
|
||||
NvAPI_Status ret;
|
||||
NvS32 delta = 0;
|
||||
|
||||
if (devNum >= nvapi_dev_cnt)
|
||||
return -ENODEV;
|
||||
|
||||
#if 0
|
||||
// wrong api to get default base clock when modified, cuda props seems fine
|
||||
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
||||
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
||||
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
||||
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
||||
if (ret) return ret;
|
||||
if (ret == NVAPI_OK) {
|
||||
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
|
||||
}
|
||||
|
||||
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
|
||||
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
|
||||
if (ret == NVAPI_OK) {
|
||||
if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
|
||||
delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
|
||||
}
|
||||
#endif
|
||||
|
||||
cudaDeviceProp props = { 0 };
|
||||
NvU32 busId = 0xFFFF;
|
||||
ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
|
||||
for (int d=0; d<nvapi_dev_cnt; d++) {
|
||||
// unsure about devNum, so be safe
|
||||
cudaGetDeviceProperties(&props, d);
|
||||
if (props.pciBusID == busId) {
|
||||
delta = (clock * 1000) - props.clockRate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (delta == (clock * 1000))
|
||||
return ret;
|
||||
|
||||
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
||||
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
||||
@ -1232,10 +1281,10 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
||||
pset1.numClocks = 1;
|
||||
// Ok on both 1080 and 970
|
||||
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
|
||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
|
||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
||||
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
||||
if (ret == NVAPI_OK) {
|
||||
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
|
||||
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1243,28 +1292,43 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
||||
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
||||
{
|
||||
NvAPI_Status ret;
|
||||
NvS32 delta = 0;
|
||||
|
||||
if (devNum >= nvapi_dev_cnt)
|
||||
return -ENODEV;
|
||||
|
||||
// wrong to get default base clock (when modified) on maxwell (same as cuda props one)
|
||||
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
||||
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
||||
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
||||
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
||||
if (ret) return ret;
|
||||
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless
|
||||
if (ret == NVAPI_OK) {
|
||||
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
|
||||
}
|
||||
|
||||
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
|
||||
// seems ok on maxwell and pascal for the mem clocks
|
||||
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
|
||||
if (ret == NVAPI_OK) {
|
||||
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
||||
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
||||
}
|
||||
|
||||
if (delta == (clock * 1000))
|
||||
return ret;
|
||||
|
||||
// todo: bounds check with GetPstates20
|
||||
|
||||
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
||||
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
||||
pset1.numPstates = 1;
|
||||
pset1.numClocks = 1;
|
||||
// Memory boost clock seems only ok on pascal with this api
|
||||
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
|
||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
|
||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
||||
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
||||
if (ret == NVAPI_OK) {
|
||||
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
|
||||
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user