From 7ff179abe906c819059dd688f0490758592484e5 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sat, 25 Jun 2016 16:16:09 +0200 Subject: [PATCH] nvapi: base memclock was wrong on maxwell --- compat/nvapi/nvapi_ccminer.h | 17 +++++- nvapi.cpp | 13 ++++- nvml.cpp | 104 ++++++++++++++++++++++++++++------- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/compat/nvapi/nvapi_ccminer.h b/compat/nvapi/nvapi_ccminer.h index 83b9697..7b07bf7 100644 --- a/compat/nvapi/nvapi_ccminer.h +++ b/compat/nvapi/nvapi_ccminer.h @@ -182,6 +182,19 @@ typedef struct { } NVAPI_VOLT_STATUS; // 140 bytes (1-008c) #define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1) +typedef struct { + NvU32 version; + NvU32 flags; + NvU32 filled; // 1 + struct { + NvU32 volt_uV; + NvU32 unknown; + } entries[128]; + // some empty tables then... + NvU32 buf1[3888]; +} NVAPI_VOLTAGES_TABLE; // 16588 bytes (1-40cc) +#define NVAPI_VOLTAGES_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_VOLTAGES_TABLE, 1) + NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string); NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*); @@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133 NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4 -// Maxwell ? 
+// Maxwell only NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C +NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE*); // 7D656244 1-40CC NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs); @@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet); NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet); + NvAPI_Status NvAPI_DLL_Unload(); #define NV_ASSERT(x) { NvAPI_Status ret = x; if(ret != NVAPI_OK) return ret; } diff --git a/nvapi.cpp b/nvapi.cpp index 4acf405..e15995e 100644 --- a/nvapi.cpp +++ b/nvapi.cpp @@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ return (*pointer)(handle, pFreqs); } -#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported) +#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // allow to set gpu/mem core freq delta NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL; @@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS return (*pointer)(handle, pSet); } +// maxwell voltage table +#define NVAPI_ID_VOLTAGES 0x7D656244 // 1-40cc +NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE *pInfo) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*))nvidia_handle->query(NVAPI_ID_VOLTAGES); + } + return (*pointer)(handle, pInfo); +} + #define 
NVAPI_ID_UNLOAD 0xD22BDD7E NvAPI_Status NvAPI_DLL_Unload() { static NvAPI_Status (*pointer)() = NULL; diff --git a/nvml.cpp b/nvml.cpp index 92e5939..7447812 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum) // useless on init but... nvapi_getpstate(devNum, &current); +#if 0 + // try :p + uint32_t* buf = (uint32_t*) calloc(1, 0x8000); + for (int i=8; i < 0x8000 && buf; i+=4) { + buf[0] = 0x10000 + i; + if ((ret = NvAPI_DLL_XXX(phys[devNum], buf)) != NVAPI_INCOMPATIBLE_STRUCT_VERSION) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string); + for (int n=0; n < i/32; n++) + applog_hex(&buf[n*(32/4)], 80); + break; + } + } + free(buf); +#endif + #if 0 // Unsure of the meaning of these values NVAPI_GPU_POWER_TOPO topo = { 0 }; @@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum) tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); } -#if 0 - // seems empty.. 
- NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 }; - volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER; - ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &volts); -#endif - #if 1 // Read pascal Clocks Table, Empty on 9xx NVAPI_CLOCKS_RANGE ranges = { 0 }; @@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum) } // PASCAL GTX ONLY - //if (gpuClocks || memClocks) { + if (gpuClocks || memClocks) { NVAPI_CLOCK_TABLE table = { 0 }; table.version = NVAPI_CLOCK_TABLE_VER; memcpy(table.mask, boost.mask, 12); @@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum) if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]); } applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); - //} + } + + // Maxwell + else { + NVAPI_VOLTAGES_TABLE volts = { 0 }; + volts.version = NVAPI_VOLTAGES_TABLE_VER; + int entries = 0; + ret = NvAPI_DLL_GetVoltages(phys[devNum], &volts); + for (n=0; n < 128; n++) { + if (volts.entries[n].volt_uV) + entries++; + } + applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); + } #endif return 0; } @@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) { NvAPI_Status ret; + NvS32 delta = 0; if (devNum >= nvapi_dev_cnt) return -ENODEV; - +#if 0 + // wrong api to get default base clock when modified, cuda props seems fine NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - if (ret) return ret; + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + } - NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = 
NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! + if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2; + } +#endif + + cudaDeviceProp props = { 0 }; + NvU32 busId = 0xFFFF; + ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); + for (int d=0; d= nvapi_dev_cnt) return -ENODEV; + // wrong to get default base clock (when modified) on maxwell (same as cuda props one) NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; - ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - if (ret) return ret; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + } + + // seems ok on maxwell and pascal for the mem clocks + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless + if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; + } + + if (delta == (clock * 1000)) + return ret; - NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + // todo: bounds check with GetPstates20 NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; pset1.numPstates = 1; pset1.numClocks = 1; - // Memory boost clock seems only ok on pascal with this api pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; - pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); if (ret == 
NVAPI_OK) { - applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000); + applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000); } return ret; }