mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-14 08:48:04 +00:00
nvapi: base memclock was wrong on maxwell
This commit is contained in:
parent
bc6ac3a3ab
commit
7ff179abe9
@ -182,6 +182,19 @@ typedef struct {
|
|||||||
} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
|
} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
|
||||||
#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
|
#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
NvU32 version;
|
||||||
|
NvU32 flags;
|
||||||
|
NvU32 filled; // 1
|
||||||
|
struct {
|
||||||
|
NvU32 volt_uV;
|
||||||
|
NvU32 unknown;
|
||||||
|
} entries[128];
|
||||||
|
// some empty tables then...
|
||||||
|
NvU32 buf1[3888];
|
||||||
|
} NVAPI_VOLTAGES_TABLE; // 16588 bytes (1-40cc)
|
||||||
|
#define NVAPI_VOLTAGES_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_VOLTAGES_TABLE, 1)
|
||||||
|
|
||||||
NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
|
NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
|
||||||
|
|
||||||
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
|
NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
|
||||||
@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI
|
|||||||
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
|
NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
|
||||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
||||||
|
|
||||||
// Maxwell ?
|
// Maxwell only
|
||||||
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
||||||
|
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE*); // 7D656244 1-40CC
|
||||||
|
|
||||||
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
|
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
|
||||||
|
|
||||||
@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt
|
|||||||
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
|
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
|
||||||
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
|
NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
|
||||||
|
|
||||||
|
|
||||||
NvAPI_Status NvAPI_DLL_Unload();
|
NvAPI_Status NvAPI_DLL_Unload();
|
||||||
|
|
||||||
#define NV_ASSERT(x) { NvAPI_Status ret = x; if(ret != NVAPI_OK) return ret; }
|
#define NV_ASSERT(x) { NvAPI_Status ret = x; if(ret != NVAPI_OK) return ret; }
|
||||||
|
13
nvapi.cpp
13
nvapi.cpp
@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
|
|||||||
return (*pointer)(handle, pFreqs);
|
return (*pointer)(handle, pFreqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported)
|
#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B
|
||||||
// allow to set gpu/mem core freq delta
|
// allow to set gpu/mem core freq delta
|
||||||
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
|
NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
|
||||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
|
||||||
@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS
|
|||||||
return (*pointer)(handle, pSet);
|
return (*pointer)(handle, pSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxwell voltage table
|
||||||
|
#define NVAPI_ID_VOLTAGES 0x7D656244 // 1-40cc
|
||||||
|
NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE *pInfo) {
|
||||||
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*) = NULL;
|
||||||
|
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||||
|
if(!pointer) {
|
||||||
|
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*))nvidia_handle->query(NVAPI_ID_VOLTAGES);
|
||||||
|
}
|
||||||
|
return (*pointer)(handle, pInfo);
|
||||||
|
}
|
||||||
|
|
||||||
#define NVAPI_ID_UNLOAD 0xD22BDD7E
|
#define NVAPI_ID_UNLOAD 0xD22BDD7E
|
||||||
NvAPI_Status NvAPI_DLL_Unload() {
|
NvAPI_Status NvAPI_DLL_Unload() {
|
||||||
static NvAPI_Status (*pointer)() = NULL;
|
static NvAPI_Status (*pointer)() = NULL;
|
||||||
|
104
nvml.cpp
104
nvml.cpp
@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
// useless on init but...
|
// useless on init but...
|
||||||
nvapi_getpstate(devNum, &current);
|
nvapi_getpstate(devNum, &current);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// try :p
|
||||||
|
uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
|
||||||
|
for (int i=8; i < 0x8000 && buf; i+=4) {
|
||||||
|
buf[0] = 0x10000 + i;
|
||||||
|
if ((ret = NvAPI_DLL_XXX(phys[devNum], buf)) != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
|
||||||
|
NvAPI_ShortString string;
|
||||||
|
NvAPI_GetErrorMessage(ret, string);
|
||||||
|
applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
|
||||||
|
for (int n=0; n < i/32; n++)
|
||||||
|
applog_hex(&buf[n*(32/4)], 80);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(buf);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// Unsure of the meaning of these values
|
// Unsure of the meaning of these values
|
||||||
NVAPI_GPU_POWER_TOPO topo = { 0 };
|
NVAPI_GPU_POWER_TOPO topo = { 0 };
|
||||||
@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
|
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
// seems empty..
|
|
||||||
NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
|
|
||||||
volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
|
|
||||||
ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &volts);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
// Read pascal Clocks Table, Empty on 9xx
|
// Read pascal Clocks Table, Empty on 9xx
|
||||||
NVAPI_CLOCKS_RANGE ranges = { 0 };
|
NVAPI_CLOCKS_RANGE ranges = { 0 };
|
||||||
@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// PASCAL GTX ONLY
|
// PASCAL GTX ONLY
|
||||||
//if (gpuClocks || memClocks) {
|
if (gpuClocks || memClocks) {
|
||||||
NVAPI_CLOCK_TABLE table = { 0 };
|
NVAPI_CLOCK_TABLE table = { 0 };
|
||||||
table.version = NVAPI_CLOCK_TABLE_VER;
|
table.version = NVAPI_CLOCK_TABLE_VER;
|
||||||
memcpy(table.mask, boost.mask, 12);
|
memcpy(table.mask, boost.mask, 12);
|
||||||
@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
|
if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
|
||||||
}
|
}
|
||||||
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
|
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
|
||||||
//}
|
}
|
||||||
|
|
||||||
|
// Maxwell
|
||||||
|
else {
|
||||||
|
NVAPI_VOLTAGES_TABLE volts = { 0 };
|
||||||
|
volts.version = NVAPI_VOLTAGES_TABLE_VER;
|
||||||
|
int entries = 0;
|
||||||
|
ret = NvAPI_DLL_GetVoltages(phys[devNum], &volts);
|
||||||
|
for (n=0; n < 128; n++) {
|
||||||
|
if (volts.entries[n].volt_uV)
|
||||||
|
entries++;
|
||||||
|
}
|
||||||
|
applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
|
|||||||
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
||||||
{
|
{
|
||||||
NvAPI_Status ret;
|
NvAPI_Status ret;
|
||||||
|
NvS32 delta = 0;
|
||||||
|
|
||||||
if (devNum >= nvapi_dev_cnt)
|
if (devNum >= nvapi_dev_cnt)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
#if 0
|
||||||
|
// wrong api to get default base clock when modified, cuda props seems fine
|
||||||
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
||||||
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
||||||
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
||||||
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
||||||
if (ret) return ret;
|
if (ret == NVAPI_OK) {
|
||||||
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
|
||||||
|
}
|
||||||
|
|
||||||
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||||
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||||
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
|
||||||
|
if (ret == NVAPI_OK) {
|
||||||
|
if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
|
||||||
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cudaDeviceProp props = { 0 };
|
||||||
|
NvU32 busId = 0xFFFF;
|
||||||
|
ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
|
||||||
|
for (int d=0; d<nvapi_dev_cnt; d++) {
|
||||||
|
// unsure about devNum, so be safe
|
||||||
|
cudaGetDeviceProperties(&props, d);
|
||||||
|
if (props.pciBusID == busId) {
|
||||||
|
delta = (clock * 1000) - props.clockRate;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (delta == (clock * 1000))
|
||||||
|
return ret;
|
||||||
|
|
||||||
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
||||||
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
||||||
@ -1232,10 +1281,10 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
|||||||
pset1.numClocks = 1;
|
pset1.numClocks = 1;
|
||||||
// Ok on both 1080 and 970
|
// Ok on both 1080 and 970
|
||||||
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
|
||||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
||||||
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
||||||
if (ret == NVAPI_OK) {
|
if (ret == NVAPI_OK) {
|
||||||
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
|
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1243,28 +1292,43 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
|||||||
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
||||||
{
|
{
|
||||||
NvAPI_Status ret;
|
NvAPI_Status ret;
|
||||||
|
NvS32 delta = 0;
|
||||||
|
|
||||||
if (devNum >= nvapi_dev_cnt)
|
if (devNum >= nvapi_dev_cnt)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
|
// wrong to get default base clock (when modified) on maxwell (same as cuda props one)
|
||||||
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
||||||
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
||||||
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
||||||
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless
|
||||||
if (ret) return ret;
|
if (ret == NVAPI_OK) {
|
||||||
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
|
||||||
|
}
|
||||||
|
|
||||||
NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
|
// seems ok on maxwell and pascal for the mem clocks
|
||||||
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||||
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||||
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
|
||||||
|
if (ret == NVAPI_OK) {
|
||||||
|
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
||||||
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (delta == (clock * 1000))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
// todo: bounds check with GetPstates20
|
||||||
|
|
||||||
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
||||||
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
||||||
pset1.numPstates = 1;
|
pset1.numPstates = 1;
|
||||||
pset1.numClocks = 1;
|
pset1.numClocks = 1;
|
||||||
// Memory boost clock seems only ok on pascal with this api
|
|
||||||
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
|
||||||
pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
||||||
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
||||||
if (ret == NVAPI_OK) {
|
if (ret == NVAPI_OK) {
|
||||||
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
|
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user