nvapi: base memclock was wrong on maxwell

2025-08-26 05:42:02 +00:00 · 2016-06-25 16:16:09 +02:00 · 2016-06-25 16:16:09 +02:00 · 7ff179abe9
commit 7ff179abe9
parent bc6ac3a3ab
3 changed files with 112 additions and 22 deletions
--- a/compat/nvapi/nvapi_ccminer.h
+++ b/compat/nvapi/nvapi_ccminer.h
@ -182,6 +182,19 @@ typedef struct {
 } NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
 #define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)

+// Output buffer for NvAPI_DLL_GetVoltages() (listed under "Maxwell only").
+// The layout is reverse-engineered, so field meanings are best guesses.
+// With a 4-byte NvU32 the size is 12 + 128*8 + 3888*4 = 16588 = 0x40cc bytes;
+// presumably MAKE_NVAPI_VERSION encodes that size, so no member may be resized - TODO confirm.
+typedef struct {
+	NvU32 version; // callers set this to NVAPI_VOLTAGES_TABLE_VER before the call
+	NvU32 flags;
+	NvU32 filled; // 1
+	struct {
+		NvU32 volt_uV; // presumably microvolts (from the name); nvapi_pstateinfo() counts non-zero values as used levels
+		NvU32 unknown;
+	} entries[128];
+	// some empty tables then...
+	NvU32 buf1[3888];
+} NVAPI_VOLTAGES_TABLE; // 16588 bytes (1-40cc)
+#define NVAPI_VOLTAGES_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_VOLTAGES_TABLE, 1)
+
 NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);

 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
@ -201,8 +214,9 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI
 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
 NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4

-// Maxwell ?
+// Maxwell only
 NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
+NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE*); // 7D656244 1-40CC

 NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);

@ -211,6 +225,7 @@ NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortSt
 NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
 NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);

+
 NvAPI_Status NvAPI_DLL_Unload();

 #define NV_ASSERT(x) { NvAPI_Status ret = x; if(ret != NVAPI_OK) return ret; }
--- a/nvapi.cpp
+++ b/nvapi.cpp
@ -239,7 +239,7 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
 	return (*pointer)(handle, pFreqs);
 }

-#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported)
+#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B
 // allow to set gpu/mem core freq delta
 NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
@ -260,6 +260,17 @@ NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PS
 	return (*pointer)(handle, pSet);
 }

+// Maxwell voltage table.
+// Resolves the undocumented NvAPI entry point NVAPI_ID_VOLTAGES on first use
+// (the pointer is cached in a function-local static) and forwards the call.
+//   handle : physical GPU handle passed through to the driver
+//   pInfo  : table to fill; nvapi_pstateinfo() sets pInfo->version to NVAPI_VOLTAGES_TABLE_VER first
+// Returns NVAPI_API_NOT_INITIALIZED when the nvapi dll is not loaded,
+// NVAPI_NO_IMPLEMENTATION when the entry point cannot be resolved,
+// otherwise the status returned by the driver call.
+#define NVAPI_ID_VOLTAGES 0x7D656244 // 1-40cc
+NvAPI_Status NvAPI_DLL_GetVoltages(NvPhysicalGpuHandle handle, NVAPI_VOLTAGES_TABLE *pInfo) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTAGES_TABLE*))nvidia_handle->query(NVAPI_ID_VOLTAGES);
+	}
+	// Do not call through an unresolved entry point (assumes query() yields NULL
+	// when the driver does not export this ID); report it as a status instead of crashing.
+	if(!pointer) return NVAPI_NO_IMPLEMENTATION;
+	return (*pointer)(handle, pInfo);
+}
+
 #define NVAPI_ID_UNLOAD 0xD22BDD7E
 NvAPI_Status NvAPI_DLL_Unload() {
 	static NvAPI_Status (*pointer)() = NULL;
--- a/nvml.cpp
+++ b/nvml.cpp
@ -963,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
 	// useless on init but...
 	nvapi_getpstate(devNum, &current);

+#if 0
+	// try :p
+	uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
+	for (int i=8; i < 0x8000 && buf; i+=4) {
+		buf[0] = 0x10000 + i;
+		if ((ret = NvAPI_DLL_XXX(phys[devNum], buf)) != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
+			for (int n=0; n < i/32; n++)
+				applog_hex(&buf[n*(32/4)], 80);
+			break;
+		}
+	}
+	free(buf);
+#endif
+
 #if 0
 	// Unsure of the meaning of these values
 	NVAPI_GPU_POWER_TOPO topo = { 0 };
@ -1056,13 +1073,6 @@ int nvapi_pstateinfo(unsigned int devNum)
 			tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
 	}

-#if 0
-	// seems empty..
-	NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
-	volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
-	ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &volts);
-#endif
-
 #if 1
 	// Read pascal Clocks Table, Empty on 9xx
 	NVAPI_CLOCKS_RANGE ranges = { 0 };
@ -1078,7 +1088,7 @@ int nvapi_pstateinfo(unsigned int devNum)
 	}

 	// PASCAL GTX ONLY
-	//if (gpuClocks || memClocks) {
+	if (gpuClocks || memClocks) {
 		NVAPI_CLOCK_TABLE table = { 0 };
 		table.version = NVAPI_CLOCK_TABLE_VER;
 		memcpy(table.mask, boost.mask, 12);
@ -1126,7 +1136,20 @@ int nvapi_pstateinfo(unsigned int devNum)
 			if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
 		}
 		applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
-	//}
+	}
+
+	// Maxwell
+	else {
+		NVAPI_VOLTAGES_TABLE volts = { 0 };
+		volts.version = NVAPI_VOLTAGES_TABLE_VER;
+		int entries = 0;
+		ret = NvAPI_DLL_GetVoltages(phys[devNum], &volts);
+		for (n=0; n < 128; n++) {
+			if (volts.entries[n].volt_uV)
+				entries++;
+		}
+		applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
+	}
 #endif
 	return 0;
 }
@ -1214,17 +1237,43 @@ int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
 int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
 {
 	NvAPI_Status ret;
+	NvS32 delta = 0;

 	if (devNum >= nvapi_dev_cnt)
 		return -ENODEV;
-
+#if 0
+	// wrong API to get the default base clock once it has been modified; the CUDA device props seem fine
 	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
 	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
 	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	if (ret) return ret;
+	if (ret == NVAPI_OK)  {
+		delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
+	}

-	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
+	NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
+	deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
+	ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
+	if (ret == NVAPI_OK) {
+		if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
+			delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
+	}
+#endif
+
+	cudaDeviceProp props = { 0 };
+	NvU32 busId = 0xFFFF;
+	ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
+	for (int d=0; d<nvapi_dev_cnt; d++) {
+		 // unsure about devNum, so be safe
+		cudaGetDeviceProperties(&props, d);
+		if (props.pciBusID == busId) {
+			delta = (clock * 1000) - props.clockRate;
+			break;
+		}
+	}
+
+	if (delta == (clock * 1000))
+		return ret;

 	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
 	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
@ -1232,10 +1281,10 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
 	pset1.numClocks = 1;
 	// Ok on both 1080 and 970
 	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
-	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
 	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
 	if (ret == NVAPI_OK) {
-		applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
+		applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
 	}
 	return ret;
 }
@ -1243,28 +1292,43 @@ int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
+// Sets the memory clock of NVAPI device devNum by programming a frequency delta
+// for the memory domain through NvAPI_DLL_SetPstates20v1().
+// Returns -ENODEV when devNum is out of range, otherwise the NvAPI_Status of the
+// last NvAPI call made (a query status on early exit, else the set status).
+// clock is multiplied by 1000 and compared with values written to freqDelta_kHz,
+// so it is presumably given in MHz - TODO confirm against callers.
 int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
 {
 	NvAPI_Status ret;
+	NvS32 delta = 0;
 
 	if (devNum >= nvapi_dev_cnt)
 		return -ENODEV;
 
+	// wrong to get default base clock (when modified) on maxwell (same as cuda props one)
+	// first estimate: delta against the reported base memory clock (kept only if the query succeeds)
 	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
 	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
 	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
-	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	if (ret) return ret;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless
+	if (ret == NVAPI_OK)  {
+		delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
+	}
 
-	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
+	// seems ok on maxwell and pascal for the mem clocks
+	// second estimate: overrides the first one when the query succeeds and
+	// clocks[0] of pstate 0 is the memory domain
+	NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
+	deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
+	ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
+	if (ret == NVAPI_OK) {
+		if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
+			delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
+	}
+
+	// delta == clock*1000 means the reference frequency read back as 0: nothing to apply
+	// NOTE(review): if both queries above fail, delta is still 0, this guard does not
+	// trigger (unless clock is 0) and a zero delta is applied below - confirm this is intended
+	if (delta == (clock * 1000))
+		return ret;
+
+	// todo: bounds check with GetPstates20
 
+	// request: one pstate, one clock entry, memory domain, offset = delta
 	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
 	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
 	pset1.numPstates = 1;
 	pset1.numClocks = 1;
-	// Memory boost clock seems only ok on pascal with this api
 	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
-	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
 	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
 	if (ret == NVAPI_OK) {
-		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
+		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
 	}
 	return ret;
 }