From bc6ac3a3ab3e75a585e9e983a94673d4286e4f16 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Sat, 25 Jun 2016 09:40:37 +0200
Subject: [PATCH] nvapi: link clocks and tlimit to command line

boost clocks and the thermal limit are shared with afterburner
beware with your settings, not as safe as application clocks!

Note: both nvapi and nvml are now used on windows x64
Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
---
 README.txt                   |   2 +
 ccminer.cpp                  |  49 +++++--
 compat/nvapi/nvapi_ccminer.h |  57 +++++++-
 cuda.cpp                     |  14 +-
 nvapi.cpp                    |  56 ++++++--
 nvml.cpp                     | 265 ++++++++++++++++++++++++++++-------
 nvml.h                       |  10 +-
 7 files changed, 375 insertions(+), 78 deletions(-)

diff --git a/README.txt b/README.txt
index 7512b69..e1bce2e 100644
--- a/README.txt
+++ b/README.txt
@@ -243,6 +243,8 @@ features.
   June 2016       v1.8.0
                   Pascal support with cuda 8
                   x11evo algo (XRE)
+                  Lyra2v2 and Decred hashrate improvements
+                  Enhance windows NVAPI clock and power limits
 
   May  18th 2016  v1.7.6
                   Decred vote support
diff --git a/ccminer.cpp b/ccminer.cpp
index 3aa82e7..476bc80 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -128,7 +128,8 @@ uint32_t gpus_intensity[MAX_GPUS] = { 0 };
 uint32_t device_gpu_clocks[MAX_GPUS] = { 0 };
 uint32_t device_mem_clocks[MAX_GPUS] = { 0 };
 uint32_t device_plimit[MAX_GPUS] = { 0 };
-int8_t device_pstate[MAX_GPUS] = { -1 };
+uint8_t device_tlimit[MAX_GPUS] = { 0 };
+int8_t device_pstate[MAX_GPUS] = { -1, -1 };
 int opt_cudaschedule = -1;
 static bool opt_keep_clocks = false;
 
@@ -303,7 +304,10 @@ Options:\n\
       --plimit=100W     Set the gpu power limit (352.21+ driver)\n"
 #else /* via nvapi.dll */
 "\
-      --plimit=100      Set the gpu power limit in percentage\n"
+      --mem-clock=3505  Set the gpu memory boost clock\n\
+      --gpu-clock=1150  Set the gpu engine boost clock\n\
+      --plimit=100      Set the gpu power limit in percentage\n\
+      --tlimit=80       Set the gpu thermal limit in degrees\n"
 #endif
 #ifdef HAVE_SYSLOG_H
 "\
@@ -380,6 +384,7 @@ struct option options[] = {
 	{ "pstate", 1, NULL, 1072 },
 	{ "plimit", 1, NULL, 1073 },
 	{ "keep-clocks", 0, NULL, 1074 },
+	{ "tlimit", 1, NULL, 1075 },
 #ifdef HAVE_SYSLOG_H
 	{ "syslog", 0, NULL, 'S' },
 	{ "syslog-prefix", 1, NULL, 1018 },
@@ -2687,7 +2692,8 @@ void parse_arg(int key, char *arg)
 		#ifdef USE_WRAPNVML
 		hnvml = nvml_create();
 		#ifdef WIN32
-		if (!hnvml) nvapi_init();
+		nvapi_init();
+		nvapi_init_settings();
 		#endif
 		#endif
 		cuda_print_devices();
@@ -2931,6 +2937,17 @@ void parse_arg(int key, char *arg)
 	case 1074: /* --keep-clocks */
 		opt_keep_clocks = true;
 		break;
+	case 1075: /* --tlimit */
+		{
+			char *pch = strtok(arg,",");
+			int n = 0;
+			while (pch != NULL && n < MAX_GPUS) {
+				int dev_id = device_map[n++];
+				device_tlimit[dev_id] = (uint8_t) atoi(pch);
+				pch = strtok(NULL, ",");
+			}
+		}
+		break;
 	case 1005:
 		opt_benchmark = true;
 		want_longpoll = false;
@@ -3504,13 +3521,14 @@ int main(int argc, char *argv[])
 	if (hnvml) {
 		bool gpu_reinit = (opt_cudaschedule >= 0); //false
 		cuda_devicenames(); // refresh gpu vendor name
-		applog(LOG_INFO, "NVML GPU monitoring enabled.");
+		if (!opt_quiet)
+			applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for (int n=0; n < active_gpus; n++) {
 			if (nvml_set_pstate(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if (nvml_set_plimit(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
-			if (nvml_set_clocks(hnvml, device_map[n]) == 1)
+			if (!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if (gpu_reinit) {
 				cuda_reset_device(n, NULL);
@@ -3518,20 +3536,25 @@ int main(int argc, char *argv[])
 		}
 	}
 #endif
+#ifdef WIN32
+	if (nvapi_init() == 0) {
+		if (!opt_quiet)
+			applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
+		if (!hnvml) {
+			cuda_devicenames(); // refresh gpu vendor name
+		}
+		nvapi_init_settings();
+	}
+#endif
+	else if (!hnvml && !opt_quiet)
+		applog(LOG_INFO, "GPU monitoring is not available.");
+
 	// force reinit to set default device flags
 	if (opt_cudaschedule >= 0 && !hnvml) {
 		for (int n=0; n < active_gpus; n++) {
 			cuda_reset_device(n, NULL);
 		}
 	}
-#ifdef WIN32
-	if (!hnvml && nvapi_init() == 0) {
-		applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
-		cuda_devicenames(); // refresh gpu vendor name
-	}
-#endif
-	else if (!hnvml)
-		applog(LOG_INFO, "GPU monitoring is not available.");
 #endif
 
 	if (opt_api_listen) {
diff --git a/compat/nvapi/nvapi_ccminer.h b/compat/nvapi/nvapi_ccminer.h
index 45218d2..83b9697 100644
--- a/compat/nvapi/nvapi_ccminer.h
+++ b/compat/nvapi/nvapi_ccminer.h
@@ -33,6 +33,18 @@ typedef struct {
 } NVAPI_GPU_POWER_STATUS;
 #define NVAPI_GPU_POWER_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_STATUS, 1)
 
+typedef struct {
+	NvU32 version;
+	NvU32 count;
+	struct {
+		NvU32 unknown1;
+		NvU32 unknown2;
+		NvU32 power; // unsure ?? 85536 to 95055 on 1080, 104825+ on 970
+		NvU32 unknown4;
+	} entries[4];
+} NVAPI_GPU_POWER_TOPO;
+#define NVAPI_GPU_POWER_TOPO_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_TOPO, 1)
+
 typedef struct {
 	NvU32 version;
 	NvU32 flags;
@@ -134,11 +146,48 @@ typedef struct {
 } NVAPI_CLOCK_TABLE; // 9248 bytes
 #define NVAPI_CLOCK_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_CLOCK_TABLE, 1)
 
+typedef struct {
+	NvU32 version;
+	NvU32 mask[4]; // 80 bits mask
+	NvU32 buf0[12];
+	struct {
+		NvU32 a; // 0
+		NvU32 freq_kHz;
+		NvU32 volt_uV;
+		NvU32 d;
+		NvU32 e;
+		NvU32 f;
+		NvU32 g;
+	} gpuEntries[80];
+	struct {
+		NvU32 a;  // 1 for idle values ?
+		NvU32 freq_kHz;
+		NvU32 volt_uV;
+		NvU32 d;
+		NvU32 e;
+		NvU32 f;
+		NvU32 g;
+	} memEntries[23];
+	NvU32 buf1[1064];
+} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
+#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
+
+typedef struct {
+	NvU32 version;
+	NvU32 flags;
+	NvU32 count; // unsure
+	NvU32 unknown;
+	NvU32 value_uV;
+	NvU32 buf1[30];
+} NVAPI_VOLT_STATUS; // 140 bytes (1-008c)
+#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1)
+
 NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string);
 
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*);
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
 NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*);
+NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_TOPO*); // EDCF624E 1-0048
 
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_INFO*);
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetLimit(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_LIMIT*);
@@ -146,17 +195,21 @@ NvAPI_Status NvAPI_DLL_ClientThermalPoliciesSetLimit(NvPhysicalGpuHandle hPhysic
 
 NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS*);
 
-// to dig...
+// Pascal GTX only
 NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCKS_RANGE*);
 NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_MASKS*); // 0x507B4B59
 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133
+NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
 
+// Maxwell ?
+NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
 
 NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs);
 
 NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortString serial);
 
-NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo);
+NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet);
+NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet);
 
 NvAPI_Status NvAPI_DLL_Unload();
 
diff --git a/cuda.cpp b/cuda.cpp
index 810ce9d..cd378fb 100644
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -199,9 +199,19 @@ void cuda_reset_device(int thr_id, bool *init)
 int cuda_available_memory(int thr_id)
 {
 	int dev_id = device_map[thr_id % MAX_GPUS];
-	size_t mtotal, mfree = 0;
+	size_t mtotal = 0, mfree = 0;
+	cudaDeviceProp props;
 	cudaSetDevice(dev_id);
-	cudaMemGetInfo(&mfree, &mtotal);
+	cudaDeviceSynchronize();
+	if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
+#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
+		if (!strstr(props.name, "GTX 10"))
+			// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
+			cudaMemGetInfo(&mfree, &mtotal);
+#else
+		cudaMemGetInfo(&mfree, &mtotal);
+#endif
+	}
 	return (int) (mfree / (1024 * 1024));
 }
 
diff --git a/nvapi.cpp b/nvapi.cpp
index f81ebd5..4acf405 100644
--- a/nvapi.cpp
+++ b/nvapi.cpp
@@ -119,6 +119,16 @@ NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle handle,
 	return (*pointer)(handle, pPolicies);
 }
 
+#define NVAPI_ID_POWERTOPO_GET 0xEDCF624E
+NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle handle, NVAPI_GPU_POWER_TOPO* topo) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*))nvidia_handle->query(NVAPI_ID_POWERTOPO_GET);
+	}
+	return (*pointer)(handle, topo);
+}
+
 #define NVAPI_ID_THERMAL_INFO 0x0D258BB5
 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle handle, NVAPI_GPU_THERMAL_INFO* pInfo) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_THERMAL_INFO*) = NULL;
@@ -169,7 +179,7 @@ NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVIDI
 	return (*pointer)(handle, status);
 }
 
-#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A
+#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLOCKS_RANGE* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCKS_RANGE*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@@ -179,7 +189,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLO
 	return (*pointer)(handle, range);
 }
 
-#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59
+#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59 // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK_MASKS* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_MASKS*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@@ -189,7 +199,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK
 	return (*pointer)(handle, range);
 }
 
-#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133
+#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133 // Pascal
 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOCK_TABLE* range) {
 	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle,  NVAPI_CLOCK_TABLE*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@@ -199,8 +209,25 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
 	return (*pointer)(handle, range);
 }
 
-#define NVAPI_ID_CLK_BOOST_CURVE 0x0700004A //??
+#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
+NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle,  NVAPI_VFP_CURVE*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*))nvidia_handle->query(NVAPI_ID_VFP_CURVE_GET);
+	}
+	return (*pointer)(handle, curve);
+}
 
+#define NVAPI_ID_VOLT_STATUS_GET 0xC16C7E2C
+NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVAPI_VOLT_STATUS* data) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle,  NVAPI_VOLT_STATUS*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*))nvidia_handle->query(NVAPI_ID_VOLT_STATUS_GET);
+	}
+	return (*pointer)(handle, data);
+}
 
 #define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
 NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
@@ -212,14 +239,25 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
 	return (*pointer)(handle, pFreqs);
 }
 
-#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // NOT SUPPORTED
-NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo) {
-	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*) = NULL;
+#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported)
+// allow to set gpu/mem core freq delta
+NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL;
+	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
+	if(!pointer) {
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
+	}
+	return (*pointer)(handle, pSet);
+}
+
+// allow to set gpu core voltage delta
+NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet) {
+	static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*) = NULL;
 	if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
 	if(!pointer) {
-		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
+		pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET);
 	}
-	return (*pointer)(handle, pPerfPstatesInfo);
+	return (*pointer)(handle, pSet);
 }
 
 #define NVAPI_ID_UNLOAD 0xD22BDD7E
diff --git a/nvml.cpp b/nvml.cpp
index fffdefc..92e5939 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -35,6 +35,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 extern uint32_t device_gpu_clocks[MAX_GPUS];
 extern uint32_t device_mem_clocks[MAX_GPUS];
 extern uint32_t device_plimit[MAX_GPUS];
+extern uint8_t device_tlimit[MAX_GPUS];
 extern int8_t device_pstate[MAX_GPUS];
 
 uint32_t clock_prev[MAX_GPUS] = { 0 };
@@ -371,7 +372,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	if (rc == NVML_SUCCESS)
 		applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
 	else {
-		applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
+		applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
 		return -1;
 	}
 
@@ -953,8 +954,6 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
 	return 0;
 }
 
-#define FREQ_GETVAL(clk) (clk.typeId == 0 ? clk.data.single.freq_kHz : clk.data.range.maxFreq_kHz)
-
 int nvapi_pstateinfo(unsigned int devNum)
 {
 	uint32_t n;
@@ -964,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum)
 	// useless on init but...
 	nvapi_getpstate(devNum, &current);
 
+#if 0
+	// Unsure of the meaning of these values
+	NVAPI_GPU_POWER_TOPO topo = { 0 };
+	topo.version = NVAPI_GPU_POWER_TOPO_VER;
+	if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
+		if (topo.count)
+			applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
+			(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);
+
+	// Ok on 970, not pascal
+	NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
+	pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
+	pset2.ov.numVoltages = 1;
+	pset2.ov.voltages[0].voltDelta_uV.value = 3000;  // gpu + 3000 uv;
+	ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
+#endif
+
 	NV_GPU_PERF_PSTATES20_INFO info = { 0 };
 	info.version = NV_GPU_PERF_PSTATES20_INFO_VER;
 	if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], &info)) != NVAPI_OK) {
@@ -973,46 +989,59 @@ int nvapi_pstateinfo(unsigned int devNum)
 			applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
 		return -1;
 	}
-	applog(LOG_RAW, "%u P-states with %u clocks %s",
-		info.numPstates, info.numClocks, info.numBaseVoltages ? "and voltage":"");
+
 	for (n=0; n < info.numPstates; n++) {
 		NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info.pstates[n].clocks;
-		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %3u-%4u MHz%s %4u mV%s \x7F %d/%d",
+		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
 			info.pstates[n].pstateId == current ? ">":" ", info.pstates[n].pstateId,
-			FREQ_GETVAL(clocks[1])/1000, clocks[1].bIsEditable ? "*":" ",
-			clocks[0].data.range.minFreq_kHz/1000, FREQ_GETVAL(clocks[0])/1000, clocks[0].bIsEditable ? "*":" ",
+			clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
+			(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
 			info.pstates[n].baseVoltages[0].volt_uV/1000, info.pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
 			info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
 			info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
+		if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
+			applog(LOG_RAW, "      OC %4d MHz      %6.1f MHz",
+				clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
+		}
 	}
-	// boost over volting (GTX 9xx) ?
+	// boost over volting (GTX 9xx only ?)
 	for (n=0; n < info.ov.numVoltages; n++) {
-		applog(LOG_RAW, " OV: %u mV%s + %d/%d",
-			info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
+		applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
+			info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
 			info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
 	}
 
 	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
 	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %8.2f MHz    >Current",
+	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %6.1f MHz     Base Clocks",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
 
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %8.2f MHz     Base Clocks",
+	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %6.1f MHz     Boost Clocks",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
 
-	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
 	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
-	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %8.2f MHz     Boost Clocks",
+	applog(LOG_RAW, "     MEM %4.0f MHz  GPU %6.1f MHz    >Current",
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
 		(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
-	
-#if 1
+
+	// Maxwell only
+	NVAPI_VOLT_STATUS pvdom = { 0 };
+	pvdom.version = NVAPI_VOLT_STATUS_VER;
+	if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &pvdom)) == NVAPI_OK) {
+		if (pvdom.value_uV)
+			applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
+	}
+
+	uint8_t plim = nvapi_get_plimit(devNum);
+	applog(LOG_RAW, " Power limit is set to %u%%", (uint32_t) plim);
+
 	NV_GPU_THERMAL_SETTINGS tset = { 0 };
 	NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
 	NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
@@ -1025,16 +1054,9 @@ int nvapi_pstateinfo(unsigned int devNum)
 		applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]",
 			tlim.entries[0].value >> 8, tset.sensor[0].currentTemp,
 			tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
-		// ok
-		//tlim.entries[0].value = 80 << 8;
-		//tlim.flags = 1;
-		//ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
 	}
-#endif
-	uint8_t plim = nvapi_getplimit(devNum);
-	applog(LOG_RAW, " Power limit coef. is set to %u%%", (uint32_t) plim);
 
-#if 1
+#if 0
 	// seems empty..
 	NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 };
 	volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER;
@@ -1055,32 +1077,61 @@ int nvapi_pstateinfo(unsigned int devNum)
 		if (boost.clocks[n].gpuDelta) gpuClocks++;
 	}
 
-	if (gpuClocks || memClocks) {
-		applog(LOG_RAW, "Boost table contains %d gpu clocks and %d mem clocks.", gpuClocks, memClocks);
+	// PASCAL GTX ONLY
+	//if (gpuClocks || memClocks) {
 		NVAPI_CLOCK_TABLE table = { 0 };
 		table.version = NVAPI_CLOCK_TABLE_VER;
 		memcpy(table.mask, boost.mask, 12);
 		ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], &table);
+		gpuClocks = 0, memClocks = 0;
 		for (n=0; n < 12; n++) {
 			if (table.buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table.buf0[n]);
 		}
 		for (n=0; n < 80; n++) {
-			if (table.gpuDeltas[n].freqDelta)
-				applog(LOG_RAW, "boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/1000);
+			if (table.gpuDeltas[n].freqDelta) {
+				// note: gpu delta value seems to be x2, not the memory
+				//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/2000);
+				gpuClocks++;
+			}
 		}
 		for (n=0; n < 23; n++) {
-			if (table.memFilled[n])
-				applog(LOG_RAW, "boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
+			if (table.memFilled[n]) {
+				//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000);
+				memClocks++;
+			}
 		}
 		for (n=0; n < 1529; n++) {
 			if (table.buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table.buf1[n]);
 		}
-	}
+		applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+
+		NVAPI_VFP_CURVE curve = { 0 };
+		curve.version = NVAPI_VFP_CURVE_VER;
+		memcpy(curve.mask, boost.mask, 12);
+		ret = NvAPI_DLL_GetVFPCurve(phys[devNum], &curve);
+		gpuClocks = 0, memClocks = 0;
+		for (n=0; n < 80; n++) {
+			if (curve.gpuEntries[n].freq_kHz || curve.gpuEntries[n].volt_uV) {
+			//	applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve.gpuEntries[n].freq_kHz/1000, curve.gpuEntries[n].volt_uV/1000);
+				gpuClocks++;
+			}
+		}
+		for (n=0; n < 23; n++) {
+			if (curve.memEntries[n].freq_kHz || curve.memEntries[n].volt_uV) {
+			//	applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve.memEntries[n].freq_kHz/1000, curve.memEntries[n].volt_uV/1000);
+				memClocks++;
+			}
+		}
+		for (n=0; n < 1064; n++) {
+			if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]);
+		}
+		applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
+	//}
 #endif
 	return 0;
 }
 
-uint8_t nvapi_getplimit(unsigned int devNum)
+uint8_t nvapi_get_plimit(unsigned int devNum)
 {
 	NvAPI_Status ret = NVAPI_OK;
 	NVAPI_GPU_POWER_STATUS pol = { 0 };
@@ -1095,7 +1146,7 @@ uint8_t nvapi_getplimit(unsigned int devNum)
 	return (uint8_t) (pol.entries[0].power / 1000); // in percent
 }
 
-int nvapi_setplimit(unsigned int devNum, uint16_t percent)
+int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
 {
 	NvAPI_Status ret = NVAPI_OK;
 	uint32_t val = percent * 1000;
@@ -1126,6 +1177,98 @@ int nvapi_setplimit(unsigned int devNum, uint16_t percent)
 	return ret;
 }
 
+int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
+{
+	NvAPI_Status ret;
+	uint32_t val = limit;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	NV_GPU_THERMAL_SETTINGS tset = { 0 };
+	NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
+	NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
+	tset.version = NV_GPU_THERMAL_SETTINGS_VER;
+	NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset);
+	tnfo.version = NVAPI_GPU_THERMAL_INFO_VER;
+	NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo);
+	tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER;
+	if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) {
+		tlim.entries[0].value = val << 8;
+		tlim.flags = 1;
+		ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
+		if (ret == NVAPI_OK) {
+			applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]",
+				devNum, val, tset.sensor[0].currentTemp,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		} else {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string,
+				tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
+		}
+	}
+	return (int) ret;
+}
+
+int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
+	if (ret) return ret;
+
+	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	// Ok on both 1080 and 970
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000);
+	}
+	return ret;
+}
+
+int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
+{
+	NvAPI_Status ret;
+
+	if (devNum >= nvapi_dev_cnt)
+		return -ENODEV;
+
+	NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
+	freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
+	freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
+	ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
+	if (ret) return ret;
+
+	NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
+
+	NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
+	pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
+	pset1.numPstates = 1;
+	pset1.numClocks = 1;
+	// Memory boost clock seems only ok on pascal with this api
+	pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
+	pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff;
+	ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
+	if (ret == NVAPI_OK) {
+		applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000);
+	}
+	return ret;
+}
+
 int nvapi_init()
 {
 	int num_gpus = cuda_num_devices();
@@ -1191,23 +1334,49 @@ int nvapi_init()
 		sprintf(driver_version,"%d.%02d", udv / 100, udv % 100);
 	}
 
+	return 0;
+}
+
+int nvapi_init_settings()
+{
 	// nvapi.dll
-	ret = nvapi_dll_init();
-	if (ret == NVAPI_OK) {
-		for (int n=0; n < opt_n_threads; n++) {
-			int dev_id = device_map[n % MAX_GPUS];
-			if (device_plimit[dev_id]) {
-				nvapi_setplimit(nvapi_dev_map[dev_id], device_plimit[dev_id]); // 0=default
-				uint32_t res = nvapi_getplimit(nvapi_dev_map[dev_id]);
-				gpulog(LOG_INFO, n, "NVAPI power limit is set to %u%%", res);
+	int ret = nvapi_dll_init();
+	if (ret != NVAPI_OK)
+		return ret;
+
+	for (int n=0; n < opt_n_threads; n++) {
+		int dev_id = device_map[n % MAX_GPUS];
+		if (device_plimit[dev_id]) {
+			if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
+				uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
+				gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
+			}
+		}
+		if (device_tlimit[dev_id]) {
+			nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
+		}
+		if (device_gpu_clocks[dev_id]) {
+			ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "Boost gpu clock %s", string);
 			}
-			if (device_pstate[dev_id]) {
-				// todo...
+		}
+		if (device_mem_clocks[dev_id]) {
+			ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "Boost mem clock %s", string);
 			}
 		}
+		if (device_pstate[dev_id]) {
+			// dunno how via nvapi or/and pascal
+		}
 	}
 
-	return 0;
+	return ret;
 }
 #endif
 
@@ -1306,7 +1475,7 @@ unsigned int gpu_power(struct cgpu_info *gpu)
 	if (support == -1) {
 		unsigned int pct = 0;
 		nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
-		pct *= nvapi_getplimit(nvapi_dev_map[gpu->gpu_id]);
+		pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
 		pct /= 100;
 		mw = pct; // to fix
 	}
diff --git a/nvml.h b/nvml.h
index e9aea2f..8f98934 100644
--- a/nvml.h
+++ b/nvml.h
@@ -217,13 +217,15 @@ int gpu_info(struct cgpu_info *gpu);
 
 int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
 
-// to debug clocks..
-int nvapi_pstateinfo(unsigned int devNum);
-uint8_t nvapi_getplimit(unsigned int devNum);
-
 /* nvapi functions */
 #ifdef WIN32
 int nvapi_init();
+int nvapi_init_settings();
+
+// to debug nvapi..
+int nvapi_pstateinfo(unsigned int devNum);
+uint8_t nvapi_get_plimit(unsigned int devNum);
+
 #endif
 
 #endif /* USE_WRAPNVML */