From bc6ac3a3ab3e75a585e9e983a94673d4286e4f16 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sat, 25 Jun 2016 09:40:37 +0200 Subject: [PATCH] nvapi: link clocks and tlimit to command line boost clocks and the thermal limit are shared with afterburner beware with your settings, not as safe as application clocks! Note: both nvapi and nvml are now used on windows x64 Signed-off-by: Tanguy Pruvot --- README.txt | 2 + ccminer.cpp | 49 +++++-- compat/nvapi/nvapi_ccminer.h | 57 +++++++- cuda.cpp | 14 +- nvapi.cpp | 56 ++++++-- nvml.cpp | 265 ++++++++++++++++++++++++++++------- nvml.h | 10 +- 7 files changed, 375 insertions(+), 78 deletions(-) diff --git a/README.txt b/README.txt index 7512b69..e1bce2e 100644 --- a/README.txt +++ b/README.txt @@ -243,6 +243,8 @@ features. June 2016 v1.8.0 Pascal support with cuda 8 x11evo algo (XRE) + Lyra2v2 and Decred hashrate improvements + Enhance windows NVAPI clock and power limits May 18th 2016 v1.7.6 Decred vote support diff --git a/ccminer.cpp b/ccminer.cpp index 3aa82e7..476bc80 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -128,7 +128,8 @@ uint32_t gpus_intensity[MAX_GPUS] = { 0 }; uint32_t device_gpu_clocks[MAX_GPUS] = { 0 }; uint32_t device_mem_clocks[MAX_GPUS] = { 0 }; uint32_t device_plimit[MAX_GPUS] = { 0 }; -int8_t device_pstate[MAX_GPUS] = { -1 }; +uint8_t device_tlimit[MAX_GPUS] = { 0 }; +int8_t device_pstate[MAX_GPUS] = { -1, -1 }; int opt_cudaschedule = -1; static bool opt_keep_clocks = false; @@ -303,7 +304,10 @@ Options:\n\ --plimit=100W Set the gpu power limit (352.21+ driver)\n" #else /* via nvapi.dll */ "\ - --plimit=100 Set the gpu power limit in percentage\n" + --mem-clock=3505 Set the gpu memory boost clock\n\ + --gpu-clock=1150 Set the gpu engine boost clock\n\ + --plimit=100 Set the gpu power limit in percentage\n\ + --tlimit=80 Set the gpu thermal limit in degrees\n" #endif #ifdef HAVE_SYSLOG_H "\ @@ -380,6 +384,7 @@ struct option options[] = { { "pstate", 1, NULL, 1072 }, { "plimit", 1, NULL, 1073 }, { "keep-clocks", 0, NULL, 1074 }, + { "tlimit", 1, NULL, 1075 }, #ifdef HAVE_SYSLOG_H { "syslog", 0, NULL, 'S' }, { "syslog-prefix", 1, NULL, 1018 }, @@ -2687,7 +2692,8 @@ void parse_arg(int key, char *arg) #ifdef USE_WRAPNVML hnvml = nvml_create(); #ifdef WIN32 - if (!hnvml) nvapi_init(); + nvapi_init(); + nvapi_init_settings(); #endif #endif cuda_print_devices(); @@ -2931,6 +2937,17 @@ void parse_arg(int key, char *arg) case 1074: /* --keep-clocks */ opt_keep_clocks = true; break; + case 1075: /* --tlimit */ + { + char *pch = strtok(arg,","); + int n = 0; + while (pch != NULL && n < MAX_GPUS) { + int dev_id = device_map[n++]; + device_tlimit[dev_id] = (uint8_t) atoi(pch); + pch = strtok(NULL, ","); + } + } + break; case 1005: opt_benchmark = true; want_longpoll = false; @@ -3504,13 +3521,14 @@ int main(int argc, char *argv[]) if (hnvml) { bool gpu_reinit = (opt_cudaschedule >= 0); //false cuda_devicenames(); // refresh gpu vendor name - applog(LOG_INFO, "NVML GPU monitoring enabled."); + if (!opt_quiet) + applog(LOG_INFO, "NVML GPU monitoring enabled."); for (int n=0; n < active_gpus; n++) { if (nvml_set_pstate(hnvml, device_map[n]) == 1) gpu_reinit = true; if (nvml_set_plimit(hnvml, device_map[n]) == 1) gpu_reinit = true; - if (nvml_set_clocks(hnvml, device_map[n]) == 1) + if (!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1) gpu_reinit = true; if (gpu_reinit) { cuda_reset_device(n, NULL); @@ -3518,20 +3536,25 @@ int main(int argc, char *argv[]) } } #endif +#ifdef WIN32 + if (nvapi_init() == 0) { + if (!opt_quiet) + applog(LOG_INFO, "NVAPI GPU monitoring enabled."); + if (!hnvml) { + cuda_devicenames(); // refresh gpu vendor name + } + nvapi_init_settings(); + } +#endif + else if (!hnvml && !opt_quiet) + applog(LOG_INFO, "GPU monitoring is not available."); + // force reinit to set default device flags if (opt_cudaschedule >= 0 && !hnvml) { for (int n=0; n < active_gpus; n++) { cuda_reset_device(n, NULL); } } -#ifdef WIN32 - if (!hnvml && nvapi_init() == 0) { - applog(LOG_INFO, "NVAPI GPU monitoring enabled."); - cuda_devicenames(); // refresh gpu vendor name - } -#endif - else if (!hnvml) - applog(LOG_INFO, "GPU monitoring is not available."); #endif if (opt_api_listen) { diff --git a/compat/nvapi/nvapi_ccminer.h b/compat/nvapi/nvapi_ccminer.h index 45218d2..83b9697 100644 --- a/compat/nvapi/nvapi_ccminer.h +++ b/compat/nvapi/nvapi_ccminer.h @@ -33,6 +33,18 @@ typedef struct { } NVAPI_GPU_POWER_STATUS; #define NVAPI_GPU_POWER_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_STATUS, 1) +typedef struct { + NvU32 version; + NvU32 count; + struct { + NvU32 unknown1; + NvU32 unknown2; + NvU32 power; // unsure ?? 85536 to 95055 on 1080, 104825+ on 970 + NvU32 unknown4; + } entries[4]; +} NVAPI_GPU_POWER_TOPO; +#define NVAPI_GPU_POWER_TOPO_VER MAKE_NVAPI_VERSION(NVAPI_GPU_POWER_TOPO, 1) + typedef struct { NvU32 version; NvU32 flags; @@ -134,11 +146,48 @@ typedef struct { } NVAPI_CLOCK_TABLE; // 9248 bytes #define NVAPI_CLOCK_TABLE_VER MAKE_NVAPI_VERSION(NVAPI_CLOCK_TABLE, 1) +typedef struct { + NvU32 version; + NvU32 mask[4]; // 80 bits mask + NvU32 buf0[12]; + struct { + NvU32 a; // 0 + NvU32 freq_kHz; + NvU32 volt_uV; + NvU32 d; + NvU32 e; + NvU32 f; + NvU32 g; + } gpuEntries[80]; + struct { + NvU32 a; // 1 for idle values ? + NvU32 freq_kHz; + NvU32 volt_uV; + NvU32 d; + NvU32 e; + NvU32 f; + NvU32 g; + } memEntries[23]; + NvU32 buf1[1064]; +} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28) +#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1) + +typedef struct { + NvU32 version; + NvU32 flags; + NvU32 count; // unsure + NvU32 unknown; + NvU32 value_uV; + NvU32 buf1[30]; +} NVAPI_VOLT_STATUS; // 140 bytes (1-008c) +#define NVAPI_VOLT_STATUS_VER MAKE_NVAPI_VERSION(NVAPI_VOLT_STATUS, 1) + NvAPI_Status NvAPI_DLL_GetInterfaceVersionString(NvAPI_ShortString string); NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_INFO*); NvAPI_Status NvAPI_DLL_ClientPowerPoliciesGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*); NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_STATUS*); +NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_POWER_TOPO*); // EDCF624E 1-0048 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_INFO*); NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetLimit(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_GPU_THERMAL_LIMIT*); @@ -146,17 +195,21 @@ NvAPI_Status NvAPI_DLL_ClientThermalPoliciesSetLimit(NvPhysicalGpuHandle hPhysic NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS*); -// to dig... +// Pascal GTX only NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCKS_RANGE*); NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_MASKS*); // 0x507B4B59 NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x23F1B133 +NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4 +// Maxwell ? +NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle hPhysicalGpu, void* pFreqs); NvAPI_Status NvAPI_DLL_GetSerialNumber(NvPhysicalGpuHandle handle, NvAPI_ShortString serial); -NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo); +NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet); +NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet); NvAPI_Status NvAPI_DLL_Unload(); diff --git a/cuda.cpp b/cuda.cpp index 810ce9d..cd378fb 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -199,9 +199,19 @@ void cuda_reset_device(int thr_id, bool *init) int cuda_available_memory(int thr_id) { int dev_id = device_map[thr_id % MAX_GPUS]; - size_t mtotal, mfree = 0; + size_t mtotal = 0, mfree = 0; + cudaDeviceProp props; cudaSetDevice(dev_id); - cudaMemGetInfo(&mfree, &mtotal); + cudaDeviceSynchronize(); + if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) { +#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG) + if (!strstr(props.name, "GTX 10")) + // seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5 + cudaMemGetInfo(&mfree, &mtotal); +#else + cudaMemGetInfo(&mfree, &mtotal); +#endif + } return (int) (mfree / (1024 * 1024)); } diff --git a/nvapi.cpp b/nvapi.cpp index f81ebd5..4acf405 100644 --- a/nvapi.cpp +++ b/nvapi.cpp @@ -119,6 +119,16 @@ NvAPI_Status NvAPI_DLL_ClientPowerPoliciesSetStatus(NvPhysicalGpuHandle handle, return (*pointer)(handle, pPolicies); } +#define NVAPI_ID_POWERTOPO_GET 0xEDCF624E +NvAPI_Status NvAPI_DLL_ClientPowerTopologyGetStatus(NvPhysicalGpuHandle handle, NVAPI_GPU_POWER_TOPO* topo) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_GPU_POWER_TOPO*))nvidia_handle->query(NVAPI_ID_POWERTOPO_GET); + } + return (*pointer)(handle, topo); +} + #define NVAPI_ID_THERMAL_INFO 0x0D258BB5 NvAPI_Status NvAPI_DLL_ClientThermalPoliciesGetInfo(NvPhysicalGpuHandle handle, NVAPI_GPU_THERMAL_INFO* pInfo) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_GPU_THERMAL_INFO*) = NULL; @@ -169,7 +179,7 @@ NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVIDI return (*pointer)(handle, status); } -#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A +#define NVAPI_ID_CLK_RANGE_GET 0x64B43A6A // Pascal NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLOCKS_RANGE* range) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCKS_RANGE*) = NULL; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; @@ -179,7 +189,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostRanges(NvPhysicalGpuHandle handle, NVAPI_CLO return (*pointer)(handle, range); } -#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59 +#define NVAPI_ID_CLK_BOOST_MASK 0x507B4B59 // Pascal NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK_MASKS* range) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_MASKS*) = NULL; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; @@ -189,7 +199,7 @@ NvAPI_Status NvAPI_DLL_GetClockBoostMask(NvPhysicalGpuHandle handle, NVAPI_CLOCK return (*pointer)(handle, range); } -#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133 +#define NVAPI_ID_CLK_BOOST_TABLE 0x23F1B133 // Pascal NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOCK_TABLE* range) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_CLOCK_TABLE*) = NULL; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; @@ -199,8 +209,25 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC return (*pointer)(handle, range); } -#define NVAPI_ID_CLK_BOOST_CURVE 0x0700004A //?? +#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ? +NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*))nvidia_handle->query(NVAPI_ID_VFP_CURVE_GET); + } + return (*pointer)(handle, curve); +} +#define NVAPI_ID_VOLT_STATUS_GET 0xC16C7E2C +NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle handle, NVAPI_VOLT_STATUS* data) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLT_STATUS*))nvidia_handle->query(NVAPI_ID_VOLT_STATUS_GET); + } + return (*pointer)(handle, data); +} #define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ @@ -212,14 +239,25 @@ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ return (*pointer)(handle, pFreqs); } -#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // NOT SUPPORTED -NvAPI_Status NvAPI_DLL_SetPstates20(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO *pPerfPstatesInfo) { - static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*) = NULL; +#define NVAPI_ID_PSTATE20_SET 0x0F4DAE6B // Need struct v1 of 7316 bytes (v2 semms unsupported) +// allow to set gpu/mem core freq delta +NvAPI_Status NvAPI_DLL_SetPstates20v1(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V1 *pSet) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V1*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET); + } + return (*pointer)(handle, pSet); +} + +// allow to set gpu core voltage delta +NvAPI_Status NvAPI_DLL_SetPstates20v2(NvPhysicalGpuHandle handle, NV_GPU_PERF_PSTATES20_INFO_V2 *pSet) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*) = NULL; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; if(!pointer) { - pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET); + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NV_GPU_PERF_PSTATES20_INFO_V2*))nvidia_handle->query(NVAPI_ID_PSTATE20_SET); } - return (*pointer)(handle, pPerfPstatesInfo); + return (*pointer)(handle, pSet); } #define NVAPI_ID_UNLOAD 0xD22BDD7E diff --git a/nvml.cpp b/nvml.cpp index fffdefc..92e5939 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -35,6 +35,7 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; extern uint32_t device_gpu_clocks[MAX_GPUS]; extern uint32_t device_mem_clocks[MAX_GPUS]; extern uint32_t device_plimit[MAX_GPUS]; +extern uint8_t device_tlimit[MAX_GPUS]; extern int8_t device_pstate[MAX_GPUS]; uint32_t clock_prev[MAX_GPUS] = { 0 }; @@ -371,7 +372,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) if (rc == NVML_SUCCESS) applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk); else { - applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); + applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } @@ -953,8 +954,6 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) return 0; } -#define FREQ_GETVAL(clk) (clk.typeId == 0 ? clk.data.single.freq_kHz : clk.data.range.maxFreq_kHz) - int nvapi_pstateinfo(unsigned int devNum) { uint32_t n; @@ -964,6 +963,23 @@ int nvapi_pstateinfo(unsigned int devNum) // useless on init but... nvapi_getpstate(devNum, ¤t); +#if 0 + // Unsure of the meaning of these values + NVAPI_GPU_POWER_TOPO topo = { 0 }; + topo.version = NVAPI_GPU_POWER_TOPO_VER; + if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) { + if (topo.count) + applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?", + (double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000); + + // Ok on 970, not pascal + NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 }; + pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; + pset2.ov.numVoltages = 1; + pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; + ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); +#endif + NV_GPU_PERF_PSTATES20_INFO info = { 0 }; info.version = NV_GPU_PERF_PSTATES20_INFO_VER; if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], &info)) != NVAPI_OK) { @@ -973,46 +989,59 @@ int nvapi_pstateinfo(unsigned int devNum) applog(LOG_RAW, "NVAPI GetPstates20: %s", string); return -1; } - applog(LOG_RAW, "%u P-states with %u clocks %s", - info.numPstates, info.numClocks, info.numBaseVoltages ? "and voltage":""); + for (n=0; n < info.numPstates; n++) { NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info.pstates[n].clocks; - applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %3u-%4u MHz%s %4u mV%s \x7F %d/%d", + applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", info.pstates[n].pstateId == current ? ">":" ", info.pstates[n].pstateId, - FREQ_GETVAL(clocks[1])/1000, clocks[1].bIsEditable ? "*":" ", - clocks[0].data.range.minFreq_kHz/1000, FREQ_GETVAL(clocks[0])/1000, clocks[0].bIsEditable ? "*":" ", + clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ", + (double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ", info.pstates[n].baseVoltages[0].volt_uV/1000, info.pstates[n].baseVoltages[0].bIsEditable ? "*": " ", info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); + if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { + applog(LOG_RAW, " OC %4d MHz %6.1f MHz", + clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); + } } - // boost over volting (GTX 9xx) ? + // boost over volting (GTX 9xx only ?) for (n=0; n < info.ov.numVoltages; n++) { - applog(LOG_RAW, " OV: %u mV%s + %d/%d", - info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].bIsEditable ? "*":" ", + applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d", + info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ", info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000); } NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; - freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz >Current", + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Base Clocks", + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); - applog(LOG_RAW, " MEM %4.0f MHz GPU %8.2f MHz Boost Clocks", + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - -#if 1 + + // Maxwell only + NVAPI_VOLT_STATUS pvdom = { 0 }; + pvdom.version = NVAPI_VOLT_STATUS_VER; + if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &pvdom)) == NVAPI_OK) { + if (pvdom.value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000); + } + + uint8_t plim = nvapi_get_plimit(devNum); + applog(LOG_RAW, " Power limit is set to %u%%", (uint32_t) plim); + NV_GPU_THERMAL_SETTINGS tset = { 0 }; NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; @@ -1025,16 +1054,9 @@ int nvapi_pstateinfo(unsigned int devNum) applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", tlim.entries[0].value >> 8, tset.sensor[0].currentTemp, tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); - // ok - //tlim.entries[0].value = 80 << 8; - //tlim.flags = 1; - //ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); } -#endif - uint8_t plim = nvapi_getplimit(devNum); - applog(LOG_RAW, " Power limit coef. is set to %u%%", (uint32_t) plim); -#if 1 +#if 0 // seems empty.. NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS volts = { 0 }; volts.version = NVIDIA_GPU_VOLTAGE_DOMAINS_STATUS_VER; @@ -1055,32 +1077,61 @@ int nvapi_pstateinfo(unsigned int devNum) if (boost.clocks[n].gpuDelta) gpuClocks++; } - if (gpuClocks || memClocks) { - applog(LOG_RAW, "Boost table contains %d gpu clocks and %d mem clocks.", gpuClocks, memClocks); + // PASCAL GTX ONLY + //if (gpuClocks || memClocks) { NVAPI_CLOCK_TABLE table = { 0 }; table.version = NVAPI_CLOCK_TABLE_VER; memcpy(table.mask, boost.mask, 12); ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], &table); + gpuClocks = 0, memClocks = 0; for (n=0; n < 12; n++) { if (table.buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table.buf0[n]); } for (n=0; n < 80; n++) { - if (table.gpuDeltas[n].freqDelta) - applog(LOG_RAW, "boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/1000); + if (table.gpuDeltas[n].freqDelta) { + // note: gpu delta value seems to be x2, not the memory + //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table.gpuDeltas[n].freqDelta/2000); + gpuClocks++; + } } for (n=0; n < 23; n++) { - if (table.memFilled[n]) - applog(LOG_RAW, "boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000); + if (table.memFilled[n]) { + //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table.memDeltas[n]/1000); + memClocks++; + } } for (n=0; n < 1529; n++) { if (table.buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table.buf1[n]); } - } + applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + + NVAPI_VFP_CURVE curve = { 0 }; + curve.version = NVAPI_VFP_CURVE_VER; + memcpy(curve.mask, boost.mask, 12); + ret = NvAPI_DLL_GetVFPCurve(phys[devNum], &curve); + gpuClocks = 0, memClocks = 0; + for (n=0; n < 80; n++) { + if (curve.gpuEntries[n].freq_kHz || curve.gpuEntries[n].volt_uV) { + // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve.gpuEntries[n].freq_kHz/1000, curve.gpuEntries[n].volt_uV/1000); + gpuClocks++; + } + } + for (n=0; n < 23; n++) { + if (curve.memEntries[n].freq_kHz || curve.memEntries[n].volt_uV) { + // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve.memEntries[n].freq_kHz/1000, curve.memEntries[n].volt_uV/1000); + memClocks++; + } + } + for (n=0; n < 1064; n++) { + if (table.buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve.buf1[n]); + } + applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + //} #endif return 0; } -uint8_t nvapi_getplimit(unsigned int devNum) +uint8_t nvapi_get_plimit(unsigned int devNum) { NvAPI_Status ret = NVAPI_OK; NVAPI_GPU_POWER_STATUS pol = { 0 }; @@ -1095,7 +1146,7 @@ uint8_t nvapi_getplimit(unsigned int devNum) return (uint8_t) (pol.entries[0].power / 1000); // in percent } -int nvapi_setplimit(unsigned int devNum, uint16_t percent) +int nvapi_set_plimit(unsigned int devNum, uint16_t percent) { NvAPI_Status ret = NVAPI_OK; uint32_t val = percent * 1000; @@ -1126,6 +1177,98 @@ int nvapi_setplimit(unsigned int devNum, uint16_t percent) return ret; } +int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) +{ + NvAPI_Status ret; + uint32_t val = limit; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + NV_GPU_THERMAL_SETTINGS tset = { 0 }; + NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; + NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; + tset.version = NV_GPU_THERMAL_SETTINGS_VER; + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); + tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); + tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; + if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) { + tlim.entries[0].value = val << 8; + tlim.flags = 1; + ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", + devNum, val, tset.sensor[0].currentTemp, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } else { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } + } + return (int) ret; +} + +int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); + if (ret) return ret; + + NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + // Ok on both 1080 and 970 + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, diff/1000); + } + return ret; +} + +int nvapi_set_memclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); + if (ret) return ret; + + NvS32 diff = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + // Memory boost clock seems only ok on pascal with this api + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = diff; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, diff/1000); + } + return ret; +} + int nvapi_init() { int num_gpus = cuda_num_devices(); @@ -1191,23 +1334,49 @@ int nvapi_init() sprintf(driver_version,"%d.%02d", udv / 100, udv % 100); } + return 0; +} + +int nvapi_init_settings() +{ // nvapi.dll - ret = nvapi_dll_init(); - if (ret == NVAPI_OK) { - for (int n=0; n < opt_n_threads; n++) { - int dev_id = device_map[n % MAX_GPUS]; - if (device_plimit[dev_id]) { - nvapi_setplimit(nvapi_dev_map[dev_id], device_plimit[dev_id]); // 0=default - uint32_t res = nvapi_getplimit(nvapi_dev_map[dev_id]); - gpulog(LOG_INFO, n, "NVAPI power limit is set to %u%%", res); + int ret = nvapi_dll_init(); + if (ret != NVAPI_OK) + return ret; + + for (int n=0; n < opt_n_threads; n++) { + int dev_id = device_map[n % MAX_GPUS]; + if (device_plimit[dev_id]) { + if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) { + uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); + gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); + } + } + if (device_tlimit[dev_id]) { + nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); + } + if (device_gpu_clocks[dev_id]) { + ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); + if (ret) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status) ret, string); + gpulog(LOG_WARNING, n, "Boost gpu clock %s", string); } - if (device_pstate[dev_id]) { - // todo... + } + if (device_mem_clocks[dev_id]) { + ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); + if (ret) { + NvAPI_ShortString string; + NvAPI_GetErrorMessage((NvAPI_Status) ret, string); + gpulog(LOG_WARNING, n, "Boost mem clock %s", string); } } + if (device_pstate[dev_id]) { + // dunno how via nvapi or/and pascal + } } - return 0; + return ret; } #endif @@ -1306,7 +1475,7 @@ unsigned int gpu_power(struct cgpu_info *gpu) if (support == -1) { unsigned int pct = 0; nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); - pct *= nvapi_getplimit(nvapi_dev_map[gpu->gpu_id]); + pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); pct /= 100; mw = pct; // to fix } diff --git a/nvml.h b/nvml.h index e9aea2f..8f98934 100644 --- a/nvml.h +++ b/nvml.h @@ -217,13 +217,15 @@ int gpu_info(struct cgpu_info *gpu); int gpu_vendor(uint8_t pci_bus_id, char *vendorname); -// to debug clocks.. -int nvapi_pstateinfo(unsigned int devNum); -uint8_t nvapi_getplimit(unsigned int devNum); - /* nvapi functions */ #ifdef WIN32 int nvapi_init(); +int nvapi_init_settings(); + +// to debug nvapi.. +int nvapi_pstateinfo(unsigned int devNum); +uint8_t nvapi_get_plimit(unsigned int devNum); + #endif #endif /* USE_WRAPNVML */