From 095f25f9a8d02fd23fca3451945259292cfa4464 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 26 Jun 2016 22:03:17 +0200 Subject: [PATCH] nvapi: pascal core voltage boost + meminfo fix x86 cuda 6.5 one seems to crash on pascal or report invalid mem sizes --- compat/nvapi/nvapi_ccminer.h | 10 ++++++++ cuda.cpp | 17 ++++++------- nvapi.cpp | 24 ++++++++++++++++++- nvml.cpp | 46 ++++++++++++++++++++++++++---------- nvml.h | 2 ++ 5 files changed, 75 insertions(+), 24 deletions(-) diff --git a/compat/nvapi/nvapi_ccminer.h b/compat/nvapi/nvapi_ccminer.h index 2bad8f0..c368100 100644 --- a/compat/nvapi/nvapi_ccminer.h +++ b/compat/nvapi/nvapi_ccminer.h @@ -1,5 +1,6 @@ #pragma once +#define NVAPI_INTERNAL #include "nvapi.h" NvAPI_Status nvapi_dll_init(); @@ -183,6 +184,13 @@ typedef struct { } NVAPI_VFP_CURVE; // 7208 bytes (1-1c28) #define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1) +typedef struct { + NvU32 version; + NvS32 percent; + NvU32 pad[8]; +} NVAPI_VOLTBOOST_PERCENT; // 40 bytes (1-0028) +#define NVAPI_VOLTBOOST_PERCENT_VER MAKE_NVAPI_VERSION(NVAPI_VOLTBOOST_PERCENT, 1) + typedef struct { NvU32 version; NvU32 flags; @@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009 NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4 NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c +NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*); +NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*); // Maxwell only NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C diff --git a/cuda.cpp b/cuda.cpp index cd378fb..f7b2883 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -105,7 +105,8 @@ void cuda_print_devices() cudaDeviceProp props; cudaGetDeviceProperties(&props, dev_id); if (!opt_n_threads || n < opt_n_threads) { - fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]); + fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor, + device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000); #ifdef USE_WRAPNVML if (opt_debug) nvml_print_device_info(dev_id); #ifdef WIN32 @@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id) { int dev_id = device_map[thr_id % MAX_GPUS]; size_t mtotal = 0, mfree = 0; - cudaDeviceProp props; +#if defined(_WIN32) && defined(USE_WRAPNVML) + // cuda (6.5) one can crash on pascal and dont handle 8GB + nvapiMemGetInfo(dev_id, &mfree, &mtotal); +#else cudaSetDevice(dev_id); cudaDeviceSynchronize(); - if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) { -#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG) - if (!strstr(props.name, "GTX 10")) - // seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5 - cudaMemGetInfo(&mfree, &mtotal); -#else - cudaMemGetInfo(&mfree, &mtotal); + cudaMemGetInfo(&mfree, &mtotal); #endif - } return (int) (mfree / (1024 * 1024)); } diff --git a/nvapi.cpp b/nvapi.cpp index 2f08120..b918526 100644 --- a/nvapi.cpp +++ b/nvapi.cpp @@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC return (*pointer)(handle, table); } -#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ? +#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) { static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; @@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* return (*pointer)(handle, curve); } +#define NVAPI_ID_CURVE_GET 0xE440B867 // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00 +#define NVAPI_ID_CURVE_SET 0x39442CFB // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00 + +#define NVAPI_ID_VOLTBOOST_GET 0x9DF23CA1 // Pascal 1-0028 +NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_GET); + } + return (*pointer)(handle, boost); +} +#define NVAPI_ID_VOLTBOOST_SET 0xB9306D9B // Pascal 1-0028 +NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) { + static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL; + if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; + if(!pointer) { + pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_SET); + } + return (*pointer)(handle, boost); +} + #define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL; diff --git a/nvml.cpp b/nvml.cpp index 97f7ec9..c9e0295 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum) info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { - applog(LOG_RAW, " OC %4d MHz %6.1f MHz", + applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); } } // boost over volting (GTX 9xx only ?) for (n=0; n < info.ov.numVoltages; n++) { - applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d", + applog(LOG_RAW, " OV: %u%+u mV%s \x7F %d/%d", info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ", info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000); } @@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum) (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); - // Maxwell only - NVAPI_VOLT_STATUS mvdom = { 0 }; - mvdom.version = NVAPI_VOLT_STATUS_VER; - if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) { - if (mvdom.value_uV) - applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000); - } // Pascal only - NVAPI_VOLTAGE_STATUS pvdom = { 0 }; - pvdom.version = NVAPI_VOLTAGE_STATUS_VER; - if ((ret = NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom)) == NVAPI_OK) { + NVAPI_VOLTBOOST_PERCENT pvb = { 0 }; + pvb.version = NVAPI_VOLTBOOST_PERCENT_VER; + if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], &pvb)) == NVAPI_OK) { + NVAPI_VOLTAGE_STATUS pvdom = { 0 }; + pvdom.version = NVAPI_VOLTAGE_STATUS_VER; + NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom); if (pvdom.value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom.value_uV/1000, pvb.percent); + else applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000); + } else { + // Maxwell 9xx + NVAPI_VOLT_STATUS mvdom = { 0 }; + mvdom.version = NVAPI_VOLT_STATUS_VER; + if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) { + if (mvdom.value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000); + } } uint8_t plim = nvapi_get_plimit(devNum); @@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock) // seems ok on maxwell and pascal for the mem clocks NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; - ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks if (ret == NVAPI_OK) { if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; @@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock) return ret; } +// Replacement for WIN32 CUDA 6.5 on pascal +int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total) +{ + NvAPI_Status ret = NVAPI_OK; + NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 }; + mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; + if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { + *total = mem.availableDedicatedVideoMemory; + *free = mem.curAvailableDedicatedVideoMemory; + } + return (int) ret; +} + int nvapi_init() { int num_gpus = cuda_num_devices(); diff --git a/nvml.h b/nvml.h index 8f98934..07b4841 100644 --- a/nvml.h +++ b/nvml.h @@ -226,6 +226,8 @@ int nvapi_init_settings(); int nvapi_pstateinfo(unsigned int devNum); uint8_t nvapi_get_plimit(unsigned int devNum); +// cuda Replacement for 6.5 compat +int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total); #endif #endif /* USE_WRAPNVML */