Browse Source

nvapi: pascal core voltage boost + meminfo fix x86

cuda 6.5 one seems to crash on pascal or report invalid mem sizes
master
Tanguy Pruvot 9 years ago
parent
commit
095f25f9a8
  1. 10
      compat/nvapi/nvapi_ccminer.h
  2. 15
      cuda.cpp
  3. 24
      nvapi.cpp
  4. 40
      nvml.cpp
  5. 2
      nvml.h

10
compat/nvapi/nvapi_ccminer.h

@ -1,5 +1,6 @@
#pragma once #pragma once
#define NVAPI_INTERNAL
#include "nvapi.h" #include "nvapi.h"
NvAPI_Status nvapi_dll_init(); NvAPI_Status nvapi_dll_init();
@ -183,6 +184,13 @@ typedef struct {
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28) } NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1) #define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
typedef struct {
NvU32 version;
NvS32 percent;
NvU32 pad[8];
} NVAPI_VOLTBOOST_PERCENT; // 40 bytes (1-0028)
#define NVAPI_VOLTBOOST_PERCENT_VER MAKE_NVAPI_VERSION(NVAPI_VOLTBOOST_PERCENT, 1)
typedef struct { typedef struct {
NvU32 version; NvU32 version;
NvU32 flags; NvU32 flags;
@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP
NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009 NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4 NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
// Maxwell only // Maxwell only
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C

15
cuda.cpp

@ -105,7 +105,8 @@ void cuda_print_devices()
cudaDeviceProp props; cudaDeviceProp props;
cudaGetDeviceProperties(&props, dev_id); cudaGetDeviceProperties(&props, dev_id);
if (!opt_n_threads || n < opt_n_threads) { if (!opt_n_threads || n < opt_n_threads) {
fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]); fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
if (opt_debug) nvml_print_device_info(dev_id); if (opt_debug) nvml_print_device_info(dev_id);
#ifdef WIN32 #ifdef WIN32
@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id)
{ {
int dev_id = device_map[thr_id % MAX_GPUS]; int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal = 0, mfree = 0; size_t mtotal = 0, mfree = 0;
cudaDeviceProp props; #if defined(_WIN32) && defined(USE_WRAPNVML)
// cuda (6.5) one can crash on pascal and dont handle 8GB
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
#else
cudaSetDevice(dev_id); cudaSetDevice(dev_id);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
if (!strstr(props.name, "GTX 10"))
// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
cudaMemGetInfo(&mfree, &mtotal);
#else
cudaMemGetInfo(&mfree, &mtotal); cudaMemGetInfo(&mfree, &mtotal);
#endif #endif
}
return (int) (mfree / (1024 * 1024)); return (int) (mfree / (1024 * 1024));
} }

24
nvapi.cpp

@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
return (*pointer)(handle, table); return (*pointer)(handle, table);
} }
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ? #define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) { NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL; static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED; if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE*
return (*pointer)(handle, curve); return (*pointer)(handle, curve);
} }
#define NVAPI_ID_CURVE_GET 0xE440B867 // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
#define NVAPI_ID_CURVE_SET 0x39442CFB // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
#define NVAPI_ID_VOLTBOOST_GET 0x9DF23CA1 // Pascal 1-0028
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_GET);
}
return (*pointer)(handle, boost);
}
#define NVAPI_ID_VOLTBOOST_SET 0xB9306D9B // Pascal 1-0028
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_SET);
}
return (*pointer)(handle, boost);
}
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B #define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){ NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL; static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL;

40
nvml.cpp

@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum)
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
applog(LOG_RAW, " OC %4d MHz %6.1f MHz", applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz",
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
} }
} }
// boost over volting (GTX 9xx only ?) // boost over volting (GTX 9xx only ?)
for (n=0; n < info.ov.numVoltages; n++) { for (n=0; n < info.ov.numVoltages; n++) {
applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d", applog(LOG_RAW, " OV: %u%+u mV%s \x7F %d/%d",
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ", info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000); info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
} }
@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum)
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); (double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
// Maxwell only // Pascal only
NVAPI_VOLTBOOST_PERCENT pvb = { 0 };
pvb.version = NVAPI_VOLTBOOST_PERCENT_VER;
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], &pvb)) == NVAPI_OK) {
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom);
if (pvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom.value_uV/1000, pvb.percent);
else
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
} else {
// Maxwell 9xx
NVAPI_VOLT_STATUS mvdom = { 0 }; NVAPI_VOLT_STATUS mvdom = { 0 };
mvdom.version = NVAPI_VOLT_STATUS_VER; mvdom.version = NVAPI_VOLT_STATUS_VER;
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) { if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
if (mvdom.value_uV) if (mvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000); applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
} }
// Pascal only
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
if ((ret = NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom)) == NVAPI_OK) {
if (pvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
} }
uint8_t plim = nvapi_get_plimit(devNum); uint8_t plim = nvapi_get_plimit(devNum);
@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
// seems ok on maxwell and pascal for the mem clocks // seems ok on maxwell and pascal for the mem clocks
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
if (ret == NVAPI_OK) { if (ret == NVAPI_OK) {
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
return ret; return ret;
} }
// Replacement for WIN32 CUDA 6.5 on pascal
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
{
NvAPI_Status ret = NVAPI_OK;
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
*total = mem.availableDedicatedVideoMemory;
*free = mem.curAvailableDedicatedVideoMemory;
}
return (int) ret;
}
int nvapi_init() int nvapi_init()
{ {
int num_gpus = cuda_num_devices(); int num_gpus = cuda_num_devices();

2
nvml.h

@ -226,6 +226,8 @@ int nvapi_init_settings();
int nvapi_pstateinfo(unsigned int devNum); int nvapi_pstateinfo(unsigned int devNum);
uint8_t nvapi_get_plimit(unsigned int devNum); uint8_t nvapi_get_plimit(unsigned int devNum);
// cuda Replacement for 6.5 compat
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total);
#endif #endif
#endif /* USE_WRAPNVML */ #endif /* USE_WRAPNVML */

Loading…
Cancel
Save