Browse Source

nvapi: pascal core voltage boost + meminfo fix x86

cuda 6.5 one seems to crash on pascal or report invalid mem sizes
2upstream
Tanguy Pruvot 8 years ago
parent
commit
095f25f9a8
  1. 10
      compat/nvapi/nvapi_ccminer.h
  2. 17
      cuda.cpp
  3. 24
      nvapi.cpp
  4. 46
      nvml.cpp
  5. 2
      nvml.h

10
compat/nvapi/nvapi_ccminer.h

@ -1,5 +1,6 @@ @@ -1,5 +1,6 @@
#pragma once
#define NVAPI_INTERNAL
#include "nvapi.h"
NvAPI_Status nvapi_dll_init();
@ -183,6 +184,13 @@ typedef struct { @@ -183,6 +184,13 @@ typedef struct {
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
typedef struct {
NvU32 version;
NvS32 percent;
NvU32 pad[8];
} NVAPI_VOLTBOOST_PERCENT; // 40 bytes (1-0028)
#define NVAPI_VOLTBOOST_PERCENT_VER MAKE_NVAPI_VERSION(NVAPI_VOLTBOOST_PERCENT, 1)
typedef struct {
NvU32 version;
NvU32 flags;
@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP @@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP
NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
// Maxwell only
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C

17
cuda.cpp

@ -105,7 +105,8 @@ void cuda_print_devices() @@ -105,7 +105,8 @@ void cuda_print_devices()
cudaDeviceProp props;
cudaGetDeviceProperties(&props, dev_id);
if (!opt_n_threads || n < opt_n_threads) {
fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]);
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
#ifdef USE_WRAPNVML
if (opt_debug) nvml_print_device_info(dev_id);
#ifdef WIN32
@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id) @@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id)
{
int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal = 0, mfree = 0;
cudaDeviceProp props;
#if defined(_WIN32) && defined(USE_WRAPNVML)
// cuda (6.5) one can crash on pascal and dont handle 8GB
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
#else
cudaSetDevice(dev_id);
cudaDeviceSynchronize();
if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
if (!strstr(props.name, "GTX 10"))
// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
cudaMemGetInfo(&mfree, &mtotal);
#else
cudaMemGetInfo(&mfree, &mtotal);
cudaMemGetInfo(&mfree, &mtotal);
#endif
}
return (int) (mfree / (1024 * 1024));
}

24
nvapi.cpp

@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC @@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
return (*pointer)(handle, table);
}
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* @@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE*
return (*pointer)(handle, curve);
}
#define NVAPI_ID_CURVE_GET 0xE440B867 // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
#define NVAPI_ID_CURVE_SET 0x39442CFB // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
#define NVAPI_ID_VOLTBOOST_GET 0x9DF23CA1 // Pascal 1-0028
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_GET);
}
return (*pointer)(handle, boost);
}
#define NVAPI_ID_VOLTBOOST_SET 0xB9306D9B // Pascal 1-0028
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
if(!pointer) {
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_SET);
}
return (*pointer)(handle, boost);
}
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL;

46
nvml.cpp

@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum)
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
applog(LOG_RAW, " OC %4d MHz %6.1f MHz",
applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz",
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
}
}
// boost over volting (GTX 9xx only ?)
for (n=0; n < info.ov.numVoltages; n++) {
applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
applog(LOG_RAW, " OV: %u%+u mV%s \x7F %d/%d",
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
}
@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum) @@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum)
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
// Maxwell only
NVAPI_VOLT_STATUS mvdom = { 0 };
mvdom.version = NVAPI_VOLT_STATUS_VER;
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
if (mvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
}
// Pascal only
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
if ((ret = NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom)) == NVAPI_OK) {
NVAPI_VOLTBOOST_PERCENT pvb = { 0 };
pvb.version = NVAPI_VOLTBOOST_PERCENT_VER;
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], &pvb)) == NVAPI_OK) {
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom);
if (pvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom.value_uV/1000, pvb.percent);
else
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
} else {
// Maxwell 9xx
NVAPI_VOLT_STATUS mvdom = { 0 };
mvdom.version = NVAPI_VOLT_STATUS_VER;
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
if (mvdom.value_uV)
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
}
}
uint8_t plim = nvapi_get_plimit(devNum);
@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock) @@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
// seems ok on maxwell and pascal for the mem clocks
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
if (ret == NVAPI_OK) {
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock) @@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
return ret;
}
// Replacement for WIN32 CUDA 6.5 on pascal
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
{
NvAPI_Status ret = NVAPI_OK;
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
*total = mem.availableDedicatedVideoMemory;
*free = mem.curAvailableDedicatedVideoMemory;
}
return (int) ret;
}
int nvapi_init()
{
int num_gpus = cuda_num_devices();

2
nvml.h

@ -226,6 +226,8 @@ int nvapi_init_settings(); @@ -226,6 +226,8 @@ int nvapi_init_settings();
int nvapi_pstateinfo(unsigned int devNum);
uint8_t nvapi_get_plimit(unsigned int devNum);
// cuda Replacement for 6.5 compat
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total);
#endif
#endif /* USE_WRAPNVML */

Loading…
Cancel
Save