nvapi: pascal core voltage boost + meminfo fix x86
cuda 6.5 one seems to crash on pascal or report invalid mem sizes
This commit is contained in:
parent
0555201384
commit
095f25f9a8
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#define NVAPI_INTERNAL
|
||||
#include "nvapi.h"
|
||||
|
||||
NvAPI_Status nvapi_dll_init();
|
||||
@ -183,6 +184,13 @@ typedef struct {
|
||||
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
|
||||
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
|
||||
|
||||
typedef struct {
|
||||
NvU32 version;
|
||||
NvS32 percent;
|
||||
NvU32 pad[8];
|
||||
} NVAPI_VOLTBOOST_PERCENT; // 40 bytes (1-0028)
|
||||
#define NVAPI_VOLTBOOST_PERCENT_VER MAKE_NVAPI_VERSION(NVAPI_VOLTBOOST_PERCENT, 1)
|
||||
|
||||
typedef struct {
|
||||
NvU32 version;
|
||||
NvU32 flags;
|
||||
@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP
|
||||
NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009
|
||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
||||
NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c
|
||||
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
|
||||
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
|
||||
|
||||
// Maxwell only
|
||||
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
||||
|
17
cuda.cpp
17
cuda.cpp
@ -105,7 +105,8 @@ void cuda_print_devices()
|
||||
cudaDeviceProp props;
|
||||
cudaGetDeviceProperties(&props, dev_id);
|
||||
if (!opt_n_threads || n < opt_n_threads) {
|
||||
fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]);
|
||||
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
|
||||
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
|
||||
#ifdef USE_WRAPNVML
|
||||
if (opt_debug) nvml_print_device_info(dev_id);
|
||||
#ifdef WIN32
|
||||
@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id)
|
||||
{
|
||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||
size_t mtotal = 0, mfree = 0;
|
||||
cudaDeviceProp props;
|
||||
#if defined(_WIN32) && defined(USE_WRAPNVML)
|
||||
// cuda (6.5) one can crash on pascal and dont handle 8GB
|
||||
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
|
||||
#else
|
||||
cudaSetDevice(dev_id);
|
||||
cudaDeviceSynchronize();
|
||||
if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
|
||||
#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
|
||||
if (!strstr(props.name, "GTX 10"))
|
||||
// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
|
||||
cudaMemGetInfo(&mfree, &mtotal);
|
||||
#else
|
||||
cudaMemGetInfo(&mfree, &mtotal);
|
||||
cudaMemGetInfo(&mfree, &mtotal);
|
||||
#endif
|
||||
}
|
||||
return (int) (mfree / (1024 * 1024));
|
||||
}
|
||||
|
||||
|
24
nvapi.cpp
24
nvapi.cpp
@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
|
||||
return (*pointer)(handle, table);
|
||||
}
|
||||
|
||||
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
|
||||
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal
|
||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
|
||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
|
||||
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||
@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE*
|
||||
return (*pointer)(handle, curve);
|
||||
}
|
||||
|
||||
#define NVAPI_ID_CURVE_GET 0xE440B867 // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
|
||||
#define NVAPI_ID_CURVE_SET 0x39442CFB // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
|
||||
|
||||
#define NVAPI_ID_VOLTBOOST_GET 0x9DF23CA1 // Pascal 1-0028
|
||||
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
|
||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
|
||||
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||
if(!pointer) {
|
||||
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_GET);
|
||||
}
|
||||
return (*pointer)(handle, boost);
|
||||
}
|
||||
#define NVAPI_ID_VOLTBOOST_SET 0xB9306D9B // Pascal 1-0028
|
||||
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
|
||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
|
||||
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||
if(!pointer) {
|
||||
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_SET);
|
||||
}
|
||||
return (*pointer)(handle, boost);
|
||||
}
|
||||
|
||||
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
|
||||
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
|
||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL;
|
||||
|
46
nvml.cpp
46
nvml.cpp
@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
|
||||
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
|
||||
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
|
||||
applog(LOG_RAW, " OC %4d MHz %6.1f MHz",
|
||||
applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz",
|
||||
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
|
||||
}
|
||||
}
|
||||
// boost over volting (GTX 9xx only ?)
|
||||
for (n=0; n < info.ov.numVoltages; n++) {
|
||||
applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
|
||||
applog(LOG_RAW, " OV: %u%+u mV%s \x7F %d/%d",
|
||||
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
|
||||
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
|
||||
}
|
||||
@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum)
|
||||
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
||||
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
||||
|
||||
// Maxwell only
|
||||
NVAPI_VOLT_STATUS mvdom = { 0 };
|
||||
mvdom.version = NVAPI_VOLT_STATUS_VER;
|
||||
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
|
||||
if (mvdom.value_uV)
|
||||
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
|
||||
}
|
||||
// Pascal only
|
||||
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
|
||||
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
|
||||
if ((ret = NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom)) == NVAPI_OK) {
|
||||
NVAPI_VOLTBOOST_PERCENT pvb = { 0 };
|
||||
pvb.version = NVAPI_VOLTBOOST_PERCENT_VER;
|
||||
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], &pvb)) == NVAPI_OK) {
|
||||
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
|
||||
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
|
||||
NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom);
|
||||
if (pvdom.value_uV)
|
||||
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom.value_uV/1000, pvb.percent);
|
||||
else
|
||||
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
|
||||
} else {
|
||||
// Maxwell 9xx
|
||||
NVAPI_VOLT_STATUS mvdom = { 0 };
|
||||
mvdom.version = NVAPI_VOLT_STATUS_VER;
|
||||
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
|
||||
if (mvdom.value_uV)
|
||||
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t plim = nvapi_get_plimit(devNum);
|
||||
@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
||||
// seems ok on maxwell and pascal for the mem clocks
|
||||
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
|
||||
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
|
||||
if (ret == NVAPI_OK) {
|
||||
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
||||
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
||||
@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Replacement for WIN32 CUDA 6.5 on pascal
|
||||
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
|
||||
{
|
||||
NvAPI_Status ret = NVAPI_OK;
|
||||
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
|
||||
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
|
||||
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
|
||||
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
|
||||
*total = mem.availableDedicatedVideoMemory;
|
||||
*free = mem.curAvailableDedicatedVideoMemory;
|
||||
}
|
||||
return (int) ret;
|
||||
}
|
||||
|
||||
int nvapi_init()
|
||||
{
|
||||
int num_gpus = cuda_num_devices();
|
||||
|
Loading…
Reference in New Issue
Block a user