nvapi: pascal core voltage boost + meminfo fix x86
cuda 6.5 one seems to crash on pascal or report invalid mem sizes
This commit is contained in:
parent
0555201384
commit
095f25f9a8
@ -1,5 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#define NVAPI_INTERNAL
|
||||||
#include "nvapi.h"
|
#include "nvapi.h"
|
||||||
|
|
||||||
NvAPI_Status nvapi_dll_init();
|
NvAPI_Status nvapi_dll_init();
|
||||||
@ -183,6 +184,13 @@ typedef struct {
|
|||||||
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
|
} NVAPI_VFP_CURVE; // 7208 bytes (1-1c28)
|
||||||
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
|
#define NVAPI_VFP_CURVE_VER MAKE_NVAPI_VERSION(NVAPI_VFP_CURVE, 1)
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
NvU32 version;
|
||||||
|
NvS32 percent;
|
||||||
|
NvU32 pad[8];
|
||||||
|
} NVAPI_VOLTBOOST_PERCENT; // 40 bytes (1-0028)
|
||||||
|
#define NVAPI_VOLTBOOST_PERCENT_VER MAKE_NVAPI_VERSION(NVAPI_VOLTBOOST_PERCENT, 1)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
NvU32 version;
|
NvU32 version;
|
||||||
NvU32 flags;
|
NvU32 flags;
|
||||||
@ -238,6 +246,8 @@ NvAPI_Status NvAPI_DLL_GetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAP
|
|||||||
NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009
|
NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_CLOCK_TABLE*); // 0x0733E009
|
||||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VFP_CURVE*); // 0x21537AD4
|
||||||
NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c
|
NvAPI_Status NvAPI_DLL_GetCurrentVoltage(NvPhysicalGpuHandle handle, NVAPI_VOLTAGE_STATUS* status); // 0x465F9BCF 1-004c
|
||||||
|
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
|
||||||
|
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT*);
|
||||||
|
|
||||||
// Maxwell only
|
// Maxwell only
|
||||||
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
NvAPI_Status NvAPI_DLL_GetVoltageDomainsStatus(NvPhysicalGpuHandle hPhysicalGpu, NVAPI_VOLT_STATUS*); // 0xC16C7E2C
|
||||||
|
17
cuda.cpp
17
cuda.cpp
@ -105,7 +105,8 @@ void cuda_print_devices()
|
|||||||
cudaDeviceProp props;
|
cudaDeviceProp props;
|
||||||
cudaGetDeviceProperties(&props, dev_id);
|
cudaGetDeviceProperties(&props, dev_id);
|
||||||
if (!opt_n_threads || n < opt_n_threads) {
|
if (!opt_n_threads || n < opt_n_threads) {
|
||||||
fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]);
|
fprintf(stderr, "GPU #%d: SM %d.%d %s @ %.0f MHz (MEM %.0f)\n", dev_id, props.major, props.minor,
|
||||||
|
device_name[dev_id], (double) props.clockRate/1000, (double) props.memoryClockRate/1000);
|
||||||
#ifdef USE_WRAPNVML
|
#ifdef USE_WRAPNVML
|
||||||
if (opt_debug) nvml_print_device_info(dev_id);
|
if (opt_debug) nvml_print_device_info(dev_id);
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
@ -200,18 +201,14 @@ int cuda_available_memory(int thr_id)
|
|||||||
{
|
{
|
||||||
int dev_id = device_map[thr_id % MAX_GPUS];
|
int dev_id = device_map[thr_id % MAX_GPUS];
|
||||||
size_t mtotal = 0, mfree = 0;
|
size_t mtotal = 0, mfree = 0;
|
||||||
cudaDeviceProp props;
|
#if defined(_WIN32) && defined(USE_WRAPNVML)
|
||||||
|
// cuda (6.5) one can crash on pascal and dont handle 8GB
|
||||||
|
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
|
||||||
|
#else
|
||||||
cudaSetDevice(dev_id);
|
cudaSetDevice(dev_id);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
|
cudaMemGetInfo(&mfree, &mtotal);
|
||||||
#if defined(_WIN32) && CUDART_VERSION == 6050 && defined(_DEBUG)
|
|
||||||
if (!strstr(props.name, "GTX 10"))
|
|
||||||
// seems to crash in vstudio on 8GB cards (pascal ?) with cuda 6.5
|
|
||||||
cudaMemGetInfo(&mfree, &mtotal);
|
|
||||||
#else
|
|
||||||
cudaMemGetInfo(&mfree, &mtotal);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
return (int) (mfree / (1024 * 1024));
|
return (int) (mfree / (1024 * 1024));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
24
nvapi.cpp
24
nvapi.cpp
@ -249,7 +249,7 @@ NvAPI_Status NvAPI_DLL_SetClockBoostTable(NvPhysicalGpuHandle handle, NVAPI_CLOC
|
|||||||
return (*pointer)(handle, table);
|
return (*pointer)(handle, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal 39442CFB to check also, Set ?
|
#define NVAPI_ID_VFP_CURVE_GET 0x21537AD4 // Pascal
|
||||||
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
|
NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE* curve) {
|
||||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VFP_CURVE*) = NULL;
|
||||||
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||||
@ -259,6 +259,28 @@ NvAPI_Status NvAPI_DLL_GetVFPCurve(NvPhysicalGpuHandle handle, NVAPI_VFP_CURVE*
|
|||||||
return (*pointer)(handle, curve);
|
return (*pointer)(handle, curve);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define NVAPI_ID_CURVE_GET 0xE440B867 // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
|
||||||
|
#define NVAPI_ID_CURVE_SET 0x39442CFB // Pascal 2-030c struct 0C 03 02 00 00 00 00 00 01 00 00 00 06 00 00 00
|
||||||
|
|
||||||
|
#define NVAPI_ID_VOLTBOOST_GET 0x9DF23CA1 // Pascal 1-0028
|
||||||
|
NvAPI_Status NvAPI_DLL_GetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
|
||||||
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
|
||||||
|
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||||
|
if(!pointer) {
|
||||||
|
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_GET);
|
||||||
|
}
|
||||||
|
return (*pointer)(handle, boost);
|
||||||
|
}
|
||||||
|
#define NVAPI_ID_VOLTBOOST_SET 0xB9306D9B // Pascal 1-0028
|
||||||
|
NvAPI_Status NvAPI_DLL_SetCoreVoltageBoostPercent(NvPhysicalGpuHandle handle, NVAPI_VOLTBOOST_PERCENT* boost) {
|
||||||
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*) = NULL;
|
||||||
|
if(!nvapi_dll_loaded) return NVAPI_API_NOT_INITIALIZED;
|
||||||
|
if(!pointer) {
|
||||||
|
pointer = (NvAPI_Status (*)(NvPhysicalGpuHandle, NVAPI_VOLTBOOST_PERCENT*))nvidia_handle->query(NVAPI_ID_VOLTBOOST_SET);
|
||||||
|
}
|
||||||
|
return (*pointer)(handle, boost);
|
||||||
|
}
|
||||||
|
|
||||||
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
|
#define NVAPI_ID_PERFCLOCKS_GET 0x1EA54A3B
|
||||||
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
|
NvAPI_Status NvAPI_DLL_GetPerfClocks(NvPhysicalGpuHandle handle, void* pFreqs){
|
||||||
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL;
|
static NvAPI_Status (*pointer)(NvPhysicalGpuHandle, void*) = NULL;
|
||||||
|
46
nvml.cpp
46
nvml.cpp
@ -1017,13 +1017,13 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
|
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
|
||||||
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
|
info.pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
|
||||||
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
|
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
|
||||||
applog(LOG_RAW, " OC %4d MHz %6.1f MHz",
|
applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz",
|
||||||
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
|
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// boost over volting (GTX 9xx only ?)
|
// boost over volting (GTX 9xx only ?)
|
||||||
for (n=0; n < info.ov.numVoltages; n++) {
|
for (n=0; n < info.ov.numVoltages; n++) {
|
||||||
applog(LOG_RAW, " OV: %u+%u mV%s \x7F %d/%d",
|
applog(LOG_RAW, " OV: %u%+u mV%s \x7F %d/%d",
|
||||||
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
|
info.ov.voltages[n].volt_uV/1000, info.ov.voltages[n].voltDelta_uV.value/1000, info.ov.voltages[n].bIsEditable ? "*":" ",
|
||||||
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
|
info.ov.voltages[n].voltDelta_uV.valueRange.min/1000, info.ov.voltages[n].voltDelta_uV.valueRange.max/1000);
|
||||||
}
|
}
|
||||||
@ -1048,19 +1048,25 @@ int nvapi_pstateinfo(unsigned int devNum)
|
|||||||
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
||||||
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
(double) freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
||||||
|
|
||||||
// Maxwell only
|
|
||||||
NVAPI_VOLT_STATUS mvdom = { 0 };
|
|
||||||
mvdom.version = NVAPI_VOLT_STATUS_VER;
|
|
||||||
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
|
|
||||||
if (mvdom.value_uV)
|
|
||||||
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
|
|
||||||
}
|
|
||||||
// Pascal only
|
// Pascal only
|
||||||
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
|
NVAPI_VOLTBOOST_PERCENT pvb = { 0 };
|
||||||
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
|
pvb.version = NVAPI_VOLTBOOST_PERCENT_VER;
|
||||||
if ((ret = NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom)) == NVAPI_OK) {
|
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], &pvb)) == NVAPI_OK) {
|
||||||
|
NVAPI_VOLTAGE_STATUS pvdom = { 0 };
|
||||||
|
pvdom.version = NVAPI_VOLTAGE_STATUS_VER;
|
||||||
|
NvAPI_DLL_GetCurrentVoltage(phys[devNum], &pvdom);
|
||||||
if (pvdom.value_uV)
|
if (pvdom.value_uV)
|
||||||
|
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom.value_uV/1000, pvb.percent);
|
||||||
|
else
|
||||||
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
|
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom.value_uV/1000);
|
||||||
|
} else {
|
||||||
|
// Maxwell 9xx
|
||||||
|
NVAPI_VOLT_STATUS mvdom = { 0 };
|
||||||
|
mvdom.version = NVAPI_VOLT_STATUS_VER;
|
||||||
|
if ((ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], &mvdom)) == NVAPI_OK) {
|
||||||
|
if (mvdom.value_uV)
|
||||||
|
applog(LOG_RAW, " GPU Voltage is %u mV", mvdom.value_uV/1000);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t plim = nvapi_get_plimit(devNum);
|
uint8_t plim = nvapi_get_plimit(devNum);
|
||||||
@ -1334,7 +1340,7 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
|||||||
// seems ok on maxwell and pascal for the mem clocks
|
// seems ok on maxwell and pascal for the mem clocks
|
||||||
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
||||||
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
||||||
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // wrong def clocks, useless
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
|
||||||
if (ret == NVAPI_OK) {
|
if (ret == NVAPI_OK) {
|
||||||
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
||||||
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
||||||
@ -1358,6 +1364,20 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replacement for WIN32 CUDA 6.5 on pascal
|
||||||
|
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
|
||||||
|
{
|
||||||
|
NvAPI_Status ret = NVAPI_OK;
|
||||||
|
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
|
||||||
|
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
|
||||||
|
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
|
||||||
|
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
|
||||||
|
*total = mem.availableDedicatedVideoMemory;
|
||||||
|
*free = mem.curAvailableDedicatedVideoMemory;
|
||||||
|
}
|
||||||
|
return (int) ret;
|
||||||
|
}
|
||||||
|
|
||||||
int nvapi_init()
|
int nvapi_init()
|
||||||
{
|
{
|
||||||
int num_gpus = cuda_num_devices();
|
int num_gpus = cuda_num_devices();
|
||||||
|
2
nvml.h
2
nvml.h
@ -226,6 +226,8 @@ int nvapi_init_settings();
|
|||||||
int nvapi_pstateinfo(unsigned int devNum);
|
int nvapi_pstateinfo(unsigned int devNum);
|
||||||
uint8_t nvapi_get_plimit(unsigned int devNum);
|
uint8_t nvapi_get_plimit(unsigned int devNum);
|
||||||
|
|
||||||
|
// cuda Replacement for 6.5 compat
|
||||||
|
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* USE_WRAPNVML */
|
#endif /* USE_WRAPNVML */
|
||||||
|
Loading…
Reference in New Issue
Block a user