mirror of https://github.com/GOSTSec/ccminer
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2276 lines
70 KiB
2276 lines
70 KiB
/* |
|
* A trivial little dlopen()-based wrapper library for the |
|
* NVIDIA NVML library, to allow runtime discovery of NVML on an |
|
* arbitrary system. This is all very hackish and simple-minded, but |
|
* it serves my immediate needs in the short term until NVIDIA provides |
|
* a static NVML wrapper library themselves, hopefully in |
|
* CUDA 6.5 or maybe sometime shortly after. |
|
* |
|
* This trivial code is made available under the "new" 3-clause BSD license, |
|
* and/or any of the GPL licenses you prefer. |
|
* Feel free to use the code and modify as you see fit. |
|
* |
|
* John E. Stone - john.stone@gmail.com |
|
* Tanguy Pruvot - tpruvot@github |
|
* |
|
*/ |
|
|
|
#include <errno.h> |
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <unistd.h> |
|
|
|
#include "miner.h" |
|
#include "nvml.h" |
|
#include "cuda_runtime.h" |
|
|
|
#ifdef USE_WRAPNVML |
|
|
|
extern nvml_handle *hnvml; |
|
extern char driver_version[32]; |
|
|
|
static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; |
|
|
|
extern uint32_t device_gpu_clocks[MAX_GPUS]; |
|
extern uint32_t device_mem_clocks[MAX_GPUS]; |
|
extern int32_t device_mem_offsets[MAX_GPUS]; |
|
extern uint8_t device_tlimit[MAX_GPUS]; |
|
extern int8_t device_pstate[MAX_GPUS]; |
|
extern int32_t device_led[MAX_GPUS]; |
|
int32_t device_led_state[MAX_GPUS] = { 0 }; |
|
static __thread bool has_rgb_ok = false; |
|
|
|
uint32_t clock_prev[MAX_GPUS] = { 0 }; |
|
uint32_t clock_prev_mem[MAX_GPUS] = { 0 }; |
|
uint32_t limit_prev[MAX_GPUS] = { 0 }; |
|
|
|
static bool nvml_plimit_set = false; |
|
extern bool need_memclockrst; |
|
|
|
/* |
|
* Wrappers to emulate dlopen() on other systems like Windows |
|
*/ |
|
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) |
|
#include <windows.h> |
|
static void *wrap_dlopen(const char *filename) { |
|
HMODULE h = LoadLibrary(filename); |
|
if (!h && opt_debug) { |
|
applog(LOG_DEBUG, "dlopen(%d): failed to load %s", |
|
GetLastError(), filename); |
|
} |
|
return (void*)h; |
|
} |
|
static void *wrap_dlsym(void *h, const char *sym) { |
|
return (void *)GetProcAddress((HINSTANCE)h, sym); |
|
} |
|
static int wrap_dlclose(void *h) { |
|
/* FreeLibrary returns nonzero on success */ |
|
return (!FreeLibrary((HINSTANCE)h)); |
|
} |
|
#else |
|
/* assume we can use dlopen itself... */ |
|
#include <dlfcn.h> |
|
#include <errno.h> |
|
static void *wrap_dlopen(const char *filename) { |
|
void *h = dlopen(filename, RTLD_NOW); |
|
if (h == NULL && opt_debug) { |
|
applog(LOG_DEBUG, "dlopen(%d): failed to load %s", |
|
errno, filename); |
|
} |
|
return (void*)h; |
|
} |
|
|
|
/* Resolve a symbol in a library opened by wrap_dlopen(); NULL if not found. */
static void *wrap_dlsym(void *h, const char *sym) {
	void *address = dlsym(h, sym);
	return address;
}
|
/* Close a library handle; forwards dlclose()'s result (0 on success). */
static int wrap_dlclose(void *h) {
	const int status = dlclose(h);
	return status;
}
|
#endif |
|
|
|
nvml_handle * nvml_create() |
|
{ |
|
int i=0; |
|
nvml_handle *nvmlh = NULL; |
|
|
|
#ifdef WIN32 |
|
/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */ |
|
#define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll" |
|
#else |
|
/* linux assumed */ |
|
#define libnvidia_ml "libnvidia-ml.so" |
|
#endif |
|
|
|
char tmp[512]; |
|
#ifdef WIN32 |
|
ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp)); |
|
#else |
|
strcpy(tmp, libnvidia_ml); |
|
#endif |
|
|
|
void *nvml_dll = wrap_dlopen(tmp); |
|
if (nvml_dll == NULL) { |
|
#ifdef WIN32 |
|
nvml_dll = wrap_dlopen("nvml.dll"); |
|
if (nvml_dll == NULL) |
|
#endif |
|
return NULL; |
|
} |
|
|
|
nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle)); |
|
|
|
nvmlh->nvml_dll = nvml_dll; |
|
|
|
nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); |
|
if (!nvmlh->nvmlInit) |
|
nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); |
|
nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); |
|
if (!nvmlh->nvmlDeviceGetCount) |
|
nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); |
|
nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); |
|
nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction"); |
|
nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction"); |
|
nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock"); |
|
nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock"); |
|
nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks"); |
|
nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks"); |
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks"); |
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks"); |
|
nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); |
|
nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo"); |
|
nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); |
|
if (!nvmlh->nvmlDeviceGetPciInfo) |
|
nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); |
|
nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration"); |
|
nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth"); |
|
nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration"); |
|
nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth"); |
|
nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); |
|
nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementDefaultLimit"); |
|
nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit"); |
|
nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints"); |
|
nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit"); |
|
nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); |
|
nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); |
|
nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); |
|
nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */ |
|
nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial"); |
|
nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID"); |
|
nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion"); |
|
nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion"); |
|
nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); |
|
nvmlh->nvmlShutdown = (nvmlReturn_t (*)()) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); |
|
// v331 |
|
nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); |
|
// v340 |
|
#ifdef __linux__ |
|
nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); |
|
nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); |
|
nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); |
|
#endif |
|
// v346 |
|
nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); |
|
// v36x (API 8 / Pascal) |
|
nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) |
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock"); |
|
|
|
if (nvmlh->nvmlInit == NULL || |
|
nvmlh->nvmlShutdown == NULL || |
|
nvmlh->nvmlErrorString == NULL || |
|
nvmlh->nvmlDeviceGetCount == NULL || |
|
nvmlh->nvmlDeviceGetHandleByIndex == NULL || |
|
nvmlh->nvmlDeviceGetPciInfo == NULL || |
|
nvmlh->nvmlDeviceGetName == NULL) |
|
{ |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "Failed to obtain required NVML function pointers"); |
|
wrap_dlclose(nvmlh->nvml_dll); |
|
free(nvmlh); |
|
return NULL; |
|
} |
|
|
|
nvmlh->nvmlInit(); |
|
if (nvmlh->nvmlSystemGetDriverVersion) |
|
nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); |
|
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); |
|
|
|
/* Query CUDA device count, in case it doesn't agree with NVML, since */ |
|
/* CUDA will only report GPUs with compute capability greater than 1.0 */ |
|
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "Failed to query CUDA device count!"); |
|
wrap_dlclose(nvmlh->nvml_dll); |
|
free(nvmlh); |
|
return NULL; |
|
} |
|
|
|
nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); |
|
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
|
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
|
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
|
nvmlh->nvml_pci_vendor_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
|
nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
|
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); |
|
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); |
|
nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); |
|
|
|
/* Obtain GPU device handles we're going to need repeatedly... */ |
|
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
|
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); |
|
} |
|
|
|
/* Query PCI info for each NVML device, and build table for mapping of */ |
|
/* CUDA device IDs to NVML device IDs and vice versa */ |
|
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
|
nvmlPciInfo_t pciinfo; |
|
|
|
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); |
|
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; |
|
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; |
|
nvmlh->nvml_pci_device_id[i] = pciinfo.device; |
|
nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id; |
|
nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id; |
|
|
|
nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN; |
|
if (nvmlh->nvmlDeviceSetAPIRestriction) { |
|
nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, |
|
NVML_FEATURE_ENABLED); |
|
/* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */ |
|
} |
|
if (nvmlh->nvmlDeviceGetAPIRestriction) { |
|
nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, |
|
&nvmlh->app_clocks[i]); |
|
} |
|
} |
|
|
|
/* build mapping of NVML device IDs to CUDA IDs */ |
|
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
|
nvmlh->nvml_cuda_device_id[i] = -1; |
|
} |
|
for (i=0; i<nvmlh->cuda_gpucount; i++) { |
|
cudaDeviceProp props; |
|
nvmlh->cuda_nvml_device_id[i] = -1; |
|
|
|
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { |
|
device_bus_ids[i] = props.pciBusID; |
|
for (int j = 0; j < nvmlh->nvml_gpucount; j++) { |
|
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && |
|
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && |
|
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u", |
|
i, j, (uint32_t) props.pciBusID); |
|
nvmlh->nvml_cuda_device_id[j] = i; |
|
nvmlh->cuda_nvml_device_id[i] = j; |
|
} |
|
} |
|
} |
|
} |
|
|
|
return nvmlh; |
|
} |
|
|
|
/* apply config clocks to an used device */ |
|
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) |
|
{ |
|
nvmlReturn_t rc; |
|
uint32_t gpu_clk = 0, mem_clk = 0; |
|
int n = nvmlh->cuda_nvml_device_id[dev_id]; |
|
//if (need_nvsettings) /* prefer later than init time */ |
|
// nvs_set_clocks(dev_id); |
|
if (n < 0 || n >= nvmlh->nvml_gpucount) |
|
return -ENODEV; |
|
|
|
if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) |
|
return 0; // nothing to do |
|
|
|
if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { |
|
applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id); |
|
return -EPERM; |
|
} |
|
|
|
uint32_t mem_prev = clock_prev_mem[dev_id]; |
|
if (!mem_prev) |
|
nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev); |
|
uint32_t gpu_prev = clock_prev[dev_id]; |
|
if (!gpu_prev) |
|
nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev); |
|
|
|
nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); |
|
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); |
|
if (rc != NVML_SUCCESS) { |
|
applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); |
|
return -EINVAL; |
|
} |
|
|
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk); |
|
|
|
// get application config values |
|
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; |
|
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; |
|
|
|
// these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ |
|
uint32_t nclocks = 0, mem_clocks[32] = { 0 }; |
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); |
|
nclocks = min(nclocks, 32); |
|
if (nclocks) |
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); |
|
for (uint8_t u=0; u < nclocks; u++) { |
|
// ordered by pstate (so highest is first memory clock - P0) |
|
if (mem_clocks[u] <= mem_clk) { |
|
mem_clk = mem_clocks[u]; |
|
break; |
|
} |
|
} |
|
|
|
uint32_t* gpu_clocks = NULL; |
|
nclocks = 0; |
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); |
|
if (nclocks) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk); |
|
gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); |
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); |
|
for (uint8_t u=0; u < nclocks; u++) { |
|
// ordered desc, so get first |
|
if (gpu_clocks[u] <= gpu_clk) { |
|
gpu_clk = gpu_clocks[u]; |
|
break; |
|
} |
|
} |
|
free(gpu_clocks); |
|
} |
|
|
|
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); |
|
if (rc == NVML_SUCCESS) |
|
applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk); |
|
else { |
|
applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); |
|
return -1; |
|
} |
|
|
|
// store previous clocks for reset on exit (or during wait...) |
|
clock_prev[dev_id] = gpu_prev; |
|
clock_prev_mem[dev_id] = mem_prev; |
|
return 1; |
|
} |
|
|
|
/* reset default app clocks and limits on exit */ |
|
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) |
|
{ |
|
int ret = 0; |
|
nvmlReturn_t rc; |
|
uint32_t gpu_clk = 0, mem_clk = 0; |
|
int n = nvmlh->cuda_nvml_device_id[dev_id]; |
|
if (need_nvsettings) |
|
nvs_reset_clocks(dev_id); |
|
if (n < 0 || n >= nvmlh->nvml_gpucount) |
|
return -ENODEV; |
|
|
|
if (clock_prev[dev_id]) { |
|
rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]); |
|
if (rc != NVML_SUCCESS) { |
|
applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id); |
|
} |
|
clock_prev[dev_id] = 0; |
|
ret = 1; |
|
} |
|
|
|
if (limit_prev[dev_id]) { |
|
uint32_t plimit = limit_prev[dev_id]; |
|
if (nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) { |
|
rc = nvmlh->nvmlDeviceGetPowerManagementDefaultLimit(nvmlh->devs[n], &plimit); |
|
} else if (plimit) { |
|
rc = NVML_SUCCESS; |
|
} |
|
if (rc == NVML_SUCCESS) |
|
nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); |
|
ret = 1; |
|
} |
|
return ret; |
|
} |
|
|
|
/** |
|
* Set power state of a device (9xx) |
|
* Code is similar as clocks one, which allow the change of the pstate |
|
*/ |
|
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) |
|
{ |
|
nvmlReturn_t rc; |
|
uint32_t gpu_clk = 0, mem_clk = 0; |
|
int n = nvmlh->cuda_nvml_device_id[dev_id]; |
|
if (n < 0 || n >= nvmlh->nvml_gpucount) |
|
return -ENODEV; |
|
|
|
if (device_pstate[dev_id] < 0) |
|
return 0; |
|
|
|
if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { |
|
applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id); |
|
return -EPERM; |
|
} |
|
|
|
nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); |
|
rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); |
|
if (rc != NVML_SUCCESS) { |
|
applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); |
|
return -EINVAL; |
|
} |
|
|
|
// get application config values |
|
if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; |
|
if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; |
|
|
|
// these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ |
|
uint32_t nclocks = 0, mem_clocks[32] = { 0 }; |
|
int8_t wanted_pstate = device_pstate[dev_id]; |
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); |
|
nclocks = min(nclocks, 32); |
|
if (nclocks) |
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); |
|
if ((uint32_t) wanted_pstate+1 > nclocks) { |
|
applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks); |
|
} |
|
for (uint8_t u=0; u < nclocks; u++) { |
|
// ordered by pstate (so highest P0 first) |
|
if (u == wanted_pstate) { |
|
mem_clk = mem_clocks[u]; |
|
break; |
|
} |
|
} |
|
|
|
uint32_t* gpu_clocks = NULL; |
|
nclocks = 0; |
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); |
|
if (nclocks) { |
|
gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); |
|
rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); |
|
if (rc == NVML_SUCCESS) { |
|
// ordered desc, get the max app clock (do not limit) |
|
gpu_clk = gpu_clocks[0]; |
|
} |
|
free(gpu_clocks); |
|
} |
|
|
|
rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); |
|
if (rc != NVML_SUCCESS) { |
|
applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate, |
|
mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); |
|
return -1; |
|
} |
|
|
|
if (!opt_quiet) |
|
applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk); |
|
|
|
clock_prev[dev_id] = 1; |
|
return 1; |
|
} |
|
|
|
/*
 * Apply the configured power limit (device_plimit[dev_id], in watts) to a
 * device. The value is converted to milliwatts and clamped to the range
 * reported by the driver; when no maximum is known, the current limit is
 * used as the maximum. The limit that was active before is saved in
 * limit_prev[dev_id] so that nvml_reset_clocks() can restore it.
 *
 * Returns 1 when the limit was set, 0 when nothing is configured, and a
 * negative value on error (-ENODEV, -ENOSYS, or -1 when the driver refuses).
 */
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
{
	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;

	if (!nvmlh || dev_id < 0 || dev_id >= MAX_GPUS || dev_id >= nvmlh->cuda_gpucount)
		return -ENODEV;

	const int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (!device_plimit[dev_id])
		return 0; // nothing to do

	if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
		return -ENOSYS;

	uint32_t plimit = device_plimit[dev_id] * 1000;
	uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
	if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);

	if (nvmlh->nvmlDeviceGetPowerManagementLimit) {
		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit);
	} else if (rc != NVML_SUCCESS) {
		/* neither the constraints nor the current limit can be queried */
		return -ENOSYS;
	}
	if (!pmax) pmax = prev_limit;

	/* clamp to [pmin, pmax]; an unknown maximum (0) must not force the request down to pmin */
	if (pmax && plimit > pmax) plimit = pmax;
	if (plimit < pmin) plimit = pmin;

	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
	if (rc != NVML_SUCCESS) {
#ifndef WIN32
		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
#endif
		return -1;
	} else {
		device_plimit[dev_id] = plimit / 1000;
		nvml_plimit_set = true;
	}

	if (!opt_quiet) {
		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
			dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
	}

	limit_prev[dev_id] = prev_limit;
	return 1;
}
|
|
|
/*
 * Read the current power management limit of a CUDA device.
 * Returns the value reported by NVML, or 0 when there is no handle, the
 * device is not mapped to an NVML device, or the query is unavailable.
 */
uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id)
{
	if (!nvmlh)
		return 0;

	const int nvml_idx = nvmlh->cuda_nvml_device_id[dev_id];
	const bool mapped = (nvml_idx >= 0 && nvml_idx < nvmlh->nvml_gpucount);
	if (!mapped)
		return 0;

	uint32_t current_limit = 0;
	if (nvmlh->nvmlDeviceGetPowerManagementLimit != NULL)
		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[nvml_idx], &current_limit);

	return current_limit;
}
|
|
|
// ccminer -D -n |
|
#define LSTDEV_PFX " " |
|
void nvml_print_device_info(int dev_id) |
|
{ |
|
if (!hnvml) return; |
|
|
|
int n = hnvml->cuda_nvml_device_id[dev_id]; |
|
if (n < 0 || n >= hnvml->nvml_gpucount) |
|
return; |
|
|
|
nvmlReturn_t rc; |
|
|
|
// fprintf(stderr, "------ Hardware ------\n"); |
|
int gvid = hnvml->nvml_pci_vendor_id[n] & 0xFFFF; |
|
int gpid = hnvml->nvml_pci_vendor_id[n] >> 16; |
|
int svid = hnvml->nvml_pci_subsys_id[n] & 0xFFFF; |
|
int spid = hnvml->nvml_pci_subsys_id[n] >> 16; |
|
|
|
fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid, |
|
(int) hnvml->nvml_pci_domain_id[n], (int) hnvml->nvml_pci_bus_id[n], (int) hnvml->nvml_pci_device_id[n]); |
|
|
|
if (hnvml->nvmlDeviceGetClock) { |
|
uint32_t gpu_clk = 0, mem_clk = 0; |
|
|
|
// fprintf(stderr, "------- Clocks -------\n"); |
|
|
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk); |
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk); |
|
if (rc == NVML_SUCCESS) { |
|
fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); |
|
} |
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk); |
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk); |
|
if (rc == NVML_SUCCESS) { |
|
fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); |
|
} |
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk); |
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk); |
|
if (rc == NVML_SUCCESS) { |
|
fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); |
|
} |
|
} |
|
} |
|
|
|
/* Store the number of devices counted by NVML into *gpucount; always returns 0. */
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
	const int nvml_devices = nvmlh->nvml_gpucount;

	gpucount[0] = nvml_devices;
	return 0;
}
|
|
|
/* Store the number of devices counted by CUDA into *gpucount; always returns 0. */
int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
	const int cuda_devices = nvmlh->cuda_gpucount;

	gpucount[0] = cuda_devices;
	return 0;
}
|
|
|
|
|
/*
 * Copy the NVML name of a CUDA device into namebuf (bufsize bytes).
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart,
 * -ENOSYS when the entry point is missing, -1 when the query fails.
 */
int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (idx >= 0 && idx < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetName == NULL)
		return -ENOSYS;

	const nvmlReturn_t status = nvmlh->nvmlDeviceGetName(nvmlh->devs[idx], namebuf, bufsize);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
/*
 * Query the GPU temperature sensor of a CUDA device into *tempC.
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart,
 * -ENOSYS when the entry point is missing, -1 when the query fails.
 */
int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (idx >= 0 && idx < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetTemperature == NULL)
		return -ENOSYS;

	/* sensor 0 is NVML_TEMPERATURE_GPU */
	const nvmlReturn_t status = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[idx], 0u, tempC);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
/*
 * Query the fan speed of a CUDA device into *fanpcnt.
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart,
 * -ENOSYS when the entry point is missing, -1 when the query fails.
 */
int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (idx >= 0 && idx < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetFanSpeed == NULL)
		return -ENOSYS;

	const nvmlReturn_t status = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[idx], fanpcnt);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock) |
|
{ |
|
nvmlReturn_t rc; |
|
int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; |
|
if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; |
|
if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; |
|
|
|
rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); |
|
if (rc != NVML_SUCCESS) return -1; |
|
rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); |
|
if (rc != NVML_SUCCESS) return -1; |
|
|
|
return 0; |
|
} |
|
|
|
/* Not Supported on 750Ti 340.23 */ |
|
int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) |
|
{ |
|
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; |
|
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) |
|
return -ENODEV; |
|
|
|
if (!nvmlh->nvmlDeviceGetPowerUsage) |
|
return -ENOSYS; |
|
|
|
nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); |
|
if (res != NVML_SUCCESS) { |
|
//if (opt_debug) |
|
// applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); |
|
return -1; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
/* Not Supported on 750Ti 340.23 */ |
|
int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) |
|
{ |
|
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; |
|
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) |
|
return -ENODEV; |
|
|
|
if (!nvmlh->nvmlDeviceGetPerformanceState) |
|
return -ENOSYS; |
|
|
|
nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); |
|
if (res != NVML_SUCCESS) { |
|
//if (opt_debug) |
|
// applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); |
|
return -1; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
/*
 * Store the PCI bus id recorded for a CUDA device into *busid.
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart.
 */
int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (idx >= 0 && idx < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;

	*busid = (int) nvmlh->nvml_pci_bus_id[idx];
	return 0;
}
|
|
|
/*
 * Write a serial number for a CUDA device into sn (maxlen bytes, always
 * NUL terminated on success). The board serial is used when NVML provides
 * it; otherwise the device UUID without its first four characters is used.
 *
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart,
 * -EINVAL for an unusable output buffer, -ENOSYS when no usable entry
 * point exists, -1 when the UUID query fails.
 */
int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen)
{
	char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];
	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	nvmlReturn_t res;
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -ENODEV;

	/* sn[maxlen-1] below would be out of bounds otherwise */
	if (sn == NULL || maxlen <= 0)
		return -EINVAL;

	if (nvmlh->nvmlDeviceGetSerial) {
		res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen);
		if (res == NVML_SUCCESS)
			return 0;
	}

	if (!nvmlh->nvmlDeviceGetUUID)
		return -ENOSYS;

	// nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc
	// use a part of uuid to generate an unique serial
	// todo: check if there is vendor id is inside
	memset(uuid, 0, sizeof(uuid));
	res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1);
	if (res != NVML_SUCCESS) {
		if (opt_debug)
			applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res));
		return -1;
	}

	/* skip the 4-character prefix; a UUID that short yields an empty serial */
	const char *tail = (strlen(uuid) > 4) ? &uuid[4] : "";
	snprintf(sn, (size_t) maxlen, "%s", tail);
	return 0;
}
|
|
|
/*
 * Copy the VBIOS version string of a CUDA device into desc (maxlen bytes).
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart,
 * -ENOSYS when the entry point is missing, -1 when the query fails (the
 * NVML error text is logged when opt_debug is set).
 */
int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (idx >= 0 && idx < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetVbiosVersion == NULL)
		return -ENOSYS;

	const nvmlReturn_t status = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[idx], desc, maxlen);
	if (status == NVML_SUCCESS)
		return 0;

	if (opt_debug)
		applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(status));
	return -1;
}
|
|
|
/*
 * Extract vendor and product ids of a CUDA device from its stored PCI ids.
 * The subsystem id is preferred; the device id is used when the subsystem
 * id is zero, and its high word replaces a zero product id.
 * Returns 0 on success, -ENODEV when the device has no NVML counterpart.
 */
int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pid)
{
	const int idx = nvmlh->cuda_nvml_device_id[cudaindex];
	if (idx < 0 || idx >= nvmlh->nvml_gpucount)
		return -ENODEV;

	const uint32_t device_ids = nvmlh->nvml_pci_vendor_id[idx];
	uint32_t ids = nvmlh->nvml_pci_subsys_id[idx];
	if (ids == 0)
		ids = device_ids;

	pid = (uint16_t) (ids >> 16);
	vid = (uint16_t) (ids & 0xFFFF);

	// Colorful and Inno3D
	if (pid == 0)
		pid = (uint16_t) (device_ids >> 16);

	return 0;
}
|
|
|
/*
 * Shut NVML down, unload the library and release the handle together
 * with all of its per-device tables. Always returns 0.
 */
int nvml_destroy(nvml_handle *nvmlh)
{
	void *library = nvmlh->nvml_dll;

	nvmlh->nvmlShutdown();
	wrap_dlclose(library);

	/* device handles and feature flags */
	free(nvmlh->devs);
	free(nvmlh->app_clocks);

	/* CUDA <-> NVML index maps */
	free(nvmlh->cuda_nvml_device_id);
	free(nvmlh->nvml_cuda_device_id);

	/* PCI identification tables */
	free(nvmlh->nvml_pci_domain_id);
	free(nvmlh->nvml_pci_bus_id);
	free(nvmlh->nvml_pci_device_id);
	free(nvmlh->nvml_pci_vendor_id);
	free(nvmlh->nvml_pci_subsys_id);

	free(nvmlh);
	return 0;
}
|
|
|
// ---------------------------------------------------------------------------- |
|
|
|
/**
 * nvapi alternative for windows x86 binaries
 * the nvml api doesn't exist as a 32-bit dll :///
 */
|
#ifdef WIN32
#include "nvapi/nvapi_ccminer.h"

// CUDA device index -> NVAPI physical GPU index (an index into phys[]),
// filled by nvapi_init() by matching PCI bus ids
static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 };
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
// physical GPU handles returned by NvAPI_EnumPhysicalGPUs()
static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
// number of valid entries in phys[]
static NvU32 nvapi_dev_cnt = 0;
// defined in another translation unit
extern bool nvapi_dll_loaded;
|
|
|
int nvapi_temperature(unsigned int devNum, unsigned int *temperature) |
|
{ |
|
NvAPI_Status ret; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
NV_GPU_THERMAL_SETTINGS thermal; |
|
thermal.version = NV_GPU_THERMAL_SETTINGS_VER; |
|
ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string); |
|
return -1; |
|
} |
|
|
|
(*temperature) = (unsigned int) thermal.sensor[0].currentTemp; |
|
|
|
return 0; |
|
} |
|
|
|
int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) |
|
{ |
|
NvAPI_Status ret; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
NvU32 fanspeed = 0; |
|
ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string); |
|
return -1; |
|
} |
|
|
|
(*speed) = (unsigned int) fanspeed; |
|
|
|
return 0; |
|
} |
|
|
|
int nvapi_getpstate(unsigned int devNum, unsigned int *pstate) |
|
{ |
|
NvAPI_Status ret; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */ |
|
ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string); |
|
return -1; |
|
} |
|
else { |
|
// get pstate for the moment... often 0 = P0 |
|
(*pstate) = (unsigned int)CurrentPstate; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
#define UTIL_DOMAIN_GPU 0 |
|
int nvapi_getusage(unsigned int devNum, unsigned int *pct) |
|
{ |
|
NvAPI_Status ret; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
NV_GPU_DYNAMIC_PSTATES_INFO_EX info; |
|
info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; |
|
ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string); |
|
return -1; |
|
} |
|
else { |
|
if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent) |
|
(*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) |
|
{ |
|
NvAPI_Status ret; |
|
NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, &pRevisionId, &pExtDeviceId); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string); |
|
return -1; |
|
} |
|
|
|
pid = pDeviceId >> 16; |
|
vid = pDeviceId & 0xFFFF; |
|
if (vid == 0x10DE && pSubSystemId) { |
|
vid = pSubSystemId & 0xFFFF; |
|
pid = pSubSystemId >> 16; |
|
// Colorful and Inno3D |
|
if (pid == 0) pid = pDeviceId >> 16; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen) |
|
{ |
|
NvAPI_Status ret; |
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
memset(serial, 0, maxlen); |
|
|
|
if (maxlen < 11) |
|
return -EINVAL; |
|
|
|
NvAPI_ShortString ser = { 0 }; |
|
ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string); |
|
return -1; |
|
} |
|
|
|
uint8_t *bytes = (uint8_t*) ser; |
|
for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]); |
|
return 0; |
|
} |
|
|
|
int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) |
|
{ |
|
NvAPI_Status ret; |
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
if (maxlen < 64) // Short String |
|
return -1; |
|
|
|
ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string); |
|
return -1; |
|
} |
|
return 0; |
|
} |
|
|
|
static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState) |
|
{ |
|
NvAPI_Status ret = NVAPI_OK; |
|
NV_I2C_INFO_EX* i2cInfo; |
|
|
|
int delay1 = 20000; |
|
int delay2 = 0; |
|
|
|
uchar4 rgb = { 0 }; |
|
memcpy(&rgb, &RGB, 4); |
|
uchar4 prgb = { 0 }; |
|
int32_t prev = device_led_state[nvapi_devid(devNum)]; |
|
memcpy(&prgb, &prev, 4); |
|
|
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); |
|
if (i2cInfo == NULL) return -ENOMEM; |
|
|
|
NvU32 data[5] = { 0 }; |
|
NvU32 datv[2] = { 0, 1 }; |
|
NvU32 datw[2] = { 1, 0 }; |
|
if (rgb.z != prgb.z || ignorePrevState) { |
|
data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo) |
|
data[3] = 1; |
|
datv[0] = rgb.z | 0x13384000; |
|
|
|
i2cInfo->i2cDevAddress = 0x52; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 1; |
|
i2cInfo->pbData = (NvU8*) datv; |
|
i2cInfo->cbRead = 5; |
|
i2cInfo->cbSize = 1; |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); |
|
usleep(delay1); |
|
has_rgb_ok = (ret == NVAPI_OK); |
|
} |
|
|
|
if (rgb.y != prgb.y || ignorePrevState) { |
|
data[2] = 5; |
|
data[3] = 1; |
|
datv[0] = rgb.y | 0x4000; |
|
|
|
i2cInfo->i2cDevAddress = 0x52; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 1; |
|
i2cInfo->pbData = (NvU8*) datv; |
|
i2cInfo->cbRead = 5; |
|
i2cInfo->cbSize = 1; |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); |
|
usleep(delay1); |
|
has_rgb_ok = (ret == NVAPI_OK); |
|
} |
|
|
|
if (rgb.y != prgb.y || ignorePrevState) { |
|
data[2] = 6; |
|
data[3] = 1; |
|
datv[0] = rgb.x | 0x4000; |
|
|
|
i2cInfo->i2cDevAddress = 0x52; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 1; |
|
i2cInfo->pbData = (NvU8*) datv; |
|
i2cInfo->cbRead = 5; |
|
i2cInfo->cbSize = 1; |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); |
|
usleep(delay1); |
|
has_rgb_ok = (ret == NVAPI_OK); |
|
} |
|
|
|
if (rgb.w && ignorePrevState) { |
|
data[2] = 7; |
|
data[3] = 1; |
|
datv[0] = rgb.w | 0x4000; |
|
|
|
i2cInfo->i2cDevAddress = 0x52; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 1; |
|
i2cInfo->pbData = (NvU8*) datv; |
|
i2cInfo->cbRead = 5; |
|
i2cInfo->cbSize = 1; |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); |
|
usleep(delay1); |
|
has_rgb_ok = (ret == NVAPI_OK); |
|
} |
|
usleep(delay2); |
|
free(i2cInfo); |
|
return (int) ret; |
|
} |
|
|
|
static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) |
|
{ |
|
NvAPI_Status ret; |
|
NV_I2C_INFO_EX* i2cInfo; |
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); |
|
if (i2cInfo == NULL) |
|
return -ENOMEM; |
|
|
|
NvU32 readBuf[25] = { 0 }; |
|
NvU32 data[5] = { 0 }; |
|
data[0] = 1; |
|
data[2] = swab32(RGB & 0xfcfcfcU) | 0x40; |
|
|
|
i2cInfo->i2cDevAddress = 0x48 << 1; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS |
|
i2cInfo->pbData = (NvU8*) readBuf; |
|
i2cInfo->cbRead = 2; |
|
i2cInfo->cbSize = sizeof(readBuf); |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
//ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data); |
|
ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data); |
|
usleep(20000); |
|
free(i2cInfo); |
|
return (int) ret; |
|
} |
|
|
|
static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) |
|
{ |
|
NvAPI_Status ret; |
|
NV_I2C_INFO* i2cInfo; |
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo); |
|
if (i2cInfo == NULL) |
|
return -ENOMEM; |
|
|
|
NvU32 buf[25] = { 0 }; |
|
NvU32 data[5] = { 0 }; |
|
|
|
uint32_t color = 0, level = 0x40; |
|
|
|
uchar4 rgb = { 0 }; |
|
memcpy(&rgb, &RGB, 4); |
|
level = rgb.x & 0xF0; |
|
level |= rgb.y & 0xF0; |
|
level |= rgb.z & 0xF0; |
|
//applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x); |
|
|
|
// Not really RGB custom, only some basic colors, so convert |
|
// 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white |
|
if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; |
|
else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; |
|
else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; |
|
else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; |
|
else if (RGB & 0xFF) color = 4; |
|
else if (RGB & 0xFF00) color = 2; |
|
|
|
buf[0] = 0xF0; // F0 set colors |
|
buf[0] |= (color << 8); // logo |
|
buf[0] |= (1 << 16); // top |
|
if (RGB != 0) // level : 0x10 to 0xF0 |
|
buf[0] |= (level << 24); |
|
else |
|
buf[0] |= (0x10U << 24); |
|
|
|
// todo: i2c data crc ? |
|
|
|
i2cInfo->displayMask = 1; |
|
i2cInfo->bIsDDCPort = 1; |
|
i2cInfo->i2cDevAddress = 0x48 << 1; |
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); |
|
i2cInfo->regAddrSize = 1; |
|
i2cInfo->pbData = (NvU8*) buf; |
|
i2cInfo->cbSize = 4; |
|
i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED; |
|
i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4 |
|
i2cInfo->portId = 1; |
|
i2cInfo->bIsPortIdSet = 1; |
|
|
|
ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); |
|
// required to prevent i2c lock |
|
usleep(20000); |
|
|
|
#if 0 |
|
buf[0] = 0xF7; // F7 toggle leds |
|
if (RGB == 0) |
|
buf[0] |= (1 << 8); // 0 logo on, 1 off |
|
buf[0] |= (1 << 16); // 1 top off |
|
ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); |
|
usleep(20000); |
|
#endif |
|
// other modes: |
|
// 0xF1 breathing green (0x070202F1) |
|
// 0xF2 strobe green (0x070202F2) |
|
// 0xF3 cycle (0x000000F3) |
|
|
|
free(i2cInfo); |
|
return (int) ret; |
|
} |
|
|
|
int nvapi_set_led(unsigned int devNum, int RGB, char *device_name) |
|
{ |
|
uint16_t vid = 0, pid = 0; |
|
NvAPI_Status ret; |
|
if (strstr(device_name, "Gigabyte GTX 10")) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); |
|
return SetGigabyteRGBLogo(devNum, (uint32_t) RGB); |
|
} else if (strstr(device_name, "ASUS GTX 10")) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); |
|
return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok); |
|
} else if (strstr(device_name, "Zotac GTX 10")) { |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); |
|
return SetZotacRGBLogo(devNum, (uint32_t) RGB); |
|
} else { |
|
NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu; |
|
NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu); |
|
illu->hPhysicalGpu = phys[devNum]; |
|
illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; |
|
ret = NvAPI_GPU_QueryIlluminationSupport(illu); |
|
if (!ret && illu->bSupported) { |
|
NV_GPU_GET_ILLUMINATION_PARM *led; |
|
NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led); |
|
led->hPhysicalGpu = phys[devNum]; |
|
led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; |
|
NvAPI_GPU_GetIllumination(led); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB); |
|
led->Value = (uint32_t) RGB; |
|
ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led); |
|
free(led); |
|
} |
|
free(illu); |
|
return ret; |
|
} |
|
} |
|
|
|
int nvapi_pstateinfo(unsigned int devNum) |
|
{ |
|
uint32_t n; |
|
NvAPI_Status ret; |
|
uint32_t* mem = (uint32_t*) calloc(1, 0x4000); |
|
if (!mem) |
|
return -ENOMEM; |
|
|
|
unsigned int current = 0xFF; |
|
// useless on init but... |
|
nvapi_getpstate(devNum, ¤t); |
|
|
|
#if 0 |
|
// try :p |
|
uint32_t* buf = (uint32_t*) calloc(1, 0x8000); |
|
for (int i=8; i < 0x8000 && buf; i+=4) { |
|
buf[0] = 0x10000 + i; |
|
NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0; |
|
ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf); |
|
if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string); |
|
for (int n=0; n < i/32; n++) |
|
applog_hex(&buf[n*(32/4)], 32); |
|
break; |
|
} |
|
} |
|
free(buf); |
|
#endif |
|
|
|
#if 0 |
|
// Unsure of the meaning of these values |
|
NVAPI_GPU_POWER_TOPO topo = { 0 }; |
|
topo.version = NVAPI_GPU_POWER_TOPO_VER; |
|
if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) { |
|
if (topo.count) |
|
applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?", |
|
(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000); |
|
|
|
// Ok on 970, not pascal |
|
NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 }; |
|
pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; |
|
pset2.ov.numVoltages = 1; |
|
pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; |
|
ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); |
|
#endif |
|
|
|
NV_GPU_PERF_PSTATES20_INFO* info; |
|
NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem); |
|
if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_RAW, "NVAPI GetPstates20: %s", string); |
|
return -1; |
|
} |
|
|
|
for (n=0; n < info->numPstates; n++) { |
|
NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks; |
|
applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", |
|
info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId, |
|
clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ", |
|
(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ", |
|
info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ", |
|
info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable |
|
info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); |
|
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { |
|
applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", |
|
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); |
|
} |
|
} |
|
// boost over volting (GTX 9xx only ?) |
|
for (n=0; n < info->ov.numVoltages; n++) { |
|
applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d", |
|
info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? "*":" ", |
|
info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000); |
|
} |
|
|
|
NV_GPU_CLOCK_FREQUENCIES *freqs; |
|
NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); |
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); |
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); |
|
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); |
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); |
|
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); |
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, |
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); |
|
|
|
// Other clock values ?? |
|
NVAPI_GPU_PERF_CLOCKS *pcl; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); |
|
int numClock=0; ret = NVAPI_OK; |
|
while (ret == NVAPI_OK) { |
|
if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) { |
|
applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, |
|
(double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000); |
|
// ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error |
|
} |
|
numClock++; |
|
} |
|
|
|
// Pascal only |
|
NVAPI_VOLTBOOST_PERCENT *pvb; |
|
NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); |
|
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) { |
|
NVAPI_VOLTAGE_STATUS *pvdom; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); |
|
NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); |
|
if (pvdom && pvdom->value_uV) |
|
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent); |
|
else if (pvdom) |
|
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000); |
|
free(pvdom); |
|
} else { |
|
// Maxwell 9xx |
|
NVAPI_VOLT_STATUS *mvdom, *mvstep; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); |
|
if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) { |
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); |
|
NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); |
|
if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", |
|
(double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000); |
|
free(mvstep); |
|
} |
|
free(mvdom); |
|
} |
|
|
|
uint32_t plim = nvapi_get_plimit(devNum); |
|
double min_pw = 0, max_pw = 0; // percent |
|
|
|
NVAPI_GPU_POWER_INFO nfo = { 0 }; |
|
nfo.version = NVAPI_GPU_POWER_INFO_VER; |
|
ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); |
|
if (ret == NVAPI_OK && nfo.valid) { |
|
min_pw = (double)nfo.entries[0].min_power / 1000; |
|
max_pw = (double)nfo.entries[0].max_power / 1000; |
|
} |
|
applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); |
|
|
|
#if 0 |
|
NVAPI_COOLER_SETTINGS *cooler; |
|
NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); |
|
ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); |
|
if (ret == NVAPI_OK) { |
|
applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?) |
|
NVAPI_COOLER_LEVEL *fan; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); |
|
fan->level = 100; |
|
fan->count = 1; |
|
ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); |
|
free(fan); |
|
sleep(10); |
|
ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); |
|
} |
|
#endif |
|
|
|
NV_GPU_THERMAL_SETTINGS *tset; |
|
NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); |
|
|
|
NVAPI_GPU_THERMAL_INFO *tnfo; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); |
|
NVAPI_GPU_THERMAL_LIMIT *tlim; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); |
|
NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); |
|
NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); |
|
if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) { |
|
applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", |
|
tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, |
|
tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); |
|
} |
|
free(tnfo); |
|
free(tlim); |
|
|
|
#if 1 |
|
// Read pascal Clocks Table, Empty on 9xx |
|
//NVAPI_CLOCKS_RANGE* ranges; |
|
//NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); |
|
//ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); |
|
|
|
NVAPI_CLOCK_MASKS* boost; |
|
NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); |
|
ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); |
|
int gpuClocks = 0, memClocks = 0; |
|
for (n=0; n < 80+23; n++) { |
|
if (boost->clocks[n].memDelta) memClocks++; |
|
if (boost->clocks[n].gpuDelta) gpuClocks++; |
|
} |
|
|
|
// PASCAL GTX ONLY |
|
if (gpuClocks || memClocks) { |
|
NVAPI_CLOCK_TABLE *table; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); |
|
memcpy(table->mask, boost->mask, 12); |
|
ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); |
|
gpuClocks = 0, memClocks = 0; |
|
for (n=0; n < 12; n++) { |
|
if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); |
|
} |
|
for (n=0; n < 80; n++) { |
|
if (table->gpuDeltas[n].freqDelta) { |
|
// note: gpu delta value seems to be x2, not the memory |
|
//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); |
|
gpuClocks++; |
|
} |
|
} |
|
for (n=0; n < 23; n++) { |
|
if (table->memFilled[n]) { |
|
//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); |
|
memClocks++; |
|
} |
|
} |
|
for (n=0; n < 1529; n++) { |
|
if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]); |
|
} |
|
applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); |
|
free(table); |
|
|
|
NVAPI_VFP_CURVE *curve; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); |
|
memcpy(curve->mask, boost->mask, 12); |
|
ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); |
|
gpuClocks = 0, memClocks = 0; |
|
for (n=0; n < 80; n++) { |
|
if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) { |
|
// applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); |
|
gpuClocks++; |
|
} |
|
} |
|
for (n=0; n < 23; n++) { |
|
if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) { |
|
// applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); |
|
memClocks++; |
|
} |
|
} |
|
for (n=0; n < 1064; n++) { |
|
if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); |
|
} |
|
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); |
|
free(curve); |
|
} |
|
|
|
// Maxwell |
|
else { |
|
NVAPI_VOLTAGES_TABLE* volts; |
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); |
|
int entries = 0; |
|
ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); |
|
for (n=0; n < 128; n++) { |
|
if (volts->entries[n].volt_uV) |
|
entries++; |
|
} |
|
applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); |
|
free(volts); |
|
} |
|
|
|
NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; |
|
NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); |
|
meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; |
|
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) { |
|
applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024, |
|
(double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024); |
|
} |
|
#if 0 /* some undetermined stats */ |
|
NVAPI_GPU_PERF_INFO pi = { 0 }; |
|
pi.version = NVAPI_GPU_PERF_INFO_VER; |
|
ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); |
|
|
|
NVAPI_GPU_PERF_STATUS ps = { 0 }; |
|
ps.version = NVAPI_GPU_PERF_STATUS_VER; |
|
ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); |
|
applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); |
|
#endif |
|
|
|
#endif |
|
free(mem); |
|
return 0; |
|
} |
|
|
|
// workaround for buggy driver 378.49 |
|
unsigned int nvapi_get_gpu_clock(unsigned int devNum) |
|
{ |
|
NvAPI_Status ret = NVAPI_OK; |
|
unsigned int freq = 0; |
|
NV_GPU_CLOCK_FREQUENCIES *freqs; |
|
NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); |
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); |
|
if (ret == NVAPI_OK) { |
|
freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; |
|
} |
|
free(freqs); |
|
return freq; // in MHz |
|
} |
|
|
|
uint8_t nvapi_get_plimit(unsigned int devNum) |
|
{ |
|
NvAPI_Status ret = NVAPI_OK; |
|
NVAPI_GPU_POWER_STATUS pol = { 0 }; |
|
pol.version = NVAPI_GPU_POWER_STATUS_VER; |
|
if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); |
|
return 0; |
|
} |
|
return (uint8_t) (pol.entries[0].power / 1000); // in percent |
|
} |
|
|
|
int nvapi_set_plimit(unsigned int devNum, uint16_t percent) |
|
{ |
|
NvAPI_Status ret = NVAPI_OK; |
|
uint32_t val = percent * 1000; |
|
|
|
NVAPI_GPU_POWER_INFO nfo = { 0 }; |
|
nfo.version = NVAPI_GPU_POWER_INFO_VER; |
|
ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); |
|
if (ret == NVAPI_OK) { |
|
if (val == 0) |
|
val = nfo.entries[0].def_power; |
|
else if (val < nfo.entries[0].min_power) |
|
val = nfo.entries[0].min_power; |
|
else if (val > nfo.entries[0].max_power) |
|
val = nfo.entries[0].max_power; |
|
} |
|
|
|
NVAPI_GPU_POWER_STATUS pol = { 0 }; |
|
pol.version = NVAPI_GPU_POWER_STATUS_VER; |
|
pol.flags = 1; |
|
pol.entries[0].power = val; |
|
if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); |
|
return -1; |
|
} |
|
return ret; |
|
} |
|
|
|
int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) |
|
{ |
|
NvAPI_Status ret; |
|
uint32_t val = limit; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
NV_GPU_THERMAL_SETTINGS tset = { 0 }; |
|
NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; |
|
NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; |
|
tset.version = NV_GPU_THERMAL_SETTINGS_VER; |
|
NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); |
|
tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; |
|
NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); |
|
tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; |
|
if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) { |
|
tlim.entries[0].value = val << 8; |
|
tlim.flags = 1; |
|
ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); |
|
if (ret == NVAPI_OK) { |
|
applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", |
|
devNum, val, tset.sensor[0].currentTemp, |
|
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); |
|
} else { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, |
|
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); |
|
} |
|
} |
|
return (int) ret; |
|
} |
|
|
|
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) |
|
{ |
|
NvAPI_Status ret; |
|
NvS32 delta = 0; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
#if 0 |
|
// wrong api to get default base clock when modified, cuda props seems fine |
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; |
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; |
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); |
|
if (ret == NVAPI_OK) { |
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; |
|
} |
|
|
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; |
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; |
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! |
|
if (ret == NVAPI_OK) { |
|
if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) |
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2; |
|
} |
|
#endif |
|
|
|
cudaDeviceProp props = { 0 }; |
|
NvU32 busId = 0xFFFF; |
|
ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); |
|
for (int d=0; d < (int) nvapi_dev_cnt; d++) { |
|
// unsure about devNum, so be safe |
|
cudaGetDeviceProperties(&props, d); |
|
if (props.pciBusID == busId) { |
|
delta = (clock * 1000) - props.clockRate; |
|
break; |
|
} |
|
} |
|
|
|
if (delta == (clock * 1000)) |
|
return ret; |
|
|
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; |
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; |
|
pset1.numPstates = 1; |
|
pset1.numClocks = 1; |
|
// Ok on both 1080 and 970 |
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; |
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; |
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); |
|
if (ret == NVAPI_OK) { |
|
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000); |
|
} |
|
return ret; |
|
} |
|
|
|
int nvapi_set_memclock(unsigned int devNum, uint32_t clock) |
|
{ |
|
NvAPI_Status ret; |
|
NvS32 delta = 0; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
// wrong to get default base clock (when modified) on maxwell (same as cuda props one) |
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; |
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; |
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; |
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless |
|
if (ret == NVAPI_OK) { |
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; |
|
} |
|
|
|
// seems ok on maxwell and pascal for the mem clocks |
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; |
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; |
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks |
|
if (ret == NVAPI_OK) { |
|
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) |
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; |
|
} |
|
|
|
if (delta == (clock * 1000)) |
|
return ret; |
|
|
|
// todo: bounds check with GetPstates20 |
|
|
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; |
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; |
|
pset1.numPstates = 1; |
|
pset1.numClocks = 1; |
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; |
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; |
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); |
|
if (ret == NVAPI_OK) { |
|
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000); |
|
} |
|
return ret; |
|
} |
|
|
|
static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true) |
|
{ |
|
NvAPI_Status ret; |
|
NvS32 deltaKHz = delta * 1000; |
|
|
|
if (devNum >= nvapi_dev_cnt) |
|
return -ENODEV; |
|
|
|
// todo: bounds check with GetPstates20 |
|
|
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; |
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; |
|
pset1.numPstates = 1; |
|
pset1.numClocks = 1; |
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; |
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; |
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); |
|
if (ret == NVAPI_OK) { |
|
if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); |
|
need_memclockrst = true; |
|
} |
|
return ret; |
|
} |
|
|
|
// Replacement for WIN32 CUDA 6.5 on pascal |
|
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) |
|
{ |
|
NvAPI_Status ret = NVAPI_OK; |
|
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 }; |
|
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; |
|
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; |
|
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { |
|
*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; |
|
*free = (uint64_t) mem.curAvailableDedicatedVideoMemory; |
|
} |
|
return (int) ret; |
|
} |
|
|
|
int nvapi_init() |
|
{ |
|
int num_gpus = cuda_num_devices(); |
|
NvAPI_Status ret = NvAPI_Initialize(); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); |
|
return -1; |
|
} |
|
|
|
ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); |
|
if (ret != NVAPI_OK) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); |
|
return -1; |
|
} |
|
|
|
for (int g = 0; g < num_gpus; g++) { |
|
cudaDeviceProp props; |
|
if (cudaGetDeviceProperties(&props, g) == cudaSuccess) { |
|
device_bus_ids[g] = props.pciBusID; |
|
} |
|
nvapi_dev_map[g] = g; // default mapping |
|
} |
|
|
|
for (NvU8 i = 0; i < nvapi_dev_cnt; i++) { |
|
NvAPI_ShortString name; |
|
ret = NvAPI_GPU_GetFullName(phys[i], name); |
|
if (ret == NVAPI_OK) { |
|
for (int g = 0; g < num_gpus; g++) { |
|
NvU32 busId; |
|
ret = NvAPI_GPU_GetBusId(phys[i], &busId); |
|
if (ret == NVAPI_OK && busId == device_bus_ids[g]) { |
|
nvapi_dev_map[g] = i; |
|
if (opt_debug) |
|
applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", |
|
g, i, busId); |
|
break; |
|
} |
|
} |
|
} else { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage(ret, string); |
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); |
|
} |
|
} |
|
#if 0 |
|
if (opt_debug) { |
|
NvAPI_ShortString ver; |
|
NvAPI_GetInterfaceVersionString(ver); |
|
applog(LOG_DEBUG, "%s", ver); |
|
} |
|
#endif |
|
|
|
NvU32 udv; |
|
NvAPI_ShortString str; |
|
ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); |
|
if (ret == NVAPI_OK) { |
|
sprintf(driver_version,"%d.%02d", udv / 100, udv % 100); |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
int nvapi_init_settings() |
|
{ |
|
// nvapi.dll |
|
int ret = nvapi_dll_init(); |
|
if (ret != NVAPI_OK) |
|
return ret; |
|
|
|
if (!opt_n_threads) { |
|
opt_n_threads = active_gpus; |
|
} |
|
|
|
for (int n=0; n < opt_n_threads; n++) { |
|
int dev_id = device_map[n % MAX_GPUS]; |
|
if (device_plimit[dev_id] && !nvml_plimit_set) { |
|
if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) { |
|
uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); |
|
gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); |
|
} |
|
} |
|
if (device_tlimit[dev_id]) { |
|
nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); |
|
} |
|
if (device_gpu_clocks[dev_id]) { |
|
ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); |
|
if (ret) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage((NvAPI_Status) ret, string); |
|
gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string); |
|
} |
|
} |
|
if (device_mem_offsets[dev_id]) { |
|
ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]); |
|
if (ret) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage((NvAPI_Status)ret, string); |
|
gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string); |
|
} |
|
} |
|
else if (device_mem_clocks[dev_id]) { |
|
ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); |
|
if (ret) { |
|
NvAPI_ShortString string; |
|
NvAPI_GetErrorMessage((NvAPI_Status) ret, string); |
|
gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string); |
|
} |
|
} |
|
if (device_pstate[dev_id]) { |
|
// dunno how via nvapi or/and pascal |
|
} |
|
if (device_led[dev_id] != -1) { |
|
int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]); |
|
if (err != 0) { |
|
gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err); |
|
} |
|
device_led_state[dev_id] = device_led[dev_id]; |
|
} |
|
} |
|
|
|
return ret; |
|
} |
|
|
|
void nvapi_toggle_clocks(int thr_id, bool enable) |
|
{ |
|
int dev_id = device_map[thr_id % MAX_GPUS]; |
|
if (device_mem_offsets[dev_id]) { |
|
nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false); |
|
} |
|
} |
|
|
|
unsigned int nvapi_devnum(int dev_id) |
|
{ |
|
return nvapi_dev_map[dev_id]; |
|
} |
|
|
|
int nvapi_devid(unsigned int devNum) |
|
{ |
|
for (int i=0; i < opt_n_threads; i++) { |
|
int dev_id = device_map[i % MAX_GPUS]; |
|
if (nvapi_dev_map[dev_id] = devNum) |
|
return dev_id; |
|
} |
|
return 0; |
|
} |
|
|
|
#endif /* WIN32 : Windows specific (nvapi) */ |
|
|
|
/* api functions -------------------------------------- */ |
|
|
|
// assume 2500 rpm as default, auto-updated if more |
|
static unsigned int fan_speed_max = 2500; |
|
|
|
unsigned int gpu_fanpercent(struct cgpu_info *gpu) |
|
{ |
|
unsigned int pct = 0; |
|
if (hnvml) { |
|
nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); |
|
} |
|
#ifdef WIN32 |
|
else { |
|
unsigned int rpm = 0; |
|
nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); |
|
pct = (rpm * 100) / fan_speed_max; |
|
if (pct > 100) { |
|
pct = 100; |
|
fan_speed_max = rpm; |
|
} |
|
} |
|
#endif |
|
return pct; |
|
} |
|
|
|
// Fan speed of a device in rpm, read through NVAPI.
// Only implemented for WIN32 builds; returns 0 everywhere else.
unsigned int gpu_fanrpm(struct cgpu_info *gpu)
{
#ifdef WIN32
	unsigned int rpm = 0;
	nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm);
	return rpm;
#else
	return 0;
#endif
}
|
|
|
|
|
float gpu_temp(struct cgpu_info *gpu) |
|
{ |
|
float tc = 0.0; |
|
unsigned int tmp = 0; |
|
if (hnvml) { |
|
nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); |
|
tc = (float)tmp; |
|
} |
|
#ifdef WIN32 |
|
else { |
|
nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); |
|
tc = (float)tmp; |
|
} |
|
#endif |
|
return tc; |
|
} |
|
|
|
int gpu_pstate(struct cgpu_info *gpu) |
|
{ |
|
int pstate = -1; |
|
int support = -1; |
|
if (hnvml) { |
|
support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); |
|
} |
|
#ifdef WIN32 |
|
if (support == -1) { |
|
unsigned int pst = 0; |
|
nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); |
|
pstate = (int) pst; |
|
} |
|
#endif |
|
return pstate; |
|
} |
|
|
|
int gpu_busid(struct cgpu_info *gpu) |
|
{ |
|
int busid = -1; |
|
int support = -1; |
|
if (hnvml) { |
|
support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); |
|
} |
|
#ifdef WIN32 |
|
if (support == -1) { |
|
busid = device_bus_ids[gpu->gpu_id]; |
|
} |
|
#endif |
|
return busid; |
|
} |
|
|
|
unsigned int gpu_power(struct cgpu_info *gpu) |
|
{ |
|
unsigned int mw = 0; |
|
int support = -1; |
|
if (hnvml) { |
|
support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); |
|
} |
|
#ifdef WIN32 |
|
if (support == -1) { |
|
unsigned int pct = 0; |
|
nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); |
|
pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); |
|
pct /= 100; |
|
mw = pct; // to fix |
|
} |
|
#endif |
|
if (gpu->gpu_power > 0) { |
|
// average |
|
mw = (gpu->gpu_power + mw) / 2; |
|
} |
|
return mw; |
|
} |
|
|
|
unsigned int gpu_plimit(struct cgpu_info *gpu) |
|
{ |
|
unsigned int mw = 0; |
|
int support = -1; |
|
if (hnvml) { |
|
mw = nvml_get_plimit(hnvml, gpu->gpu_id); |
|
support = (mw > 0); |
|
} |
|
#ifdef WIN32 |
|
// NVAPI value is in % (< 100 so) |
|
if (support == -1) { |
|
mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); |
|
} |
|
#endif |
|
return mw; |
|
} |
|
|
|
static int translate_vendor_id(uint16_t vid, char *vendorname) |
|
{ |
|
struct VENDORS { |
|
const uint16_t vid; |
|
const char *name; |
|
} vendors[] = { |
|
{ 0x1043, "ASUS" }, |
|
{ 0x1048, "Elsa" }, |
|
{ 0x107D, "Leadtek" }, |
|
{ 0x10B0, "Gainward" }, |
|
// { 0x10DE, "NVIDIA" }, |
|
{ 0x1458, "Gigabyte" }, |
|
{ 0x1462, "MSI" }, |
|
{ 0x154B, "PNY" }, // maybe storage devices |
|
{ 0x1569, "Palit" }, |
|
{ 0x1682, "XFX" }, |
|
{ 0x196D, "Club3D" }, |
|
{ 0x196E, "PNY" }, |
|
{ 0x19DA, "Zotac" }, |
|
{ 0x19F1, "BFG" }, |
|
{ 0x1ACC, "PoV" }, |
|
{ 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards |
|
{ 0x3842, "EVGA" }, |
|
{ 0x7377, "Colorful" }, |
|
{ 0, "" } |
|
}; |
|
|
|
if (!vendorname) |
|
return -EINVAL; |
|
|
|
for(int v=0; v < ARRAY_SIZE(vendors); v++) { |
|
if (vid == vendors[v].vid) { |
|
strcpy(vendorname, vendors[v].name); |
|
return vid; |
|
} |
|
} |
|
if (opt_debug && vid != 0x10DE) |
|
applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); |
|
return 0; |
|
} |
|
|
|
int gpu_vendor(uint8_t pci_bus_id, char *vendorname) |
|
{ |
|
uint16_t vid = 0, pid = 0; |
|
if (hnvml) { // may not be initialized on start... |
|
for (int id=0; id < hnvml->nvml_gpucount; id++) { |
|
if (hnvml->nvml_pci_bus_id[id] == pci_bus_id) { |
|
int dev_id = hnvml->nvml_cuda_device_id[id]; |
|
nvml_get_info(hnvml, dev_id, vid, pid); |
|
} |
|
} |
|
} else { |
|
#ifdef WIN32 |
|
for (unsigned id = 0; id < nvapi_dev_cnt; id++) { |
|
if (device_bus_ids[id] == pci_bus_id) { |
|
nvapi_getinfo(nvapi_dev_map[id], vid, pid); |
|
break; |
|
} |
|
} |
|
#endif |
|
} |
|
return translate_vendor_id(vid, vendorname); |
|
} |
|
|
|
int gpu_info(struct cgpu_info *gpu) |
|
{ |
|
char vendorname[32] = { 0 }; |
|
int id = gpu->gpu_id; |
|
uint8_t bus_id = 0; |
|
|
|
gpu->nvml_id = -1; |
|
gpu->nvapi_id = -1; |
|
|
|
if (id < 0) |
|
return -1; |
|
|
|
if (hnvml) { |
|
gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id]; |
|
nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); |
|
nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); |
|
nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); |
|
} |
|
#ifdef WIN32 |
|
gpu->nvapi_id = (int8_t) nvapi_dev_map[id]; |
|
nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); |
|
nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn)); |
|
nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); |
|
#endif |
|
return 0; |
|
} |
|
|
|
#endif /* USE_WRAPNVML */ |
|
|
|
// Scale each 8-bit channel of a 0xRRGGBB color by percent (integer division).
// percent is clamped to 0..100 so a channel can never spill into its neighbour.
// Bits above the low 24 of RGB are ignored. Returns the scaled 0xRRGGBB value.
static int rgb_percent(int RGB, int percent)
{
	// shifts and masks instead of byte-pointer access: independent of endianness
	const unsigned int rgb = (unsigned int) RGB;
	const int red = (int) ((rgb >> 16) & 0xFF);
	const int green = (int) ((rgb >> 8) & 0xFF);
	const int blue = (int) (rgb & 0xFF);

	if (percent < 0)
		percent = 0;
	else if (percent > 100)
		percent = 100;

	int res = ((percent * red) / 100) << 16;
	res += ((percent * green) / 100) << 8;
	return res + ((percent * blue) / 100);
}
|
|
|
// Switch the led of a device to its configured value (device_led[]).
// Nothing is sent when the led is already in that state; the cached state is
// only updated when nvapi_set_led() reports success.
// No-op unless built for WIN32 with USE_WRAPNVML.
void gpu_led_on(int dev_id)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	const int wanted = device_led[dev_id];

	if (device_led_state[dev_id] == wanted)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], wanted, device_name[dev_id]) != 0)
		return;

	device_led_state[dev_id] = wanted;
#endif
}
|
|
|
// Set the led of a device to its configured color scaled by percent
// (see rgb_percent()). Nothing is sent when the led is already in that state;
// the cached state is only updated when nvapi_set_led() reports success.
// No-op unless built for WIN32 with USE_WRAPNVML.
void gpu_led_percent(int dev_id, int percent)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	const int wanted = rgb_percent(device_led[dev_id], percent);

	if (device_led_state[dev_id] == wanted)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], wanted, device_name[dev_id]) != 0)
		return;

	device_led_state[dev_id] = wanted;
#endif
}
|
|
|
// Turn the led of a device off (value 0).
// Nothing is sent when the cached state is already 0; the cached state is
// only cleared when nvapi_set_led() reports success.
// No-op unless built for WIN32 with USE_WRAPNVML.
void gpu_led_off(int dev_id)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	if (device_led_state[dev_id] == 0)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) != 0)
		return;

	device_led_state[dev_id] = 0;
#endif
}
|
|
|
#ifdef USE_WRAPNVML |
|
extern double thr_hashrates[MAX_GPUS]; |
|
extern bool opt_debug_threads; |
|
extern bool opt_hwmonitor; |
|
extern int num_cpus; |
|
|
|
void *monitor_thread(void *userdata) |
|
{ |
|
int thr_id = -1; |
|
|
|
while (!abort_flag && !opt_quiet) |
|
{ |
|
// This thread monitors card's power lazily during scans, one at a time... |
|
thr_id = (thr_id + 1) % opt_n_threads; |
|
struct cgpu_info *cgpu = &thr_info[thr_id].gpu; |
|
int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); |
|
|
|
if (hnvml != NULL && cgpu) |
|
{ |
|
char khw[32] = { 0 }; |
|
uint64_t clock = 0, mem_clock = 0; |
|
uint32_t fanpercent = 0, power = 0; |
|
double tempC = 0, khs_per_watt = 0; |
|
uint32_t counter = 0; |
|
int max_loops = 1000; |
|
|
|
pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); |
|
|
|
do { |
|
unsigned int tmp_clock=0, tmp_memclock=0; |
|
nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock); |
|
#ifdef WIN32 |
|
if (tmp_clock < 200) { |
|
// workaround for buggy drivers 378.x (real clock) |
|
tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]); |
|
} |
|
#endif |
|
if (tmp_clock < 200) { |
|
// some older cards only report a base clock with cuda props. |
|
if (cuda_gpu_info(cgpu) == 0) { |
|
tmp_clock = cgpu->gpu_clock/1000; |
|
tmp_memclock = cgpu->gpu_memclock/1000; |
|
} |
|
} |
|
clock += tmp_clock; |
|
mem_clock += tmp_memclock; |
|
tempC += gpu_temp(cgpu); |
|
fanpercent += gpu_fanpercent(cgpu); |
|
power += gpu_power(cgpu); |
|
counter++; |
|
|
|
usleep(50000); |
|
if (abort_flag) goto abort; |
|
|
|
} while (cgpu->monitor.sampling_flag && (--max_loops)); |
|
|
|
cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter); |
|
cgpu->monitor.gpu_fan = fanpercent/counter; |
|
cgpu->monitor.gpu_power = power/counter; |
|
cgpu->monitor.gpu_clock = (uint32_t) (clock/counter); |
|
cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter); |
|
|
|
if (power) { |
|
khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]); |
|
khs_per_watt = khs_per_watt / ((double)power / counter); |
|
format_hashrate(khs_per_watt * 1000, khw); |
|
if (strlen(khw)) |
|
sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000); |
|
} |
|
|
|
if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) { |
|
gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%", |
|
cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, |
|
khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan |
|
); |
|
cgpu->monitor.tm_displayed = (uint32_t)time(NULL); |
|
} |
|
|
|
pthread_mutex_unlock(&cgpu->monitor.lock); |
|
} |
|
usleep(500); // safety |
|
} |
|
abort: |
|
if (opt_debug_threads) |
|
applog(LOG_DEBUG, "%s() died", __func__); |
|
return NULL; |
|
} |
|
#endif
|
|
|