mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-11 23:38:03 +00:00
4da35e0bcf
plimit value is reported in mW like the power, or in % on Windows x86 (via nvapi). The monitor thread will no longer report 0W if the device doesn't support it. Also upgrade nvml and the sample PHP API. Some more changes may come in this temporary API 1.9 (before the final 2.0).
2205 lines
68 KiB
C++
2205 lines
68 KiB
C++
/*
|
|
* A trivial little dlopen()-based wrapper library for the
|
|
* NVIDIA NVML library, to allow runtime discovery of NVML on an
|
|
* arbitrary system. This is all very hackish and simple-minded, but
|
|
* it serves my immediate needs in the short term until NVIDIA provides
|
|
* a static NVML wrapper library themselves, hopefully in
|
|
* CUDA 6.5 or maybe sometime shortly after.
|
|
*
|
|
* This trivial code is made available under the "new" 3-clause BSD license,
|
|
* and/or any of the GPL licenses you prefer.
|
|
* Feel free to use the code and modify as you see fit.
|
|
*
|
|
* John E. Stone - john.stone@gmail.com
|
|
* Tanguy Pruvot - tpruvot@github
|
|
*
|
|
*/
|
|
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include "miner.h"
|
|
#include "nvml.h"
|
|
#include "cuda_runtime.h"
|
|
|
|
#ifdef USE_WRAPNVML
|
|
|
|
extern nvml_handle *hnvml;
|
|
extern char driver_version[32];
|
|
|
|
static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
|
|
|
|
extern uint32_t device_gpu_clocks[MAX_GPUS];
|
|
extern uint32_t device_mem_clocks[MAX_GPUS];
|
|
extern uint8_t device_tlimit[MAX_GPUS];
|
|
extern int8_t device_pstate[MAX_GPUS];
|
|
extern int32_t device_led[MAX_GPUS];
|
|
int32_t device_led_state[MAX_GPUS] = { 0 };
|
|
static __thread bool has_rgb_ok = false;
|
|
|
|
uint32_t clock_prev[MAX_GPUS] = { 0 };
|
|
uint32_t clock_prev_mem[MAX_GPUS] = { 0 };
|
|
uint32_t limit_prev[MAX_GPUS] = { 0 };
|
|
|
|
/*
|
|
* Wrappers to emulate dlopen() on other systems like Windows
|
|
*/
|
|
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
|
|
#include <windows.h>
|
|
static void *wrap_dlopen(const char *filename) {
|
|
HMODULE h = LoadLibrary(filename);
|
|
if (!h && opt_debug) {
|
|
applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
|
|
GetLastError(), filename);
|
|
}
|
|
return (void*)h;
|
|
}
|
|
static void *wrap_dlsym(void *h, const char *sym) {
|
|
return (void *)GetProcAddress((HINSTANCE)h, sym);
|
|
}
|
|
static int wrap_dlclose(void *h) {
|
|
/* FreeLibrary returns nonzero on success */
|
|
return (!FreeLibrary((HINSTANCE)h));
|
|
}
|
|
#else
|
|
/* assume we can use dlopen itself... */
|
|
#include <dlfcn.h>
|
|
#include <errno.h>
|
|
static void *wrap_dlopen(const char *filename) {
|
|
void *h = dlopen(filename, RTLD_NOW);
|
|
if (h == NULL && opt_debug) {
|
|
applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
|
|
errno, filename);
|
|
}
|
|
return (void*)h;
|
|
}
|
|
|
|
/* Resolve a symbol in a library opened with wrap_dlopen().
 * Returns whatever dlsym() returns (NULL when the symbol is missing). */
static void *wrap_dlsym(void *h, const char *sym) {
	void *address = dlsym(h, sym);
	return address;
}
|
|
/* Close a library handle; the dlclose() status is returned unchanged. */
static int wrap_dlclose(void *h) {
	int status = dlclose(h);
	return status;
}
|
|
#endif
|
|
|
|
nvml_handle * nvml_create()
|
|
{
|
|
int i=0;
|
|
nvml_handle *nvmlh = NULL;
|
|
|
|
#ifdef WIN32
|
|
/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
|
|
#define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
|
|
#else
|
|
/* linux assumed */
|
|
#define libnvidia_ml "libnvidia-ml.so"
|
|
#endif
|
|
|
|
char tmp[512];
|
|
#ifdef WIN32
|
|
ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp));
|
|
#else
|
|
strcpy(tmp, libnvidia_ml);
|
|
#endif
|
|
|
|
void *nvml_dll = wrap_dlopen(tmp);
|
|
if (nvml_dll == NULL) {
|
|
#ifdef WIN32
|
|
nvml_dll = wrap_dlopen("nvml.dll");
|
|
if (nvml_dll == NULL)
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle));
|
|
|
|
nvmlh->nvml_dll = nvml_dll;
|
|
|
|
nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
|
|
if (!nvmlh->nvmlInit)
|
|
nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
|
|
nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
|
|
if (!nvmlh->nvmlDeviceGetCount)
|
|
nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount");
|
|
nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
|
|
nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction");
|
|
nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction");
|
|
nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock");
|
|
nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock");
|
|
nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks");
|
|
nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks");
|
|
nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks");
|
|
nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks");
|
|
nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
|
|
nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo");
|
|
nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2");
|
|
if (!nvmlh->nvmlDeviceGetPciInfo)
|
|
nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
|
|
nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration");
|
|
nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth");
|
|
nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration");
|
|
nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth");
|
|
nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
|
|
nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementDefaultLimit");
|
|
nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit");
|
|
nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints");
|
|
nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit");
|
|
nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
|
|
nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
|
|
nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
|
|
nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */
|
|
nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial");
|
|
nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID");
|
|
nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion");
|
|
nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion");
|
|
nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
|
|
nvmlh->nvmlShutdown = (nvmlReturn_t (*)())
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
|
|
// v331
|
|
nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit");
|
|
// v340
|
|
#ifdef __linux__
|
|
nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity");
|
|
nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");
|
|
nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");
|
|
#endif
|
|
// v346
|
|
nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput");
|
|
// v36x (API 8 / Pascal)
|
|
nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz))
|
|
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock");
|
|
|
|
if (nvmlh->nvmlInit == NULL ||
|
|
nvmlh->nvmlShutdown == NULL ||
|
|
nvmlh->nvmlErrorString == NULL ||
|
|
nvmlh->nvmlDeviceGetCount == NULL ||
|
|
nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
|
|
nvmlh->nvmlDeviceGetPciInfo == NULL ||
|
|
nvmlh->nvmlDeviceGetName == NULL)
|
|
{
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "Failed to obtain required NVML function pointers");
|
|
wrap_dlclose(nvmlh->nvml_dll);
|
|
free(nvmlh);
|
|
return NULL;
|
|
}
|
|
|
|
nvmlh->nvmlInit();
|
|
if (nvmlh->nvmlSystemGetDriverVersion)
|
|
nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version));
|
|
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
|
|
|
|
/* Query CUDA device count, in case it doesn't agree with NVML, since */
|
|
/* CUDA will only report GPUs with compute capability greater than 1.0 */
|
|
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "Failed to query CUDA device count!");
|
|
wrap_dlclose(nvmlh->nvml_dll);
|
|
free(nvmlh);
|
|
return NULL;
|
|
}
|
|
|
|
nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t));
|
|
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
|
|
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
|
|
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
|
|
nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
|
|
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
|
|
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
|
|
nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t));
|
|
|
|
/* Obtain GPU device handles we're going to need repeatedly... */
|
|
for (i=0; i<nvmlh->nvml_gpucount; i++) {
|
|
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
|
|
}
|
|
|
|
/* Query PCI info for each NVML device, and build table for mapping of */
|
|
/* CUDA device IDs to NVML device IDs and vice versa */
|
|
for (i=0; i<nvmlh->nvml_gpucount; i++) {
|
|
nvmlPciInfo_t pciinfo;
|
|
|
|
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
|
|
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
|
|
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
|
|
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
|
|
nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id;
|
|
|
|
nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN;
|
|
if (nvmlh->nvmlDeviceSetAPIRestriction) {
|
|
nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
|
|
NVML_FEATURE_ENABLED);
|
|
/* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */
|
|
}
|
|
if (nvmlh->nvmlDeviceGetAPIRestriction) {
|
|
nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
|
|
&nvmlh->app_clocks[i]);
|
|
}
|
|
}
|
|
|
|
/* build mapping of NVML device IDs to CUDA IDs */
|
|
for (i=0; i<nvmlh->nvml_gpucount; i++) {
|
|
nvmlh->nvml_cuda_device_id[i] = -1;
|
|
}
|
|
for (i=0; i<nvmlh->cuda_gpucount; i++) {
|
|
cudaDeviceProp props;
|
|
nvmlh->cuda_nvml_device_id[i] = -1;
|
|
|
|
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
|
|
device_bus_ids[i] = props.pciBusID;
|
|
for (int j = 0; j < nvmlh->nvml_gpucount; j++) {
|
|
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
|
|
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
|
|
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u",
|
|
i, j, (uint32_t) props.pciBusID);
|
|
nvmlh->nvml_cuda_device_id[j] = i;
|
|
nvmlh->cuda_nvml_device_id[i] = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nvmlh;
|
|
}
|
|
|
|
/* apply the configured clocks to a device in use */
|
|
/**
 * Apply the application clocks configured for a CUDA device
 * (device_gpu_clocks[] / device_mem_clocks[]).
 *
 * The requested values are snapped down to the closest clocks reported as
 * supported by NVML when those tables can be queried.
 *
 * @param nvmlh   NVML handle
 * @param dev_id  CUDA device id
 * @return 1 when the clocks were changed, 0 when nothing is configured,
 *         -ENODEV when the device has no NVML counterpart,
 *         -EPERM when application clocks are not permitted,
 *         -ENOSYS when the driver lacks the application clock entry points,
 *         -EINVAL when the default clocks cannot be queried,
 *         -ENOMEM when the clock table cannot be allocated,
 *         -1 when NVML refused the new clocks
 */
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
{
	nvmlReturn_t rc;
	uint32_t gpu_clk = 0, mem_clk = 0;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id])
		return 0; // nothing to do

	if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
		applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id);
		return -EPERM;
	}

	// these entry points are optional in nvml_create(): do not call through NULL
	if (!nvmlh->nvmlDeviceGetApplicationsClock ||
	    !nvmlh->nvmlDeviceGetDefaultApplicationsClock ||
	    !nvmlh->nvmlDeviceSetApplicationsClocks)
		return -ENOSYS;

	uint32_t mem_prev = clock_prev_mem[dev_id];
	if (!mem_prev)
		nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
	uint32_t gpu_prev = clock_prev[dev_id];
	if (!gpu_prev)
		nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev);

	nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
	rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
	if (rc != NVML_SUCCESS) {
		applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
		return -EINVAL;
	}

	if (opt_debug)
		applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk);

	// get application config values
	if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];

	// these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
	uint32_t nclocks = 0, mem_clocks[32] = { 0 };
	if (nvmlh->nvmlDeviceGetSupportedMemoryClocks) {
		// first call only asks for the number of entries
		nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
		if (nclocks > 32) nclocks = 32;
		if (nclocks) {
			rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
			// a failed query leaves the table empty or partial: ignore it,
			// and never trust a count larger than the local array
			if (rc != NVML_SUCCESS) nclocks = 0;
			else if (nclocks > 32) nclocks = 32;
		}
	}
	for (uint32_t u = 0; u < nclocks; u++) {
		// ordered by pstate (so highest is first memory clock - P0)
		if (mem_clocks[u] <= mem_clk) {
			mem_clk = mem_clocks[u];
			break;
		}
	}

	nclocks = 0;
	if (nvmlh->nvmlDeviceGetSupportedGraphicsClocks)
		nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
	if (nclocks) {
		if (opt_debug)
			applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk);
		const uint32_t capacity = nclocks;
		uint32_t *gpu_clocks = (uint32_t*) calloc(capacity, sizeof(uint32_t));
		if (!gpu_clocks)
			return -ENOMEM;
		rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
		if (rc == NVML_SUCCESS) {
			if (nclocks > capacity) nclocks = capacity;
			// 32-bit index: the table may hold more than 255 entries
			for (uint32_t u = 0; u < nclocks; u++) {
				// ordered desc, so get first
				if (gpu_clocks[u] <= gpu_clk) {
					gpu_clk = gpu_clocks[u];
					break;
				}
			}
		}
		free(gpu_clocks);
	}

	rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
	if (rc == NVML_SUCCESS)
		applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
	else {
		applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
		return -1;
	}

	// store previous clocks for reset on exit (or during wait...)
	// clock_prev doubles as the "needs a reset" flag of nvml_reset_clocks(),
	// so it must not stay at 0 when the previous clock could not be read
	clock_prev[dev_id] = gpu_prev ? gpu_prev : 1;
	clock_prev_mem[dev_id] = mem_prev;
	return 1;
}
|
|
|
|
/* reset default app clocks and limits on exit */
|
|
/**
 * Undo what nvml_set_clocks() / nvml_set_pstate() / nvml_set_plimit() did:
 * reset the application clocks and restore the saved power limit.
 *
 * @param nvmlh   NVML handle
 * @param dev_id  CUDA device id
 * @return 1 when something had to be restored, 0 when nothing was changed,
 *         -ENODEV when the device has no NVML counterpart
 */
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
{
	int ret = 0;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (clock_prev[dev_id]) {
		nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
		// optional entry point, may be missing
		if (nvmlh->nvmlDeviceResetApplicationsClocks)
			rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]);
		if (rc != NVML_SUCCESS) {
			applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id);
		}
		clock_prev[dev_id] = 0;
		ret = 1;
	}

	if (limit_prev[dev_id]) {
		// limit_prev is non-zero here, so it is always the value to restore
		uint32_t plimit = limit_prev[dev_id];
		nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
		if (nvmlh->nvmlDeviceSetPowerManagementLimit)
			rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
		if (rc != NVML_SUCCESS) {
			applog(LOG_WARNING, "GPU #%d: unable to restore the power limit", dev_id);
		}
		// like clock_prev: forget the saved value once it was handled
		limit_prev[dev_id] = 0;
		ret = 1;
	}
	return ret;
}
|
|
|
|
/**
 * Set the power state of a device (9xx).
 * The code is similar to the clocks one, which allows changing the pstate.
 */
|
|
/**
 * Select a performance state through the application clocks: the memory
 * clock of the wanted p-state (device_pstate[]) is combined with the highest
 * graphics clock supported for that memory clock.
 *
 * @param nvmlh   NVML handle
 * @param dev_id  CUDA device id
 * @return 1 when the clocks were changed, 0 when no p-state is configured,
 *         -ENODEV when the device has no NVML counterpart,
 *         -EPERM when application clocks are not permitted,
 *         -ENOSYS when the driver lacks the application clock entry points,
 *         -EINVAL when the default clocks cannot be queried,
 *         -ENOMEM when the clock table cannot be allocated,
 *         -1 when NVML refused the new clocks
 */
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
{
	nvmlReturn_t rc;
	uint32_t gpu_clk = 0, mem_clk = 0;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (device_pstate[dev_id] < 0)
		return 0;

	if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
		applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
		return -EPERM;
	}

	// these entry points are optional in nvml_create(): do not call through NULL
	if (!nvmlh->nvmlDeviceGetDefaultApplicationsClock ||
	    !nvmlh->nvmlDeviceSetApplicationsClocks)
		return -ENOSYS;

	nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
	rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
	if (rc != NVML_SUCCESS) {
		applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
		return -EINVAL;
	}

	// get application config values
	if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
	if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];

	// these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
	uint32_t nclocks = 0, mem_clocks[32] = { 0 };
	int8_t wanted_pstate = device_pstate[dev_id];
	if (nvmlh->nvmlDeviceGetSupportedMemoryClocks) {
		// first call only asks for the number of entries
		nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
		if (nclocks > 32) nclocks = 32;
		if (nclocks) {
			rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
			// ignore the table when it could not be read, and never
			// trust a count larger than the local array
			if (rc != NVML_SUCCESS) nclocks = 0;
			else if (nclocks > 32) nclocks = 32;
		}
	}
	// ordered by pstate (so highest P0 first)
	if ((uint32_t) wanted_pstate < nclocks) {
		mem_clk = mem_clocks[wanted_pstate];
	} else {
		applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks);
	}

	nclocks = 0;
	if (nvmlh->nvmlDeviceGetSupportedGraphicsClocks)
		nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
	if (nclocks) {
		uint32_t *gpu_clocks = (uint32_t*) calloc(nclocks, sizeof(uint32_t));
		if (!gpu_clocks)
			return -ENOMEM;
		rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
		if (rc == NVML_SUCCESS && nclocks) {
			// ordered desc, get the max app clock (do not limit)
			gpu_clk = gpu_clocks[0];
		}
		free(gpu_clocks);
	}

	rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
	if (rc != NVML_SUCCESS) {
		applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate,
			mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
		return -1;
	}

	if (!opt_quiet)
		applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);

	// non-zero marks the clocks as modified for nvml_reset_clocks()
	clock_prev[dev_id] = 1;
	return 1;
}
|
|
|
|
/**
 * Apply the power limit configured for a CUDA device (device_plimit[], in
 * watts; NVML is given milliwatts). The request is clamped to the range
 * reported by the driver when it is known.
 *
 * On success device_plimit[] is updated with the value really applied and
 * the limit found before the first change is remembered in limit_prev[]
 * for nvml_reset_clocks().
 *
 * @param nvmlh   NVML handle
 * @param dev_id  CUDA device id
 * @return 1 when the limit was changed, 0 when nothing is configured,
 *         -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when the driver lacks the required entry points,
 *         -1 when NVML refused the new limit
 */
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
{
	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
	int n = nvmlh->cuda_nvml_device_id[dev_id];
	if (n < 0 || n >= nvmlh->nvml_gpucount)
		return -ENODEV;

	if (!device_plimit[dev_id])
		return 0; // nothing to do

	if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
		return -ENOSYS;

	uint32_t plimit = device_plimit[dev_id] * 1000;
	uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
	if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
	if (rc != NVML_SUCCESS) {
		// do not rely on values a failed call may have touched
		pmin = 1000;
		pmax = 0;
	}

	if (nvmlh->nvmlDeviceGetPowerManagementLimit) {
		if (nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit) != NVML_SUCCESS)
			prev_limit = 0;
	} else if (rc != NVML_SUCCESS) {
		// neither the allowed range nor the current limit can be read
		return -ENOSYS;
	}
	if (!pmax) pmax = prev_limit;

	// without any known upper bound the request is passed unchanged,
	// instead of being forced down to the minimum
	if (pmax && plimit > pmax) plimit = pmax;
	if (plimit < pmin) plimit = pmin;

	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
	if (rc != NVML_SUCCESS) {
		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
		return -1;
	} else {
		device_plimit[dev_id] = plimit / 1000;
	}

	if (!opt_quiet) {
		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
			dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
	}

	// keep the limit found before the first change: a second call must not
	// replace it with a limit this function applied itself
	if (!limit_prev[dev_id])
		limit_prev[dev_id] = prev_limit;
	return 1;
}
|
|
|
|
/**
 * Read the current power management limit of a CUDA device.
 *
 * @param nvmlh   NVML handle, may be NULL
 * @param dev_id  CUDA device id
 * @return the value stored by nvmlDeviceGetPowerManagementLimit(), or 0 when
 *         the handle is NULL, the device has no NVML counterpart, or the
 *         entry point is unavailable
 */
uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id)
{
	uint32_t limit = 0;

	if (!nvmlh)
		return 0;

	const int nvml_id = nvmlh->cuda_nvml_device_id[dev_id];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);
	if (!mapped)
		return 0;

	if (nvmlh->nvmlDeviceGetPowerManagementLimit != NULL)
		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[nvml_id], &limit);

	return limit;
}
|
|
|
|
// ccminer -D -n
|
|
#define LSTDEV_PFX " "
|
|
void nvml_print_device_info(int dev_id)
|
|
{
|
|
if (!hnvml) return;
|
|
|
|
int n = hnvml->cuda_nvml_device_id[dev_id];
|
|
if (n < 0 || n >= hnvml->nvml_gpucount)
|
|
return;
|
|
|
|
nvmlReturn_t rc;
|
|
|
|
if (hnvml->nvmlDeviceGetClock) {
|
|
uint32_t gpu_clk = 0, mem_clk = 0;
|
|
|
|
fprintf(stderr, "------- Clocks -------\n");
|
|
|
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk);
|
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk);
|
|
if (rc == NVML_SUCCESS) {
|
|
fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
|
|
}
|
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk);
|
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk);
|
|
if (rc == NVML_SUCCESS) {
|
|
fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
|
|
}
|
|
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk);
|
|
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk);
|
|
if (rc == NVML_SUCCESS) {
|
|
fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Store the number of devices seen by NVML in *gpucount; always returns 0. */
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
	const int seen_by_nvml = nvmlh->nvml_gpucount;
	(*gpucount) = seen_by_nvml;
	return 0;
}
|
|
|
|
/* Store the number of devices seen by CUDA in *gpucount; always returns 0. */
int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
	const int seen_by_cuda = nvmlh->cuda_gpucount;
	(*gpucount) = seen_by_cuda;
	return 0;
}
|
|
|
|
|
|
/**
 * Copy the NVML name of a CUDA device into namebuf.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetName is unavailable, -1 on NVML error
 */
int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetName == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetName(nvmlh->devs[nvml_id], namebuf, bufsize);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
|
/**
 * Read the GPU temperature sensor of a CUDA device into *tempC.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetTemperature is unavailable,
 *         -1 on NVML error
 */
int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetTemperature == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[nvml_id], 0u /* NVML_TEMPERATURE_GPU */, tempC);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
|
/**
 * Read the fan speed reported by nvmlDeviceGetFanSpeed into *fanpcnt.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetFanSpeed is unavailable,
 *         -1 on NVML error
 */
int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetFanSpeed == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[nvml_id], fanpcnt);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
|
|
int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock)
|
|
{
|
|
nvmlReturn_t rc;
|
|
int gpuindex = hnvml->cuda_nvml_device_id[cudaindex];
|
|
if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV;
|
|
if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS;
|
|
|
|
rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock);
|
|
if (rc != NVML_SUCCESS) return -1;
|
|
rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock);
|
|
if (rc != NVML_SUCCESS) return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Not Supported on 750Ti 340.23 */
|
|
/**
 * Read the power draw reported by nvmlDeviceGetPowerUsage into *milliwatts.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetPowerUsage is unavailable,
 *         -1 on NVML error (not logged)
 */
int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetPowerUsage == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[nvml_id], milliwatts);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
/* Not Supported on 750Ti 340.23 */
|
|
/**
 * Read the performance state reported by nvmlDeviceGetPerformanceState
 * into *pstate.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetPerformanceState is unavailable,
 *         -1 on NVML error (not logged)
 */
int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetPerformanceState == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[nvml_id], pstate);
	return (status == NVML_SUCCESS) ? 0 : -1;
}
|
|
|
|
/**
 * Give the PCI bus id recorded by nvml_create() for a CUDA device.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart
 */
int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;

	*busid = nvmlh->nvml_pci_bus_id[nvml_id];
	return 0;
}
|
|
|
|
/**
 * Get a serial number for a CUDA device. The board serial is used when NVML
 * provides one, else a serial is derived from the device UUID (the UUID
 * without its 4 character "GPU-" prefix).
 *
 * @param nvmlh      NVML handle
 * @param cudaindex  CUDA device id
 * @param sn         output buffer, always NUL terminated on success
 * @param maxlen     size of sn in bytes
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -EINVAL when the output buffer is unusable,
 *         -ENOSYS when neither entry point is available, -1 on NVML error
 */
int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen)
{
	char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];
	int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	nvmlReturn_t res;
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -ENODEV;

	// an empty buffer would make the terminator land before the buffer
	if (sn == NULL || maxlen <= 0)
		return -EINVAL;

	if (nvmlh->nvmlDeviceGetSerial) {
		res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen);
		if (res == NVML_SUCCESS)
			return 0;
	}

	if (!nvmlh->nvmlDeviceGetUUID)
		return -ENOSYS;

	// nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc
	// use a part of uuid to generate an unique serial
	// todo: check if there is vendor id is inside
	memset(uuid, 0, sizeof(uuid));
	res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1);
	if (res != NVML_SUCCESS) {
		if (opt_debug)
			applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res));
		return -1;
	}

	// drop the prefix; an uuid not longer than the prefix gives an empty serial
	const char *tail = (strlen(uuid) > 4) ? &uuid[4] : "";
	// snprintf truncates to maxlen-1 characters and always terminates
	snprintf(sn, (size_t) maxlen, "%s", tail);
	return 0;
}
|
|
|
|
/**
 * Copy the VBIOS version string of a CUDA device into desc.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart,
 *         -ENOSYS when nvmlDeviceGetVbiosVersion is unavailable,
 *         -1 on NVML error (logged in debug mode)
 */
int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);

	if (!mapped)
		return -ENODEV;
	if (nvmlh->nvmlDeviceGetVbiosVersion == NULL)
		return -ENOSYS;

	nvmlReturn_t status = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[nvml_id], desc, maxlen);
	if (status == NVML_SUCCESS)
		return 0;

	if (opt_debug)
		applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(status));
	return -1;
}
|
|
|
|
/**
 * Split the PCI ids recorded by nvml_create() into two 16-bit halves:
 * vid receives the low half and pid the high half of the subsystem id, or
 * of the device id when no subsystem id was recorded. When the high half
 * is zero, pid is taken from the high half of the device id instead.
 *
 * @return 0 on success, -ENODEV when the device has no NVML counterpart
 */
int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pid)
{
	const int nvml_id = nvmlh->cuda_nvml_device_id[cudaindex];
	const bool mapped = (nvml_id >= 0) && (nvml_id < nvmlh->nvml_gpucount);
	if (!mapped)
		return -ENODEV;

	const uint32_t device_ids = nvmlh->nvml_pci_device_id[nvml_id];
	const uint32_t subsys_ids = nvmlh->nvml_pci_subsys_id[nvml_id];
	const uint32_t ids = subsys_ids ? subsys_ids : device_ids;

	pid = ids >> 16;
	vid = ids & 0xFFFF;

	// Colorful and Inno3D
	if (pid == 0)
		pid = device_ids >> 16;

	return 0;
}
|
|
|
|
/**
 * Shut NVML down, unload the library and release the handle together with
 * all the tables allocated by nvml_create(). The handle must not be used
 * afterwards. Always returns 0.
 */
int nvml_destroy(nvml_handle *nvmlh)
{
	/* stop NVML before the library is unloaded */
	nvmlh->nvmlShutdown();
	wrap_dlclose(nvmlh->nvml_dll);

	/* device handles and access flags */
	free(nvmlh->devs);
	free(nvmlh->app_clocks);

	/* CUDA <-> NVML id mapping */
	free(nvmlh->cuda_nvml_device_id);
	free(nvmlh->nvml_cuda_device_id);

	/* PCI identification tables */
	free(nvmlh->nvml_pci_domain_id);
	free(nvmlh->nvml_pci_bus_id);
	free(nvmlh->nvml_pci_device_id);
	free(nvmlh->nvml_pci_subsys_id);

	free(nvmlh);
	return 0;
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/**
 * nvapi alternative for Windows x86 binaries
 * (the nvml api doesn't exist as a 32-bit dll)
 */
|
|
#ifdef WIN32
|
|
#include "nvapi/nvapi_ccminer.h"
|
|
|
|
static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 };
|
|
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
|
|
static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
|
|
static NvU32 nvapi_dev_cnt = 0;
|
|
extern bool nvapi_dll_loaded;
|
|
|
|
int nvapi_temperature(unsigned int devNum, unsigned int *temperature)
|
|
{
|
|
NvAPI_Status ret;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
NV_GPU_THERMAL_SETTINGS thermal;
|
|
thermal.version = NV_GPU_THERMAL_SETTINGS_VER;
|
|
ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
(*temperature) = (unsigned int) thermal.sensor[0].currentTemp;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_fanspeed(unsigned int devNum, unsigned int *speed)
|
|
{
|
|
NvAPI_Status ret;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
NvU32 fanspeed = 0;
|
|
ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
(*speed) = (unsigned int) fanspeed;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_getpstate(unsigned int devNum, unsigned int *pstate)
|
|
{
|
|
NvAPI_Status ret;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */
|
|
ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string);
|
|
return -1;
|
|
}
|
|
else {
|
|
// get pstate for the moment... often 0 = P0
|
|
(*pstate) = (unsigned int)CurrentPstate;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define UTIL_DOMAIN_GPU 0
|
|
int nvapi_getusage(unsigned int devNum, unsigned int *pct)
|
|
{
|
|
NvAPI_Status ret;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
NV_GPU_DYNAMIC_PSTATES_INFO_EX info;
|
|
info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER;
|
|
ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string);
|
|
return -1;
|
|
}
|
|
else {
|
|
if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent)
|
|
(*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid)
|
|
{
|
|
NvAPI_Status ret;
|
|
NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, &pRevisionId, &pExtDeviceId);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
pid = pDeviceId >> 16;
|
|
vid = pDeviceId & 0xFFFF;
|
|
if (vid == 0x10DE && pSubSystemId) {
|
|
vid = pSubSystemId & 0xFFFF;
|
|
pid = pSubSystemId >> 16;
|
|
// Colorful and Inno3D
|
|
if (pid == 0) pid = pDeviceId >> 16;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen)
|
|
{
|
|
NvAPI_Status ret;
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
memset(serial, 0, maxlen);
|
|
|
|
if (maxlen < 11)
|
|
return -EINVAL;
|
|
|
|
NvAPI_ShortString ser = { 0 };
|
|
ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
uint8_t *bytes = (uint8_t*) ser;
|
|
for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]);
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
|
|
{
|
|
NvAPI_Status ret;
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
if (maxlen < 64) // Short String
|
|
return -1;
|
|
|
|
ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState)
|
|
{
|
|
NvAPI_Status ret = NVAPI_OK;
|
|
NV_I2C_INFO_EX* i2cInfo;
|
|
|
|
int delay1 = 20000;
|
|
int delay2 = 0;
|
|
|
|
uchar4 rgb = { 0 };
|
|
memcpy(&rgb, &RGB, 4);
|
|
uchar4 prgb = { 0 };
|
|
int32_t prev = device_led_state[nvapi_devid(devNum)];
|
|
memcpy(&prgb, &prev, 4);
|
|
|
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo);
|
|
if (i2cInfo == NULL) return -ENOMEM;
|
|
|
|
NvU32 data[5] = { 0 };
|
|
NvU32 datv[2] = { 0, 1 };
|
|
NvU32 datw[2] = { 1, 0 };
|
|
if (rgb.z != prgb.z || ignorePrevState) {
|
|
data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo)
|
|
data[3] = 1;
|
|
datv[0] = rgb.z | 0x13384000;
|
|
|
|
i2cInfo->i2cDevAddress = 0x52;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 1;
|
|
i2cInfo->pbData = (NvU8*) datv;
|
|
i2cInfo->cbRead = 5;
|
|
i2cInfo->cbSize = 1;
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
|
|
usleep(delay1);
|
|
has_rgb_ok = (ret == NVAPI_OK);
|
|
}
|
|
|
|
if (rgb.y != prgb.y || ignorePrevState) {
|
|
data[2] = 5;
|
|
data[3] = 1;
|
|
datv[0] = rgb.y | 0x4000;
|
|
|
|
i2cInfo->i2cDevAddress = 0x52;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 1;
|
|
i2cInfo->pbData = (NvU8*) datv;
|
|
i2cInfo->cbRead = 5;
|
|
i2cInfo->cbSize = 1;
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
|
|
usleep(delay1);
|
|
has_rgb_ok = (ret == NVAPI_OK);
|
|
}
|
|
|
|
if (rgb.y != prgb.y || ignorePrevState) {
|
|
data[2] = 6;
|
|
data[3] = 1;
|
|
datv[0] = rgb.x | 0x4000;
|
|
|
|
i2cInfo->i2cDevAddress = 0x52;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 1;
|
|
i2cInfo->pbData = (NvU8*) datv;
|
|
i2cInfo->cbRead = 5;
|
|
i2cInfo->cbSize = 1;
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
|
|
usleep(delay1);
|
|
has_rgb_ok = (ret == NVAPI_OK);
|
|
}
|
|
|
|
if (rgb.w && ignorePrevState) {
|
|
data[2] = 7;
|
|
data[3] = 1;
|
|
datv[0] = rgb.w | 0x4000;
|
|
|
|
i2cInfo->i2cDevAddress = 0x52;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 1;
|
|
i2cInfo->pbData = (NvU8*) datv;
|
|
i2cInfo->cbRead = 5;
|
|
i2cInfo->cbSize = 1;
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
|
|
usleep(delay1);
|
|
has_rgb_ok = (ret == NVAPI_OK);
|
|
}
|
|
usleep(delay2);
|
|
free(i2cInfo);
|
|
return (int) ret;
|
|
}
|
|
|
|
static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB)
|
|
{
|
|
NvAPI_Status ret;
|
|
NV_I2C_INFO_EX* i2cInfo;
|
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo);
|
|
if (i2cInfo == NULL)
|
|
return -ENOMEM;
|
|
|
|
NvU32 readBuf[25] = { 0 };
|
|
NvU32 data[5] = { 0 };
|
|
data[0] = 1;
|
|
data[2] = swab32(RGB & 0xfcfcfcU) | 0x40;
|
|
|
|
i2cInfo->i2cDevAddress = 0x48 << 1;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS
|
|
i2cInfo->pbData = (NvU8*) readBuf;
|
|
i2cInfo->cbRead = 2;
|
|
i2cInfo->cbSize = sizeof(readBuf);
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
//ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data);
|
|
ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data);
|
|
usleep(20000);
|
|
free(i2cInfo);
|
|
return (int) ret;
|
|
}
|
|
|
|
static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB)
|
|
{
|
|
NvAPI_Status ret;
|
|
NV_I2C_INFO* i2cInfo;
|
|
NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo);
|
|
if (i2cInfo == NULL)
|
|
return -ENOMEM;
|
|
|
|
NvU32 buf[25] = { 0 };
|
|
NvU32 data[5] = { 0 };
|
|
|
|
uint32_t color = 0, level = 0x40;
|
|
|
|
uchar4 rgb = { 0 };
|
|
memcpy(&rgb, &RGB, 4);
|
|
level = rgb.x & 0xF0;
|
|
level |= rgb.y & 0xF0;
|
|
level |= rgb.z & 0xF0;
|
|
//applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x);
|
|
|
|
// Not really RGB custom, only some basic colors, so convert
|
|
// 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white
|
|
if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6;
|
|
else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5;
|
|
else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3;
|
|
else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1;
|
|
else if (RGB & 0xFF) color = 4;
|
|
else if (RGB & 0xFF00) color = 2;
|
|
|
|
buf[0] = 0xF0; // F0 set colors
|
|
buf[0] |= (color << 8); // logo
|
|
buf[0] |= (1 << 16); // top
|
|
if (RGB != 0) // level : 0x10 to 0xF0
|
|
buf[0] |= (level << 24);
|
|
else
|
|
buf[0] |= (0x10U << 24);
|
|
|
|
// todo: i2c data crc ?
|
|
|
|
i2cInfo->displayMask = 1;
|
|
i2cInfo->bIsDDCPort = 1;
|
|
i2cInfo->i2cDevAddress = 0x48 << 1;
|
|
i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
|
|
i2cInfo->regAddrSize = 1;
|
|
i2cInfo->pbData = (NvU8*) buf;
|
|
i2cInfo->cbSize = 4;
|
|
i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED;
|
|
i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4
|
|
i2cInfo->portId = 1;
|
|
i2cInfo->bIsPortIdSet = 1;
|
|
|
|
ret = NvAPI_I2CWrite(phys[devNum], i2cInfo);
|
|
// required to prevent i2c lock
|
|
usleep(20000);
|
|
|
|
#if 0
|
|
buf[0] = 0xF7; // F7 toggle leds
|
|
if (RGB == 0)
|
|
buf[0] |= (1 << 8); // 0 logo on, 1 off
|
|
buf[0] |= (1 << 16); // 1 top off
|
|
ret = NvAPI_I2CWrite(phys[devNum], i2cInfo);
|
|
usleep(20000);
|
|
#endif
|
|
// other modes:
|
|
// 0xF1 breathing green (0x070202F1)
|
|
// 0xF2 strobe green (0x070202F2)
|
|
// 0xF3 cycle (0x000000F3)
|
|
|
|
free(i2cInfo);
|
|
return (int) ret;
|
|
}
|
|
|
|
int nvapi_set_led(unsigned int devNum, int RGB, char *device_name)
|
|
{
|
|
uint16_t vid = 0, pid = 0;
|
|
NvAPI_Status ret;
|
|
if (strstr(device_name, "Gigabyte GTX 10")) {
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
|
|
return SetGigabyteRGBLogo(devNum, (uint32_t) RGB);
|
|
} else if (strstr(device_name, "ASUS GTX 10")) {
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
|
|
return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok);
|
|
} else if (strstr(device_name, "Zotac GTX 10")) {
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB);
|
|
return SetZotacRGBLogo(devNum, (uint32_t) RGB);
|
|
} else {
|
|
NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu;
|
|
NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu);
|
|
illu->hPhysicalGpu = phys[devNum];
|
|
illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS;
|
|
ret = NvAPI_GPU_QueryIlluminationSupport(illu);
|
|
if (!ret && illu->bSupported) {
|
|
NV_GPU_GET_ILLUMINATION_PARM *led;
|
|
NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led);
|
|
led->hPhysicalGpu = phys[devNum];
|
|
led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS;
|
|
NvAPI_GPU_GetIllumination(led);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB);
|
|
led->Value = (uint32_t) RGB;
|
|
ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led);
|
|
free(led);
|
|
}
|
|
free(illu);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
int nvapi_pstateinfo(unsigned int devNum)
|
|
{
|
|
uint32_t n;
|
|
NvAPI_Status ret;
|
|
uint32_t* mem = (uint32_t*) calloc(1, 0x4000);
|
|
if (!mem)
|
|
return -ENOMEM;
|
|
|
|
unsigned int current = 0xFF;
|
|
// useless on init but...
|
|
nvapi_getpstate(devNum, ¤t);
|
|
|
|
#if 0
|
|
// try :p
|
|
uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
|
|
for (int i=8; i < 0x8000 && buf; i+=4) {
|
|
buf[0] = 0x10000 + i;
|
|
NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0;
|
|
ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf);
|
|
if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
|
|
for (int n=0; n < i/32; n++)
|
|
applog_hex(&buf[n*(32/4)], 32);
|
|
break;
|
|
}
|
|
}
|
|
free(buf);
|
|
#endif
|
|
|
|
#if 0
|
|
// Unsure of the meaning of these values
|
|
NVAPI_GPU_POWER_TOPO topo = { 0 };
|
|
topo.version = NVAPI_GPU_POWER_TOPO_VER;
|
|
if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
|
|
if (topo.count)
|
|
applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
|
|
(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);
|
|
|
|
// Ok on 970, not pascal
|
|
NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
|
|
pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
|
|
pset2.ov.numVoltages = 1;
|
|
pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv;
|
|
ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
|
|
#endif
|
|
|
|
NV_GPU_PERF_PSTATES20_INFO* info;
|
|
NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem);
|
|
if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
for (n=0; n < info->numPstates; n++) {
|
|
NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks;
|
|
applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
|
|
info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId,
|
|
clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
|
|
(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
|
|
info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
|
|
info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
|
|
info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
|
|
if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
|
|
applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz",
|
|
clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
|
|
}
|
|
}
|
|
// boost over volting (GTX 9xx only ?)
|
|
for (n=0; n < info->ov.numVoltages; n++) {
|
|
applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d",
|
|
info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? "*":" ",
|
|
info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000);
|
|
}
|
|
|
|
NV_GPU_CLOCK_FREQUENCIES *freqs;
|
|
NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem);
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
|
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks",
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
|
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
|
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks",
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
|
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
|
|
applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current",
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000,
|
|
(double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000);
|
|
|
|
// Other clock values ??
|
|
NVAPI_GPU_PERF_CLOCKS *pcl;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl);
|
|
int numClock=0; ret = NVAPI_OK;
|
|
while (ret == NVAPI_OK) {
|
|
if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) {
|
|
applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock,
|
|
(double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000);
|
|
// ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error
|
|
}
|
|
numClock++;
|
|
}
|
|
|
|
// Pascal only
|
|
NVAPI_VOLTBOOST_PERCENT *pvb;
|
|
NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem);
|
|
if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) {
|
|
NVAPI_VOLTAGE_STATUS *pvdom;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom);
|
|
NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom);
|
|
if (pvdom && pvdom->value_uV)
|
|
applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent);
|
|
else if (pvdom)
|
|
applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000);
|
|
free(pvdom);
|
|
} else {
|
|
// Maxwell 9xx
|
|
NVAPI_VOLT_STATUS *mvdom, *mvstep;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom);
|
|
if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) {
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep);
|
|
NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep);
|
|
if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution",
|
|
(double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000);
|
|
free(mvstep);
|
|
}
|
|
free(mvdom);
|
|
}
|
|
|
|
uint32_t plim = nvapi_get_plimit(devNum);
|
|
applog(LOG_RAW, " Power limit is set to %u%%", plim);
|
|
|
|
#if 0
|
|
NVAPI_COOLER_SETTINGS *cooler;
|
|
NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem);
|
|
ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler);
|
|
if (ret == NVAPI_OK) {
|
|
applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?)
|
|
NVAPI_COOLER_LEVEL *fan;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan);
|
|
fan->level = 100;
|
|
fan->count = 1;
|
|
ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan);
|
|
free(fan);
|
|
sleep(10);
|
|
ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7);
|
|
}
|
|
#endif
|
|
|
|
NV_GPU_THERMAL_SETTINGS *tset;
|
|
NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem);
|
|
|
|
NVAPI_GPU_THERMAL_INFO *tnfo;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo);
|
|
NVAPI_GPU_THERMAL_LIMIT *tlim;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim);
|
|
NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset);
|
|
NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo);
|
|
if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) {
|
|
applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]",
|
|
tlim->entries[0].value >> 8, tset->sensor[0].currentTemp,
|
|
tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8);
|
|
}
|
|
free(tnfo);
|
|
free(tlim);
|
|
|
|
#if 1
|
|
// Read pascal Clocks Table, Empty on 9xx
|
|
//NVAPI_CLOCKS_RANGE* ranges;
|
|
//NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem);
|
|
//ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges);
|
|
|
|
NVAPI_CLOCK_MASKS* boost;
|
|
NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem);
|
|
ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost);
|
|
int gpuClocks = 0, memClocks = 0;
|
|
for (n=0; n < 80+23; n++) {
|
|
if (boost->clocks[n].memDelta) memClocks++;
|
|
if (boost->clocks[n].gpuDelta) gpuClocks++;
|
|
}
|
|
|
|
// PASCAL GTX ONLY
|
|
if (gpuClocks || memClocks) {
|
|
NVAPI_CLOCK_TABLE *table;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table);
|
|
memcpy(table->mask, boost->mask, 12);
|
|
ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table);
|
|
gpuClocks = 0, memClocks = 0;
|
|
for (n=0; n < 12; n++) {
|
|
if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]);
|
|
}
|
|
for (n=0; n < 80; n++) {
|
|
if (table->gpuDeltas[n].freqDelta) {
|
|
// note: gpu delta value seems to be x2, not the memory
|
|
//applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000);
|
|
gpuClocks++;
|
|
}
|
|
}
|
|
for (n=0; n < 23; n++) {
|
|
if (table->memFilled[n]) {
|
|
//applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000);
|
|
memClocks++;
|
|
}
|
|
}
|
|
for (n=0; n < 1529; n++) {
|
|
if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]);
|
|
}
|
|
applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
|
|
free(table);
|
|
|
|
NVAPI_VFP_CURVE *curve;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve);
|
|
memcpy(curve->mask, boost->mask, 12);
|
|
ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve);
|
|
gpuClocks = 0, memClocks = 0;
|
|
for (n=0; n < 80; n++) {
|
|
if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) {
|
|
// applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000);
|
|
gpuClocks++;
|
|
}
|
|
}
|
|
for (n=0; n < 23; n++) {
|
|
if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) {
|
|
// applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000);
|
|
memClocks++;
|
|
}
|
|
}
|
|
for (n=0; n < 1064; n++) {
|
|
if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]);
|
|
}
|
|
applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks);
|
|
free(curve);
|
|
}
|
|
|
|
// Maxwell
|
|
else {
|
|
NVAPI_VOLTAGES_TABLE* volts;
|
|
NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts);
|
|
int entries = 0;
|
|
ret = NvAPI_DLL_GetVoltages(phys[devNum], volts);
|
|
for (n=0; n < 128; n++) {
|
|
if (volts->entries[n].volt_uV)
|
|
entries++;
|
|
}
|
|
applog(LOG_RAW, " Volts table contains %d gpu levels.", entries);
|
|
free(volts);
|
|
}
|
|
|
|
NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo;
|
|
NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem);
|
|
meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
|
|
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) {
|
|
applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024,
|
|
(double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024);
|
|
}
|
|
#if 0 /* some undetermined stats */
|
|
NVAPI_GPU_PERF_INFO pi = { 0 };
|
|
pi.version = NVAPI_GPU_PERF_INFO_VER;
|
|
ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi);
|
|
|
|
NVAPI_GPU_PERF_STATUS ps = { 0 };
|
|
ps.version = NVAPI_GPU_PERF_STATUS_VER;
|
|
ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps);
|
|
applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]);
|
|
#endif
|
|
|
|
#endif
|
|
free(mem);
|
|
return 0;
|
|
}
|
|
|
|
// workaround for buggy driver 378.49
|
|
unsigned int nvapi_get_gpu_clock(unsigned int devNum)
|
|
{
|
|
NvAPI_Status ret = NVAPI_OK;
|
|
unsigned int freq = 0;
|
|
NV_GPU_CLOCK_FREQUENCIES *freqs;
|
|
NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs);
|
|
freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs);
|
|
if (ret == NVAPI_OK) {
|
|
freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000;
|
|
}
|
|
free(freqs);
|
|
return freq; // in MHz
|
|
}
|
|
|
|
uint8_t nvapi_get_plimit(unsigned int devNum)
|
|
{
|
|
NvAPI_Status ret = NVAPI_OK;
|
|
NVAPI_GPU_POWER_STATUS pol = { 0 };
|
|
pol.version = NVAPI_GPU_POWER_STATUS_VER;
|
|
if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string);
|
|
return 0;
|
|
}
|
|
return (uint8_t) (pol.entries[0].power / 1000); // in percent
|
|
}
|
|
|
|
int nvapi_set_plimit(unsigned int devNum, uint16_t percent)
|
|
{
|
|
NvAPI_Status ret = NVAPI_OK;
|
|
uint32_t val = percent * 1000;
|
|
|
|
NVAPI_GPU_POWER_INFO nfo = { 0 };
|
|
nfo.version = NVAPI_GPU_POWER_INFO_VER;
|
|
ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo);
|
|
if (ret == NVAPI_OK) {
|
|
if (val == 0)
|
|
val = nfo.entries[0].def_power;
|
|
else if (val < nfo.entries[0].min_power)
|
|
val = nfo.entries[0].min_power;
|
|
else if (val > nfo.entries[0].max_power)
|
|
val = nfo.entries[0].max_power;
|
|
}
|
|
|
|
NVAPI_GPU_POWER_STATUS pol = { 0 };
|
|
pol.version = NVAPI_GPU_POWER_STATUS_VER;
|
|
pol.flags = 1;
|
|
pol.entries[0].power = val;
|
|
if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string);
|
|
return -1;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int nvapi_set_tlimit(unsigned int devNum, uint8_t limit)
|
|
{
|
|
NvAPI_Status ret;
|
|
uint32_t val = limit;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
NV_GPU_THERMAL_SETTINGS tset = { 0 };
|
|
NVAPI_GPU_THERMAL_INFO tnfo = { 0 };
|
|
NVAPI_GPU_THERMAL_LIMIT tlim = { 0 };
|
|
tset.version = NV_GPU_THERMAL_SETTINGS_VER;
|
|
NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset);
|
|
tnfo.version = NVAPI_GPU_THERMAL_INFO_VER;
|
|
NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo);
|
|
tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER;
|
|
if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) {
|
|
tlim.entries[0].value = val << 8;
|
|
tlim.flags = 1;
|
|
ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim);
|
|
if (ret == NVAPI_OK) {
|
|
applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]",
|
|
devNum, val, tset.sensor[0].currentTemp,
|
|
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
|
|
} else {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string,
|
|
tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8);
|
|
}
|
|
}
|
|
return (int) ret;
|
|
}
|
|
|
|
int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock)
|
|
{
|
|
NvAPI_Status ret;
|
|
NvS32 delta = 0;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
#if 0
|
|
// wrong api to get default base clock when modified, cuda props seems fine
|
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs);
|
|
if (ret == NVAPI_OK) {
|
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency;
|
|
}
|
|
|
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr!
|
|
if (ret == NVAPI_OK) {
|
|
if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS)
|
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2;
|
|
}
|
|
#endif
|
|
|
|
cudaDeviceProp props = { 0 };
|
|
NvU32 busId = 0xFFFF;
|
|
ret = NvAPI_GPU_GetBusId(phys[devNum], &busId);
|
|
for (int d=0; d < (int) nvapi_dev_cnt; d++) {
|
|
// unsure about devNum, so be safe
|
|
cudaGetDeviceProperties(&props, d);
|
|
if (props.pciBusID == busId) {
|
|
delta = (clock * 1000) - props.clockRate;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (delta == (clock * 1000))
|
|
return ret;
|
|
|
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
|
pset1.numPstates = 1;
|
|
pset1.numClocks = 1;
|
|
// Ok on both 1080 and 970
|
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS;
|
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
|
if (ret == NVAPI_OK) {
|
|
applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
|
|
{
|
|
NvAPI_Status ret;
|
|
NvS32 delta = 0;
|
|
|
|
if (devNum >= nvapi_dev_cnt)
|
|
return -ENODEV;
|
|
|
|
// wrong to get default base clock (when modified) on maxwell (same as cuda props one)
|
|
NV_GPU_CLOCK_FREQUENCIES freqs = { 0 };
|
|
freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER;
|
|
freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK;
|
|
ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless
|
|
if (ret == NVAPI_OK) {
|
|
delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency;
|
|
}
|
|
|
|
// seems ok on maxwell and pascal for the mem clocks
|
|
NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 };
|
|
deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER;
|
|
ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks
|
|
if (ret == NVAPI_OK) {
|
|
if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY)
|
|
delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq;
|
|
}
|
|
|
|
if (delta == (clock * 1000))
|
|
return ret;
|
|
|
|
// todo: bounds check with GetPstates20
|
|
|
|
NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 };
|
|
pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1;
|
|
pset1.numPstates = 1;
|
|
pset1.numClocks = 1;
|
|
pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY;
|
|
pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta;
|
|
ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1);
|
|
if (ret == NVAPI_OK) {
|
|
applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// Replacement for WIN32 CUDA 6.5 on pascal
|
|
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
|
|
{
|
|
NvAPI_Status ret = NVAPI_OK;
|
|
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
|
|
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
|
|
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
|
|
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
|
|
*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
|
|
*free = (uint64_t) mem.curAvailableDedicatedVideoMemory;
|
|
}
|
|
return (int) ret;
|
|
}
|
|
|
|
int nvapi_init()
|
|
{
|
|
int num_gpus = cuda_num_devices();
|
|
NvAPI_Status ret = NvAPI_Initialize();
|
|
if (!ret == NVAPI_OK){
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt);
|
|
if (ret != NVAPI_OK) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string);
|
|
return -1;
|
|
}
|
|
|
|
for (int g = 0; g < num_gpus; g++) {
|
|
cudaDeviceProp props;
|
|
if (cudaGetDeviceProperties(&props, g) == cudaSuccess) {
|
|
device_bus_ids[g] = props.pciBusID;
|
|
}
|
|
nvapi_dev_map[g] = g; // default mapping
|
|
}
|
|
|
|
for (NvU8 i = 0; i < nvapi_dev_cnt; i++) {
|
|
NvAPI_ShortString name;
|
|
ret = NvAPI_GPU_GetFullName(phys[i], name);
|
|
if (ret == NVAPI_OK) {
|
|
for (int g = 0; g < num_gpus; g++) {
|
|
NvU32 busId;
|
|
ret = NvAPI_GPU_GetBusId(phys[i], &busId);
|
|
if (ret == NVAPI_OK && busId == device_bus_ids[g]) {
|
|
nvapi_dev_map[g] = i;
|
|
if (opt_debug)
|
|
applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u",
|
|
g, i, busId);
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage(ret, string);
|
|
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
|
|
}
|
|
}
|
|
#if 0
|
|
if (opt_debug) {
|
|
NvAPI_ShortString ver;
|
|
NvAPI_GetInterfaceVersionString(ver);
|
|
applog(LOG_DEBUG, "%s", ver);
|
|
}
|
|
#endif
|
|
|
|
NvU32 udv;
|
|
NvAPI_ShortString str;
|
|
ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str);
|
|
if (ret == NVAPI_OK) {
|
|
sprintf(driver_version,"%d.%02d", udv / 100, udv % 100);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nvapi_init_settings()
|
|
{
|
|
// nvapi.dll
|
|
int ret = nvapi_dll_init();
|
|
if (ret != NVAPI_OK)
|
|
return ret;
|
|
|
|
if (!opt_n_threads) {
|
|
opt_n_threads = active_gpus;
|
|
}
|
|
|
|
for (int n=0; n < opt_n_threads; n++) {
|
|
int dev_id = device_map[n % MAX_GPUS];
|
|
if (device_plimit[dev_id] && !hnvml) {
|
|
if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
|
|
uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
|
|
gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
|
|
}
|
|
}
|
|
if (device_tlimit[dev_id]) {
|
|
nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
|
|
}
|
|
if (device_gpu_clocks[dev_id]) {
|
|
ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
|
|
if (ret) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
|
|
gpulog(LOG_WARNING, n, "Boost gpu clock %s", string);
|
|
}
|
|
}
|
|
if (device_mem_clocks[dev_id]) {
|
|
ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
|
|
if (ret) {
|
|
NvAPI_ShortString string;
|
|
NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
|
|
gpulog(LOG_WARNING, n, "Boost mem clock %s", string);
|
|
}
|
|
}
|
|
if (device_pstate[dev_id]) {
|
|
// dunno how via nvapi or/and pascal
|
|
}
|
|
if (device_led[dev_id] != -1) {
|
|
int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]);
|
|
if (err != 0) {
|
|
gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err);
|
|
}
|
|
device_led_state[dev_id] = device_led[dev_id];
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
unsigned int nvapi_devnum(int dev_id)
|
|
{
|
|
return nvapi_dev_map[dev_id];
|
|
}
|
|
|
|
int nvapi_devid(unsigned int devNum)
|
|
{
|
|
for (int i=0; i < opt_n_threads; i++) {
|
|
int dev_id = device_map[i % MAX_GPUS];
|
|
if (nvapi_dev_map[dev_id] = devNum)
|
|
return dev_id;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#endif /* WIN32 : Windows specific (nvapi) */
|
|
|
|
/* api functions -------------------------------------- */
|
|
|
|
// assume 2500 rpm as default, auto-updated if more
|
|
static unsigned int fan_speed_max = 2500;
|
|
|
|
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
|
|
{
|
|
unsigned int pct = 0;
|
|
if (hnvml) {
|
|
nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct);
|
|
}
|
|
#ifdef WIN32
|
|
else {
|
|
unsigned int rpm = 0;
|
|
nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm);
|
|
pct = (rpm * 100) / fan_speed_max;
|
|
if (pct > 100) {
|
|
pct = 100;
|
|
fan_speed_max = rpm;
|
|
}
|
|
}
|
|
#endif
|
|
return pct;
|
|
}
|
|
|
|
/*
 * Fan speed of a device in rpm, read through NVAPI.
 * Only available on WIN32 builds; returns 0 elsewhere.
 */
unsigned int gpu_fanrpm(struct cgpu_info *gpu)
{
#ifdef WIN32
	unsigned int speed = 0;
	nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &speed);
	return speed;
#else
	return 0;
#endif
}
|
|
|
|
|
|
float gpu_temp(struct cgpu_info *gpu)
|
|
{
|
|
float tc = 0.0;
|
|
unsigned int tmp = 0;
|
|
if (hnvml) {
|
|
nvml_get_tempC(hnvml, gpu->gpu_id, &tmp);
|
|
tc = (float)tmp;
|
|
}
|
|
#ifdef WIN32
|
|
else {
|
|
nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp);
|
|
tc = (float)tmp;
|
|
}
|
|
#endif
|
|
return tc;
|
|
}
|
|
|
|
int gpu_pstate(struct cgpu_info *gpu)
|
|
{
|
|
int pstate = -1;
|
|
int support = -1;
|
|
if (hnvml) {
|
|
support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate);
|
|
}
|
|
#ifdef WIN32
|
|
if (support == -1) {
|
|
unsigned int pst = 0;
|
|
nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst);
|
|
pstate = (int) pst;
|
|
}
|
|
#endif
|
|
return pstate;
|
|
}
|
|
|
|
int gpu_busid(struct cgpu_info *gpu)
|
|
{
|
|
int busid = -1;
|
|
int support = -1;
|
|
if (hnvml) {
|
|
support = nvml_get_busid(hnvml, gpu->gpu_id, &busid);
|
|
}
|
|
#ifdef WIN32
|
|
if (support == -1) {
|
|
busid = device_bus_ids[gpu->gpu_id];
|
|
}
|
|
#endif
|
|
return busid;
|
|
}
|
|
|
|
unsigned int gpu_power(struct cgpu_info *gpu)
|
|
{
|
|
unsigned int mw = 0;
|
|
int support = -1;
|
|
if (hnvml) {
|
|
support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw);
|
|
}
|
|
#ifdef WIN32
|
|
if (support == -1) {
|
|
unsigned int pct = 0;
|
|
nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
|
|
pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
|
|
pct /= 100;
|
|
mw = pct; // to fix
|
|
}
|
|
#endif
|
|
if (gpu->gpu_power > 0) {
|
|
// average
|
|
mw = (gpu->gpu_power + mw) / 2;
|
|
}
|
|
return mw;
|
|
}
|
|
|
|
unsigned int gpu_plimit(struct cgpu_info *gpu)
|
|
{
|
|
unsigned int mw = 0;
|
|
int support = -1;
|
|
if (hnvml) {
|
|
mw = nvml_get_plimit(hnvml, gpu->gpu_id);
|
|
support = (mw > 0);
|
|
}
|
|
#ifdef WIN32
|
|
// NVAPI value is in % (< 100 so)
|
|
if (support == -1) {
|
|
mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
|
|
}
|
|
#endif
|
|
return mw;
|
|
}
|
|
|
|
static int translate_vendor_id(uint16_t vid, char *vendorname)
|
|
{
|
|
struct VENDORS {
|
|
const uint16_t vid;
|
|
const char *name;
|
|
} vendors[] = {
|
|
{ 0x1043, "ASUS" },
|
|
{ 0x1048, "Elsa" },
|
|
{ 0x107D, "Leadtek" },
|
|
{ 0x10B0, "Gainward" },
|
|
// { 0x10DE, "NVIDIA" },
|
|
{ 0x1458, "Gigabyte" },
|
|
{ 0x1462, "MSI" },
|
|
{ 0x154B, "PNY" }, // maybe storage devices
|
|
{ 0x1569, "Palit" },
|
|
{ 0x1682, "XFX" },
|
|
{ 0x196D, "Club3D" },
|
|
{ 0x196E, "PNY" },
|
|
{ 0x19DA, "Zotac" },
|
|
{ 0x19F1, "BFG" },
|
|
{ 0x1ACC, "PoV" },
|
|
{ 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards
|
|
{ 0x3842, "EVGA" },
|
|
{ 0x7377, "Colorful" },
|
|
{ 0, "" }
|
|
};
|
|
|
|
if (!vendorname)
|
|
return -EINVAL;
|
|
|
|
for(int v=0; v < ARRAY_SIZE(vendors); v++) {
|
|
if (vid == vendors[v].vid) {
|
|
strcpy(vendorname, vendors[v].name);
|
|
return vid;
|
|
}
|
|
}
|
|
if (opt_debug && vid != 0x10DE)
|
|
applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid);
|
|
return 0;
|
|
}
|
|
|
|
int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
|
|
{
|
|
uint16_t vid = 0, pid = 0;
|
|
if (hnvml) { // may not be initialized on start...
|
|
for (int id=0; id < hnvml->nvml_gpucount; id++) {
|
|
if (hnvml->nvml_pci_bus_id[id] == pci_bus_id) {
|
|
int dev_id = hnvml->nvml_cuda_device_id[id];
|
|
nvml_get_info(hnvml, dev_id, vid, pid);
|
|
}
|
|
}
|
|
} else {
|
|
#ifdef WIN32
|
|
for (unsigned id = 0; id < nvapi_dev_cnt; id++) {
|
|
if (device_bus_ids[id] == pci_bus_id) {
|
|
nvapi_getinfo(nvapi_dev_map[id], vid, pid);
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
return translate_vendor_id(vid, vendorname);
|
|
}
|
|
|
|
int gpu_info(struct cgpu_info *gpu)
|
|
{
|
|
char vendorname[32] = { 0 };
|
|
int id = gpu->gpu_id;
|
|
uint8_t bus_id = 0;
|
|
|
|
gpu->nvml_id = -1;
|
|
gpu->nvapi_id = -1;
|
|
|
|
if (id < 0)
|
|
return -1;
|
|
|
|
if (hnvml) {
|
|
gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id];
|
|
nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid);
|
|
nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn));
|
|
nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc));
|
|
}
|
|
#ifdef WIN32
|
|
gpu->nvapi_id = (int8_t) nvapi_dev_map[id];
|
|
nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid);
|
|
nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn));
|
|
nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc));
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
#endif /* USE_WRAPNVML */
|
|
|
|
/*
 * Scale each 8-bit channel of a 0xRRGGBB color by a percentage.
 *
 * RGB     : color, only the low 24 bits are used
 * percent : brightness in percent, 100 returns the color unchanged
 *
 * Channels are extracted with shifts and masks, so the result does not
 * depend on the byte order of the host (the previous version read the
 * int through a uint8_t pointer). Each scaled channel is clamped to
 * 0..255 so an out-of-range percent cannot spill into the next channel
 * or shift a negative value.
 *
 * Returns the scaled color as 0xRRGGBB.
 */
static int rgb_percent(int RGB, int percent)
{
	const uint32_t rgb = (uint32_t) RGB;
	int res = 0;

	for (int shift = 16; shift >= 0; shift -= 8) {
		const long long channel = (long long) ((rgb >> shift) & 0xFF);
		long long scaled = ((long long) percent * channel) / 100;
		if (scaled < 0)
			scaled = 0;
		else if (scaled > 255)
			scaled = 255;
		res |= (int) scaled << shift;
	}
	return res;
}
|
|
|
|
/*
 * Set the led of a device to its configured value (device_led).
 * Nothing is sent when the cached state already matches; the cache is
 * only updated when nvapi_set_led() returns 0.
 * No-op unless built for WIN32 with USE_WRAPNVML.
 */
void gpu_led_on(int dev_id)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	const int wanted = device_led[dev_id];
	if (device_led_state[dev_id] == wanted)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], wanted, device_name[dev_id]) == 0)
		device_led_state[dev_id] = wanted;
#endif
}
|
|
|
|
/*
 * Set the led of a device to its configured value scaled by a
 * percentage (see rgb_percent).
 * Nothing is sent when the cached state already matches; the cache is
 * only updated when nvapi_set_led() returns 0.
 * No-op unless built for WIN32 with USE_WRAPNVML.
 */
void gpu_led_percent(int dev_id, int percent)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	const int dimmed = rgb_percent(device_led[dev_id], percent);
	if (device_led_state[dev_id] == dimmed)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], dimmed, device_name[dev_id]) == 0)
		device_led_state[dev_id] = dimmed;
#endif
}
|
|
|
|
/*
 * Switch the led of a device off (value 0).
 * Nothing is sent when the cached state is already 0; the cache is
 * only cleared when nvapi_set_led() returns 0.
 * No-op unless built for WIN32 with USE_WRAPNVML.
 */
void gpu_led_off(int dev_id)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	if (device_led_state[dev_id] == 0)
		return;
	if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0)
		device_led_state[dev_id] = 0;
#endif
}
|
|
|
|
#ifdef USE_WRAPNVML
|
|
extern double thr_hashrates[MAX_GPUS];
|
|
extern bool opt_debug_threads;
|
|
extern bool opt_hwmonitor;
|
|
extern int num_cpus;
|
|
|
|
void *monitor_thread(void *userdata)
|
|
{
|
|
int thr_id = -1;
|
|
|
|
while (!abort_flag && !opt_quiet)
|
|
{
|
|
// This thread monitors card's power lazily during scans, one at a time...
|
|
thr_id = (thr_id + 1) % opt_n_threads;
|
|
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
|
|
int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id);
|
|
|
|
if (hnvml != NULL && cgpu)
|
|
{
|
|
char khw[32] = { 0 };
|
|
uint64_t clock = 0, mem_clock = 0;
|
|
uint32_t fanpercent = 0, power = 0;
|
|
double tempC = 0, khs_per_watt = 0;
|
|
uint32_t counter = 0;
|
|
int max_loops = 1000;
|
|
|
|
pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock);
|
|
|
|
do {
|
|
unsigned int tmp_clock=0, tmp_memclock=0;
|
|
nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock);
|
|
#ifdef WIN32
|
|
if (tmp_clock < 200) {
|
|
// workaround for buggy drivers 378.x (real clock)
|
|
tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]);
|
|
}
|
|
#endif
|
|
if (tmp_clock < 200) {
|
|
// some older cards only report a base clock with cuda props.
|
|
if (cuda_gpu_info(cgpu) == 0) {
|
|
tmp_clock = cgpu->gpu_clock/1000;
|
|
tmp_memclock = cgpu->gpu_memclock/1000;
|
|
}
|
|
}
|
|
clock += tmp_clock;
|
|
mem_clock += tmp_memclock;
|
|
tempC += gpu_temp(cgpu);
|
|
fanpercent += gpu_fanpercent(cgpu);
|
|
power += gpu_power(cgpu);
|
|
counter++;
|
|
|
|
usleep(50000);
|
|
if (abort_flag) goto abort;
|
|
|
|
} while (cgpu->monitor.sampling_flag && (--max_loops));
|
|
|
|
cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter);
|
|
cgpu->monitor.gpu_fan = fanpercent/counter;
|
|
cgpu->monitor.gpu_power = power/counter;
|
|
cgpu->monitor.gpu_clock = (uint32_t) (clock/counter);
|
|
cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter);
|
|
|
|
if (power) {
|
|
khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]);
|
|
khs_per_watt = khs_per_watt / ((double)power / counter);
|
|
format_hashrate(khs_per_watt * 1000, khw);
|
|
if (strlen(khw))
|
|
sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000);
|
|
}
|
|
|
|
if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) {
|
|
gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%",
|
|
cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/,
|
|
khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan
|
|
);
|
|
cgpu->monitor.tm_displayed = (uint32_t)time(NULL);
|
|
}
|
|
|
|
pthread_mutex_unlock(&cgpu->monitor.lock);
|
|
}
|
|
usleep(500); // safety
|
|
}
|
|
abort:
|
|
if (opt_debug_threads)
|
|
applog(LOG_DEBUG, "%s() died", __func__);
|
|
return NULL;
|
|
}
|
|
#endif
|