1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-24 21:45:04 +00:00
ccminer/nvml.cpp
Tanguy Pruvot fe99de24a8 nvml: small changes to handle the 750ti app clocks
dynamically alloc the hardcoded gpu clocks list array size.

Unlike the 9xx, the 750ti api call requires to get them all (361.62)

Something was "fixed" recently for the 750 Ti in linux drivers...
2016-06-19 20:13:01 +02:00

1172 lines
35 KiB
C++

/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
* Tanguy Pruvot - tpruvot@github
*
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "miner.h"
#include "nvml.h"
#include "cuda_runtime.h"
#ifdef USE_WRAPNVML
/* global NVML handle, defined in another translation unit; the gpu_*()
 * helpers at the end of this file test it for NULL before using it */
extern nvml_handle *hnvml;
/* driver version string, written by nvml_create() and nvapi_init() */
extern char driver_version[32];
/* PCI bus id of each CUDA device, indexed by CUDA device index */
static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
/* requested application clocks per CUDA device, 0 = nothing requested */
extern uint32_t device_gpu_clocks[MAX_GPUS];
extern uint32_t device_mem_clocks[MAX_GPUS];
/* requested power limit per CUDA device (multiplied by 1000 before being
 * handed to NVML), 0 = nothing requested */
extern uint32_t device_plimit[MAX_GPUS];
/* requested p-state per CUDA device, negative = nothing requested */
extern int8_t device_pstate[MAX_GPUS];
/* non-zero once the application clocks of a device were changed
 * (nvml_set_clocks stores the previous graphics clock, nvml_set_pstate
 * stores 1); consumed by nvml_reset_clocks() */
uint32_t clock_prev[MAX_GPUS] = { 0 };
/* previous memory application clock saved by nvml_set_clocks() */
uint32_t clock_prev_mem[MAX_GPUS] = { 0 };
/* previous power limit saved by nvml_set_plimit() */
uint32_t limit_prev[MAX_GPUS] = { 0 };
/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
HMODULE h = LoadLibrary(filename);
if (!h && opt_debug) {
applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
GetLastError(), filename);
}
return (void*)h;
}
static void *wrap_dlsym(void *h, const char *sym) {
return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
/* FreeLibrary returns nonzero on success */
return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
#include <errno.h>
/* Thin wrapper over dlopen(): returns the library handle, or NULL on
 * failure. In debug mode the failure is logged together with the loader's
 * own diagnostic from dlerror(); errno is kept in the message for
 * continuity but dlopen() is not specified to set it. */
static void *wrap_dlopen(const char *filename) {
    void *h = dlopen(filename, RTLD_NOW);
    if (h == NULL) {
        /* dlerror() must be read right after the failing call */
        const char *reason = dlerror();
        if (opt_debug) {
            applog(LOG_DEBUG, "dlopen(%d): failed to load %s (%s)",
                errno, filename, reason ? reason : "unknown error");
        }
    }
    return h;
}
/* Thin wrapper over dlsym(): address of `sym` in library `h`, or NULL. */
static void *wrap_dlsym(void *h, const char *sym) {
    void *address = dlsym(h, sym);
    return address;
}
/* Thin wrapper over dlclose(): returns dlclose()'s own result (0 = ok). */
static int wrap_dlclose(void *h) {
    int status = dlclose(h);
    return status;
}
#endif
/**
 * Load the NVML shared library at runtime, resolve the entry points used in
 * this file, initialise NVML and build the CUDA <-> NVML device index maps
 * (devices are matched on PCI domain / bus / device numbers).
 *
 * Returns a heap allocated handle, to be released with nvml_destroy(), or
 * NULL when the library cannot be loaded, a required symbol is missing,
 * NVML fails to initialise, memory runs out or the CUDA device count cannot
 * be queried.
 */
nvml_handle * nvml_create()
{
    int i=0;
    nvml_handle *nvmlh = NULL;

#if defined(WIN32)
    /* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
#define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
#else
    /* linux assumed */
#define libnvidia_ml "libnvidia-ml.so"
#endif

    char tmp[512];
#ifdef WIN32
    ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp));
#else
    strcpy(tmp, libnvidia_ml);
#endif

    void *nvml_dll = wrap_dlopen(tmp);
    if (nvml_dll == NULL) {
#ifdef WIN32
        /* second chance: let the loader search its default path */
        nvml_dll = wrap_dlopen("nvml.dll");
        if (nvml_dll == NULL)
#endif
        return NULL;
    }

    nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle));
    if (nvmlh == NULL) {
        wrap_dlclose(nvml_dll);
        return NULL;
    }

    nvmlh->nvml_dll = nvml_dll;

    /* for the versioned entry points, prefer _v2 and fall back to the legacy name */
    nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
    if (!nvmlh->nvmlInit)
        nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
    nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
    if (!nvmlh->nvmlDeviceGetCount)
        nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount");
    nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
    if (!nvmlh->nvmlDeviceGetHandleByIndex)
        nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *))
            wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex");
    nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction");
    nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction");
    nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock");
    nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock");
    nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks");
    nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks");
    nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks");
    nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks");
    nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
    nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo");
    nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2");
    if (!nvmlh->nvmlDeviceGetPciInfo)
        nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
    nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration");
    nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth");
    nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration");
    nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth");
    nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
    nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementDefaultLimit");
    nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit");
    nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints");
    nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit");
    nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
    nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
    nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
    nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */
    nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial");
    nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID");
    nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion");
    nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion");
    nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
    nvmlh->nvmlShutdown = (nvmlReturn_t (*)())
        wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
    // v331
    nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit");
    // v340
    /* NVML_ERROR_NOT_SUPPORTED
    nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled");
    nvmlh->nvmlDeviceSetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t enabled))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAutoBoostedClocksEnabled"); */
    // v346
    nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value))
        wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput");

    /* only these entry points are mandatory, every other pointer above may be NULL */
    if (nvmlh->nvmlInit == NULL ||
        nvmlh->nvmlShutdown == NULL ||
        nvmlh->nvmlErrorString == NULL ||
        nvmlh->nvmlDeviceGetCount == NULL ||
        nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
        nvmlh->nvmlDeviceGetPciInfo == NULL ||
        nvmlh->nvmlDeviceGetName == NULL)
    {
        if (opt_debug)
            applog(LOG_DEBUG, "Failed to obtain required NVML function pointers");
        wrap_dlclose(nvmlh->nvml_dll);
        free(nvmlh);
        return NULL;
    }

    nvmlReturn_t rc = nvmlh->nvmlInit();
    if (rc != NVML_SUCCESS) {
        if (opt_debug)
            applog(LOG_DEBUG, "nvmlInit: %s", nvmlh->nvmlErrorString(rc));
        wrap_dlclose(nvmlh->nvml_dll);
        free(nvmlh);
        return NULL;
    }

    if (nvmlh->nvmlSystemGetDriverVersion)
        nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version));

    /* a failed count query is handled as "no NVML device" */
    rc = nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
    if (rc != NVML_SUCCESS || nvmlh->nvml_gpucount < 0)
        nvmlh->nvml_gpucount = 0;

    /* Query CUDA device count, in case it doesn't agree with NVML, since */
    /* CUDA will only report GPUs with compute capability greater than 1.0 */
    if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
        if (opt_debug)
            applog(LOG_DEBUG, "Failed to query CUDA device count!");
        nvmlh->nvmlShutdown(); /* NVML was initialised above */
        wrap_dlclose(nvmlh->nvml_dll);
        free(nvmlh);
        return NULL;
    }

    nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t));
    nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
    nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
    nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
    nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
    nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
    nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
    nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t));

    /* calloc(0, ...) may legitimately return NULL, so only a NULL result for
     * a non-empty table counts as an allocation failure */
    bool alloc_failed = false;
    if (nvmlh->nvml_gpucount > 0) {
        alloc_failed = !nvmlh->devs || !nvmlh->nvml_pci_domain_id ||
            !nvmlh->nvml_pci_bus_id || !nvmlh->nvml_pci_device_id ||
            !nvmlh->nvml_pci_subsys_id || !nvmlh->nvml_cuda_device_id ||
            !nvmlh->app_clocks;
    }
    if (nvmlh->cuda_gpucount > 0 && !nvmlh->cuda_nvml_device_id)
        alloc_failed = true;
    if (alloc_failed) {
        if (opt_debug)
            applog(LOG_DEBUG, "Failed to allocate the NVML device tables");
        nvmlh->nvmlShutdown();
        wrap_dlclose(nvmlh->nvml_dll);
        free(nvmlh->devs);
        free(nvmlh->nvml_pci_domain_id);
        free(nvmlh->nvml_pci_bus_id);
        free(nvmlh->nvml_pci_device_id);
        free(nvmlh->nvml_pci_subsys_id);
        free(nvmlh->nvml_cuda_device_id);
        free(nvmlh->cuda_nvml_device_id);
        free(nvmlh->app_clocks);
        free(nvmlh);
        return NULL;
    }

    /* Obtain GPU device handles we're going to need repeatedly... */
    for (i=0; i<nvmlh->nvml_gpucount; i++) {
        nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
    }

    /* Query PCI info for each NVML device, and build table for mapping of */
    /* CUDA device IDs to NVML device IDs and vice versa */
    for (i=0; i<nvmlh->nvml_gpucount; i++) {
        nvmlPciInfo_t pciinfo;
        /* keep the fields defined even when the query fails */
        memset(&pciinfo, 0, sizeof(pciinfo));
        nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
        nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
        nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
        nvmlh->nvml_pci_device_id[i] = pciinfo.device;
        nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id;

        nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN;
        if (nvmlh->nvmlDeviceSetAPIRestriction) {
            nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
                NVML_FEATURE_ENABLED);
            /* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */
        }
        if (nvmlh->nvmlDeviceGetAPIRestriction) {
            nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
                &nvmlh->app_clocks[i]);
        }
    }

    /* build mapping of NVML device IDs to CUDA IDs */
    for (i=0; i<nvmlh->nvml_gpucount; i++) {
        nvmlh->nvml_cuda_device_id[i] = -1;
    }
    for (i=0; i<nvmlh->cuda_gpucount; i++) {
        cudaDeviceProp props;
        nvmlh->cuda_nvml_device_id[i] = -1;

        if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
            /* device_bus_ids only has MAX_GPUS slots */
            if (i < MAX_GPUS)
                device_bus_ids[i] = props.pciBusID;
            for (int j = 0; j < nvmlh->nvml_gpucount; j++) {
                if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
                    (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
                    (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
                    if (opt_debug)
                        applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u",
                            i, j, (uint32_t) props.pciBusID);
                    nvmlh->nvml_cuda_device_id[j] = i;
                    nvmlh->cuda_nvml_device_id[i] = j;
                }
            }
        }
    }

    return nvmlh;
}
/**
 * Apply the configured application clocks (device_mem_clocks[] and
 * device_gpu_clocks[]) to a CUDA device in use.
 *
 * Each requested value is snapped to the first supported clock that does
 * not exceed it, when the driver is able to enumerate the supported clocks.
 * The clocks in effect before the first change are remembered in
 * clock_prev[] / clock_prev_mem[].
 *
 * Returns 1 when clocks were applied, 0 when nothing was requested,
 * -ENODEV for an unknown device, -EPERM when application clocks are
 * restricted, -ENOSYS when the driver lacks the required entry points,
 * -EINVAL when the default clocks cannot be read and -1 when the driver
 * rejects the new clocks.
 */
int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
{
    nvmlReturn_t rc;
    uint32_t gpu_clk = 0, mem_clk = 0;

    /* dev_id indexes both the CUDA map and the MAX_GPUS sized config arrays */
    if (dev_id < 0 || dev_id >= nvmlh->cuda_gpucount || dev_id >= MAX_GPUS)
        return -ENODEV;

    int n = nvmlh->cuda_nvml_device_id[dev_id];
    if (n < 0 || n >= nvmlh->nvml_gpucount)
        return -ENODEV;

    if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id])
        return 0; // nothing to do

    if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
        applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id);
        return -EPERM;
    }

    /* these entry points are optional in nvml_create(), so they may be NULL */
    if (!nvmlh->nvmlDeviceGetDefaultApplicationsClock || !nvmlh->nvmlDeviceSetApplicationsClocks)
        return -ENOSYS;

    /* remember the clocks in effect before our first change */
    uint32_t mem_prev = clock_prev_mem[dev_id];
    uint32_t gpu_prev = clock_prev[dev_id];
    if (nvmlh->nvmlDeviceGetApplicationsClock) {
        if (!mem_prev)
            nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
        if (!gpu_prev)
            nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev);
    }

    nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
    rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
    if (rc != NVML_SUCCESS) {
        applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
        return -EINVAL;
    }

    if (opt_debug)
        applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk);

    // get application config values
    if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
    if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];

    // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
    uint32_t nclocks = 0, mem_clocks[32] = { 0 };
    if (nvmlh->nvmlDeviceGetSupportedMemoryClocks) {
        /* first call only retrieves the number of entries */
        nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
        if (nclocks > 32) nclocks = 32;
        if (nclocks) {
            rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
            /* on failure the table is not usable and the driver may have
             * rewritten the count: never scan zeros or past the array */
            if (rc != NVML_SUCCESS)
                nclocks = 0;
            else if (nclocks > 32)
                nclocks = 32;
        }
    }
    for (uint32_t u = 0; u < nclocks; u++) {
        // ordered by pstate (so highest is first memory clock - P0)
        if (mem_clocks[u] <= mem_clk) {
            mem_clk = mem_clocks[u];
            break;
        }
    }

    if (nvmlh->nvmlDeviceGetSupportedGraphicsClocks) {
        nclocks = 0;
        nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
        if (nclocks) {
            if (opt_debug)
                applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk);
            /* the whole list is requested: some boards refuse partial reads */
            const uint32_t capacity = nclocks;
            uint32_t *gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * capacity + 4);
            if (gpu_clocks != NULL) {
                rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
                if (rc == NVML_SUCCESS) {
                    if (nclocks > capacity) nclocks = capacity;
                    /* 32-bit counter: the list can hold more than 255 entries */
                    for (uint32_t u = 0; u < nclocks; u++) {
                        // ordered desc, so get first
                        if (gpu_clocks[u] <= gpu_clk) {
                            gpu_clk = gpu_clocks[u];
                            break;
                        }
                    }
                }
                free(gpu_clocks);
            }
        }
    }

    rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
    if (rc == NVML_SUCCESS)
        applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk);
    else {
        applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
        return -1;
    }

    // store previous clocks for reset on exit (or during wait...)
    clock_prev[dev_id] = gpu_prev;
    clock_prev_mem[dev_id] = mem_prev;
    return 1;
}
/**
 * Reset default app clocks and limits on exit.
 *
 * The application clocks are reset when clock_prev[dev_id] shows they were
 * changed, and the power limit saved in limit_prev[dev_id] is restored.
 *
 * Returns 1 when something had to be restored, 0 when there was nothing to
 * do and -ENODEV for an unknown device.
 */
int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id)
{
    int ret = 0;

    /* dev_id indexes both the CUDA map and the MAX_GPUS sized state arrays */
    if (dev_id < 0 || dev_id >= nvmlh->cuda_gpucount || dev_id >= MAX_GPUS)
        return -ENODEV;

    int n = nvmlh->cuda_nvml_device_id[dev_id];
    if (n < 0 || n >= nvmlh->nvml_gpucount)
        return -ENODEV;

    if (clock_prev[dev_id]) {
        /* the reset entry point is optional in nvml_create() */
        nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
        if (nvmlh->nvmlDeviceResetApplicationsClocks)
            rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]);
        if (rc != NVML_SUCCESS) {
            applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id);
        }
        clock_prev[dev_id] = 0;
        ret = 1;
    }

    if (limit_prev[dev_id]) {
        /* limit_prev is known to be non-zero here, so it is applied as is */
        uint32_t plimit = limit_prev[dev_id];
        if (nvmlh->nvmlDeviceSetPowerManagementLimit)
            nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
        ret = 1;
    }
    return ret;
}
/**
 * Set power state of a device (9xx)
 * Code is similar as clocks one, which allow the change of the pstate
 *
 * The wanted p-state (device_pstate[dev_id]) selects an entry of the
 * supported memory clocks list, and the first graphics clock reported for
 * that memory clock is applied with it.
 *
 * Returns 1 on success, 0 when no p-state was requested, -ENODEV for an
 * unknown device, -EPERM when application clocks are restricted, -ENOSYS
 * when the driver lacks the required entry points, -EINVAL when the default
 * clocks cannot be read and -1 when the driver rejects the new clocks.
 */
int nvml_set_pstate(nvml_handle *nvmlh, int dev_id)
{
    nvmlReturn_t rc;
    uint32_t gpu_clk = 0, mem_clk = 0;

    /* dev_id indexes both the CUDA map and the MAX_GPUS sized config arrays */
    if (dev_id < 0 || dev_id >= nvmlh->cuda_gpucount || dev_id >= MAX_GPUS)
        return -ENODEV;

    int n = nvmlh->cuda_nvml_device_id[dev_id];
    if (n < 0 || n >= nvmlh->nvml_gpucount)
        return -ENODEV;

    if (device_pstate[dev_id] < 0)
        return 0;

    if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) {
        applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id);
        return -EPERM;
    }

    /* these entry points are optional in nvml_create(), so they may be NULL */
    if (!nvmlh->nvmlDeviceGetDefaultApplicationsClock || !nvmlh->nvmlDeviceSetApplicationsClocks)
        return -ENOSYS;

    nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk);
    rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
    if (rc != NVML_SUCCESS) {
        applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id);
        return -EINVAL;
    }

    // get application config values
    if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id];
    if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id];

    // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+
    uint32_t nclocks = 0, mem_clocks[32] = { 0 };
    int8_t wanted_pstate = device_pstate[dev_id];
    if (nvmlh->nvmlDeviceGetSupportedMemoryClocks) {
        /* first call only retrieves the number of entries */
        nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL);
        if (nclocks > 32) nclocks = 32;
        if (nclocks) {
            rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks);
            /* a failed read must not turn into a 0 MHz memory clock */
            if (rc != NVML_SUCCESS)
                nclocks = 0;
            else if (nclocks > 32)
                nclocks = 32;
        }
    }

    if ((uint32_t) wanted_pstate + 1 > nclocks) {
        applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks);
    } else {
        // ordered by pstate (so highest P0 first)
        mem_clk = mem_clocks[wanted_pstate];
    }

    if (nvmlh->nvmlDeviceGetSupportedGraphicsClocks) {
        nclocks = 0;
        nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL);
        if (nclocks) {
            /* the whole list is requested: some boards refuse partial reads */
            uint32_t *gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4);
            if (gpu_clocks != NULL) {
                rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks);
                if (rc == NVML_SUCCESS && nclocks > 0) {
                    // ordered desc, get the max app clock (do not limit)
                    gpu_clk = gpu_clocks[0];
                }
                free(gpu_clocks);
            }
        }
    }

    rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk);
    if (rc != NVML_SUCCESS) {
        applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate,
            mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc));
        return -1;
    }

    if (!opt_quiet)
        applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk);

    /* flag the device so that nvml_reset_clocks() restores the defaults */
    clock_prev[dev_id] = 1;
    return 1;
}
/**
 * Apply the configured power limit (device_plimit[dev_id], multiplied by
 * 1000 for NVML) to a CUDA device.
 *
 * The request is clamped to the range reported by the driver when it is
 * known. The limit in effect before the first change is remembered in
 * limit_prev[] for nvml_reset_clocks().
 *
 * Returns 1 when the limit was applied, 0 when nothing was requested,
 * -ENODEV for an unknown device, -ENOSYS when the driver lacks the needed
 * entry points and -1 when the driver rejects the new limit.
 */
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
{
    nvmlReturn_t rc = NVML_ERROR_UNKNOWN;

    /* dev_id indexes both the CUDA map and the MAX_GPUS sized config arrays */
    if (dev_id < 0 || dev_id >= nvmlh->cuda_gpucount || dev_id >= MAX_GPUS)
        return -ENODEV;

    int n = nvmlh->cuda_nvml_device_id[dev_id];
    if (n < 0 || n >= nvmlh->nvml_gpucount)
        return -ENODEV;

    if (!device_plimit[dev_id])
        return 0; // nothing to do

    if (!nvmlh->nvmlDeviceSetPowerManagementLimit)
        return -ENOSYS;

    uint32_t plimit = device_plimit[dev_id] * 1000;
    uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
    if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
        rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);

    if (rc != NVML_SUCCESS) {
        /* without constraints the current limit is the only known bound */
        if (!nvmlh->nvmlDeviceGetPowerManagementLimit)
            return -ENOSYS;
    }
    /* optional entry point: it may be NULL even when the constraints are known */
    if (nvmlh->nvmlDeviceGetPowerManagementLimit)
        nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit);

    if (!pmax) pmax = prev_limit;
    /* an unknown upper bound (0) must not collapse the request to pmin */
    if (pmax && plimit > pmax) plimit = pmax;
    if (plimit < pmin) plimit = pmin;

    rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
    if (rc != NVML_SUCCESS) {
        applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
        return -1;
    }

    if (!opt_quiet) {
        applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
            dev_id, plimit/1000U, pmin/1000U, pmax/1000U);
    }

    /* keep the limit seen before our first change, as nvml_set_clocks()
     * does for the clocks, so a second call cannot overwrite it */
    if (!limit_prev[dev_id])
        limit_prev[dev_id] = prev_limit;
    return 1;
}
/* Store the number of devices enumerated by NVML in *gpucount; always returns 0. */
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
    const int total = nvmlh->nvml_gpucount;
    *gpucount = total;
    return 0;
}
/* Store the CUDA device count recorded in the handle in *gpucount; always returns 0. */
int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{
    const int total = nvmlh->cuda_gpucount;
    *gpucount = total;
    return 0;
}
/**
 * Copy the NVML name of a CUDA device into namebuf (bufsize bytes).
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails.
 */
int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetName == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetName(nvmlh->devs[nvml_idx], namebuf, bufsize);
    return (status == NVML_SUCCESS) ? 0 : -1;
}
/**
 * Read the GPU temperature sensor of a CUDA device into *tempC.
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails.
 */
int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetTemperature == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[nvml_idx],
        0u /* NVML_TEMPERATURE_GPU */, tempC);
    return (status == NVML_SUCCESS) ? 0 : -1;
}
/**
 * Read the fan speed reported by NVML for a CUDA device into *fanpcnt.
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails.
 */
int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetFanSpeed == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[nvml_idx], fanpcnt);
    return (status == NVML_SUCCESS) ? 0 : -1;
}
/* Not Supported on 750Ti 340.23 */
/**
 * Read the power draw reported by NVML for a CUDA device into *milliwatts.
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails (the
 * failure is deliberately not logged).
 */
int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetPowerUsage == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[nvml_idx], milliwatts);
    if (status == NVML_SUCCESS)
        return 0;

    return -1;
}
/* Not Supported on 750Ti 340.23 */
/**
 * Read the performance state reported by NVML for a CUDA device into *pstate.
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails (the
 * failure is deliberately not logged).
 */
int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetPerformanceState == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[nvml_idx], pstate);
    if (status == NVML_SUCCESS)
        return 0;

    return -1;
}
/**
 * Store the PCI bus id recorded for a CUDA device in *busid.
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device.
 */
int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    *busid = nvmlh->nvml_pci_bus_id[nvml_idx];
    return 0;
}
/**
 * Write a serial number for a CUDA device into sn (maxlen bytes, always
 * NUL terminated on success).
 *
 * The NVML serial is used when the driver provides one; otherwise the
 * device UUID, minus its first four characters, is used as a substitute.
 *
 * Returns 0 on success, -EINVAL for an unusable output buffer, -ENODEV when
 * the CUDA index has no NVML device, -ENOSYS when no suitable entry point
 * exists and -1 when the UUID query fails.
 */
int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen)
{
    char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];
    nvmlReturn_t res;

    /* the terminator is written at sn[maxlen-1], so maxlen must be >= 1 */
    if (sn == NULL || maxlen <= 0)
        return -EINVAL;

    int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
    if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetSerial) {
        res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen);
        if (res == NVML_SUCCESS)
            return 0;
    }

    if (!nvmlh->nvmlDeviceGetUUID)
        return -ENOSYS;

    // nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc
    // use a part of uuid to generate an unique serial
    // todo: check if there is vendor id is inside
    memset(uuid, 0, sizeof(uuid));
    res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1);
    if (res != NVML_SUCCESS) {
        if (opt_debug)
            applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res));
        return -1;
    }

    /* The buffer was zeroed, so &uuid[4] is a valid (possibly empty) string
     * even for a short UUID. snprintf() truncates and terminates in every
     * case, including the empty one. */
    snprintf(sn, (size_t) maxlen, "%s", &uuid[4]);
    return 0;
}
/**
 * Copy the VBIOS version string of a CUDA device into desc (maxlen bytes).
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device,
 * -ENOSYS when the entry point is missing and -1 when NVML fails.
 */
int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    if (nvmlh->nvmlDeviceGetVbiosVersion == NULL)
        return -ENOSYS;

    nvmlReturn_t status = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[nvml_idx], desc, maxlen);
    if (status == NVML_SUCCESS)
        return 0;

    if (opt_debug)
        applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(status));
    return -1;
}
/**
 * Split the PCI subsystem id of a CUDA device (or its PCI device id when
 * the subsystem id is zero) into vid (low 16 bits) and pid (high 16 bits).
 * Returns 0 on success, -ENODEV when the CUDA index has no NVML device.
 */
int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pid)
{
    const int nvml_idx = nvmlh->cuda_nvml_device_id[cudaindex];
    const bool mapped = (nvml_idx >= 0) && (nvml_idx < nvmlh->nvml_gpucount);
    if (!mapped)
        return -ENODEV;

    uint32_t ids = nvmlh->nvml_pci_subsys_id[nvml_idx];
    if (ids == 0)
        ids = nvmlh->nvml_pci_device_id[nvml_idx];

    vid = ids & 0xFFFF;
    pid = ids >> 16;
    return 0;
}
/**
 * Shut NVML down, unload the library and free the handle together with
 * every table it owns. Always returns 0.
 */
int nvml_destroy(nvml_handle *nvmlh)
{
    nvmlh->nvmlShutdown();
    wrap_dlclose(nvmlh->nvml_dll);

    /* device handles and per-device state */
    free(nvmlh->devs);
    free(nvmlh->app_clocks);

    /* PCI identification tables */
    free(nvmlh->nvml_pci_domain_id);
    free(nvmlh->nvml_pci_bus_id);
    free(nvmlh->nvml_pci_device_id);
    free(nvmlh->nvml_pci_subsys_id);

    /* CUDA <-> NVML index maps */
    free(nvmlh->cuda_nvml_device_id);
    free(nvmlh->nvml_cuda_device_id);

    free(nvmlh);
    return 0;
}
/**
 * NVAPI alternative for Windows x86 binaries:
 * the NVML API does not exist as a 32-bit DLL.
 */
#ifdef WIN32
#include "nvapi/nvapi_ccminer.h"
/* CUDA device index -> NVAPI physical GPU index, filled by nvapi_init() */
static int nvapi_dev_map[MAX_GPUS] = { 0 };
/* display handles; not referenced by the code visible in this file */
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
/* physical GPU handles enumerated by nvapi_init() */
static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
/* number of valid entries in phys[] */
static NvU32 nvapi_dev_cnt = 0;
int nvapi_temperature(unsigned int devNum, unsigned int *temperature)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_THERMAL_SETTINGS thermal;
thermal.version = NV_GPU_THERMAL_SETTINGS_VER;
ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string);
return -1;
}
(*temperature) = (unsigned int) thermal.sensor[0].currentTemp;
return 0;
}
int nvapi_fanspeed(unsigned int devNum, unsigned int *speed)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NvU32 fanspeed = 0;
ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string);
return -1;
}
(*speed) = (unsigned int) fanspeed;
return 0;
}
int nvapi_getpstate(unsigned int devNum, unsigned int *power)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */
ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string);
return -1;
}
else {
// get pstate for the moment... often 0 = P0
(*power) = (unsigned int)CurrentPstate;
}
return 0;
}
#define UTIL_DOMAIN_GPU 0
int nvapi_getusage(unsigned int devNum, unsigned int *pct)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
NV_GPU_DYNAMIC_PSTATES_INFO_EX info;
info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER;
ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string);
return -1;
}
else {
if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent)
(*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage;
}
return 0;
}
int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid)
{
NvAPI_Status ret;
NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, &pRevisionId, &pExtDeviceId);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string);
return -1;
}
pid = pDeviceId >> 16;
vid = pDeviceId & 0xFFFF;
if (vid == 0x10DE && pSubSystemId) {
vid = pSubSystemId & 0xFFFF;
pid = pSubSystemId >> 16;
}
return 0;
}
/**
 * Placeholder for an NVAPI serial number query: the output is set to the
 * empty string, the actual query is compiled out.
 * Returns 0 on success, -ENODEV for an unknown device and -1 when the
 * buffer is shorter than 64 bytes.
 */
int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen)
{
    // NvAPI_Status ret;
    if (devNum >= nvapi_dev_cnt)
        return -ENODEV;

    /* the output is cleared before the size check */
    serial[0] = '\0';

    if (maxlen < 64) // Short String
        return -1;

#if 0
    ret = NvAPI_GPU_Get..(phys[devNum], serial);
    if (ret != NVAPI_OK) {
        NvAPI_ShortString string;
        NvAPI_GetErrorMessage(ret, string);
        if (opt_debug)
            applog(LOG_DEBUG, "NVAPI ...: %s", string);
        return -1;
    }
#endif
    return 0;
}
int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -ENODEV;
if (maxlen < 64) // Short String
return -1;
ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string);
return -1;
}
return 0;
}
int nvapi_init()
{
int num_gpus = cuda_num_devices();
NvAPI_Status ret = NvAPI_Initialize();
if (!ret == NVAPI_OK){
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string);
return -1;
}
ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string);
return -1;
}
for (int g = 0; g < num_gpus; g++) {
cudaDeviceProp props;
if (cudaGetDeviceProperties(&props, g) == cudaSuccess) {
device_bus_ids[g] = props.pciBusID;
}
nvapi_dev_map[g] = g; // default mapping
}
for (NvU8 i = 0; i < nvapi_dev_cnt; i++) {
NvAPI_ShortString name;
ret = NvAPI_GPU_GetFullName(phys[i], name);
if (ret == NVAPI_OK) {
for (int g = 0; g < num_gpus; g++) {
NvU32 busId;
ret = NvAPI_GPU_GetBusId(phys[i], &busId);
if (ret == NVAPI_OK && busId == device_bus_ids[g]) {
nvapi_dev_map[g] = i;
if (opt_debug)
applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u",
g, i, busId);
break;
}
}
} else {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
}
}
#if 0
NvAPI_ShortString ver;
NvAPI_GetInterfaceVersionString(ver);
applog(LOG_DEBUG, "NVAPI Version: %s", ver);
#endif
NvU32 udv;
NvAPI_ShortString str;
ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str);
if (ret == NVAPI_OK) {
sprintf(driver_version,"%d.%02d", udv / 100, udv % 100);
}
return 0;
}
#endif
/* api functions -------------------------------------- */
// assume 2500 rpm as default, auto-updated if more
static unsigned int fan_speed_max = 2500;

/**
 * Fan speed of a GPU in percent.
 * NVML supplies the value when a handle is available. On Windows without
 * NVML, the percentage is derived from the NVAPI tachometer and the highest
 * speed seen so far. Returns 0 when no value could be read.
 */
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
{
    unsigned int percent = 0;

#ifdef WIN32
    if (!hnvml) {
        unsigned int rpm = 0;
        nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm);
        percent = (rpm * 100) / fan_speed_max;
        if (percent > 100) {
            /* faster than the assumed maximum: remember the new maximum */
            percent = 100;
            fan_speed_max = rpm;
        }
        return percent;
    }
#endif

    if (hnvml)
        nvml_get_fanpcnt(hnvml, gpu->gpu_id, &percent);

    return percent;
}
/**
 * Fan speed of a GPU in rpm, read through NVAPI on Windows.
 * Always 0 on other platforms.
 */
unsigned int gpu_fanrpm(struct cgpu_info *gpu)
{
    unsigned int tach = 0;
#ifdef WIN32
    const int nvapi_id = nvapi_dev_map[gpu->gpu_id];
    nvapi_fanspeed(nvapi_id, &tach);
#endif
    return tach;
}
/**
 * Temperature of a GPU, read through NVML when a handle is available and
 * through NVAPI otherwise (Windows only). Returns 0 when nothing was read.
 */
float gpu_temp(struct cgpu_info *gpu)
{
    unsigned int degrees = 0;

    if (hnvml)
        nvml_get_tempC(hnvml, gpu->gpu_id, &degrees);
#ifdef WIN32
    else
        nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &degrees);
#endif

    return (float) degrees;
}
/**
 * Performance state of a GPU. NVML is asked first; on Windows, NVAPI takes
 * over when NVML is absent or its query returned -1.
 * Returns -1 when no value could be read.
 */
int gpu_pstate(struct cgpu_info *gpu)
{
    int state = -1;
    int nvml_rc = -1;

    if (hnvml)
        nvml_rc = nvml_get_pstate(hnvml, gpu->gpu_id, &state);

#ifdef WIN32
    if (nvml_rc == -1) {
        unsigned int nvapi_state = 0;
        nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &nvapi_state);
        state = (int) nvapi_state;
    }
#endif

    return state;
}
/**
 * PCI bus id of a GPU. NVML is asked first; on Windows the id recorded from
 * the CUDA device properties is used when NVML is absent or returned -1.
 * Returns -1 when no value is available.
 */
int gpu_busid(struct cgpu_info *gpu)
{
    int bus = -1;
    int nvml_rc = -1;

    if (hnvml)
        nvml_rc = nvml_get_busid(hnvml, gpu->gpu_id, &bus);

#ifdef WIN32
    if (nvml_rc == -1)
        bus = device_bus_ids[gpu->gpu_id];
#endif

    return bus;
}
/**
 * Power reading of a GPU, averaged with the previous value stored in
 * gpu->gpu_power when that one is positive. NVML supplies the reading; on
 * Windows the NVAPI utilization percentage stands in for it when NVML is
 * absent or returned -1 (marked "to fix" by the author).
 */
unsigned int gpu_power(struct cgpu_info *gpu)
{
    unsigned int reading = 0;
    int nvml_rc = -1;

    if (hnvml)
        nvml_rc = nvml_get_power_usage(hnvml, gpu->gpu_id, &reading);

#ifdef WIN32
    if (nvml_rc == -1) {
        unsigned int usage = 0;
        nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &usage);
        reading = usage; // to fix
    }
#endif

    if (gpu->gpu_power > 0) {
        // average
        reading = (gpu->gpu_power + reading) / 2;
    }
    return reading;
}
/**
 * Translate a PCI vendor id into a board vendor name.
 *
 * On a match the name is copied into vendorname and the vendor id is
 * returned. A vendor id of 0 matches the last table entry, so it yields an
 * empty name and a return value of 0. Unknown ids leave vendorname
 * untouched and return 0. Returns -EINVAL when vendorname is NULL.
 */
static int translate_vendor_id(uint16_t vid, char *vendorname)
{
    struct VENDORS {
        const uint16_t vid;
        const char *name;
    } vendors[] = {
        { 0x1043, "ASUS" },
        { 0x107D, "Leadtek" },
        { 0x10B0, "Gainward" },
        // { 0x10DE, "NVIDIA" },
        { 0x1458, "Gigabyte" },
        { 0x1462, "MSI" },
        { 0x154B, "PNY" },
        { 0x1682, "XFX" },
        { 0x196D, "Club3D" },
        { 0x19DA, "Zotac" },
        { 0x19F1, "BFG" },
        { 0x1ACC, "PoV" },
        { 0x1B4C, "KFA2" },
        { 0x3842, "EVGA" },
        { 0x7377, "Colorful" },
        { 0, "" }
    };

    if (vendorname == NULL)
        return -EINVAL;

    /* the whole table is scanned, its last { 0, "" } entry included */
    const int entries = (int) (sizeof(vendors) / sizeof(vendors[0]));
    int slot = 0;
    while (slot < entries) {
        if (vendors[slot].vid == vid) {
            strcpy(vendorname, vendors[slot].name);
            return vid;
        }
        slot++;
    }

    /* 0x10DE (commented out of the table above) is not reported as unknown */
    if (opt_debug && vid != 0x10DE)
        applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid);
    return 0;
}
/**
 * Look up the board vendor of the GPU sitting on the given PCI bus id and
 * copy its name into vendorname.
 *
 * NVML is used when the global handle exists, NVAPI otherwise (Windows
 * only). Returns what translate_vendor_id() returns: the vendor id when it
 * is known, 0 otherwise, -EINVAL when vendorname is NULL.
 */
int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
{
    uint16_t vid = 0, pid = 0;
    if (hnvml) { // may not be initialized on start...
        for (int id=0; id < hnvml->nvml_gpucount; id++) {
            if (hnvml->nvml_pci_bus_id[id] != pci_bus_id)
                continue;
            /* an NVML device without CUDA counterpart is mapped to -1,
             * which must not be used as an index by nvml_get_info() */
            int dev_id = hnvml->nvml_cuda_device_id[id];
            if (dev_id < 0 || dev_id >= hnvml->cuda_gpucount)
                continue;
            /* stop at the first device that could be identified, like
             * the NVAPI lookup below */
            if (nvml_get_info(hnvml, dev_id, vid, pid) == 0)
                break;
        }
    } else {
#ifdef WIN32
        /* device_bus_ids[] and nvapi_dev_map[] only have MAX_GPUS slots */
        for (unsigned id = 0; id < nvapi_dev_cnt && id < MAX_GPUS; id++) {
            if (device_bus_ids[id] == pci_bus_id) {
                nvapi_getinfo(nvapi_dev_map[id], vid, pid);
                break;
            }
        }
#endif
    }
    return translate_vendor_id(vid, vendorname);
}
/**
 * Fill the identification fields of a cgpu_info (NVML / NVAPI ids, vendor
 * and product ids, serial number, BIOS description) for gpu->gpu_id.
 *
 * On Windows the NVAPI queries run after the NVML ones and write to the
 * same fields. Returns -1 for a negative gpu_id, 0 otherwise.
 */
int gpu_info(struct cgpu_info *gpu)
{
    const int cuda_id = gpu->gpu_id;

    /* both ids stay at -1 unless the matching API is queried below */
    gpu->nvml_id = -1;
    gpu->nvapi_id = -1;

    if (cuda_id < 0)
        return -1;

    if (hnvml) {
        gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[cuda_id];
        nvml_get_info(hnvml, cuda_id, gpu->gpu_vid, gpu->gpu_pid);
        nvml_get_serial(hnvml, cuda_id, gpu->gpu_sn, sizeof(gpu->gpu_sn));
        nvml_get_bios(hnvml, cuda_id, gpu->gpu_desc, sizeof(gpu->gpu_desc));
    }

#ifdef WIN32
    gpu->nvapi_id = (int8_t) nvapi_dev_map[cuda_id];
    nvapi_getinfo(nvapi_dev_map[cuda_id], gpu->gpu_vid, gpu->gpu_pid);
    nvapi_getserial(nvapi_dev_map[cuda_id], gpu->gpu_sn, sizeof(gpu->gpu_sn));
    nvapi_getbios(nvapi_dev_map[cuda_id], gpu->gpu_desc, sizeof(gpu->gpu_desc));
#endif

    return 0;
}
#endif /* USE_WRAPNVML */