Browse Source
Based on the mwhite73 <marvin.white@gmail.com> implementation. Linked to the API system. Also fix the Makefile to support standard C++ files; this prevents nvcc from being used on files without device code. Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> master
Tanguy Pruvot
10 years ago
13 changed files with 752 additions and 66 deletions
@ -0,0 +1,479 @@
@@ -0,0 +1,479 @@
|
||||
/*
|
||||
* A trivial little dlopen()-based wrapper library for the |
||||
* NVIDIA NVML library, to allow runtime discovery of NVML on an |
||||
* arbitrary system. This is all very hackish and simple-minded, but |
||||
* it serves my immediate needs in the short term until NVIDIA provides |
||||
* a static NVML wrapper library themselves, hopefully in |
||||
* CUDA 6.5 or maybe sometime shortly after. |
||||
* |
||||
* This trivial code is made available under the "new" 3-clause BSD license, |
||||
* and/or any of the GPL licenses you prefer. |
||||
* Feel free to use the code and modify as you see fit. |
||||
* |
||||
* John E. Stone - john.stone@gmail.com |
||||
* Tanguy Pruvot - tpruvot@github |
||||
* |
||||
*/ |
||||
|
||||
#ifdef USE_WRAPNVML |
||||
|
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <errno.h> |
||||
#ifndef _MSC_VER |
||||
#include <libgen.h> |
||||
#endif |
||||
|
||||
#include "miner.h" |
||||
#include "cuda_runtime.h" |
||||
#include "nvml.h" |
||||
|
||||
/*
|
||||
* Wrappers to emulate dlopen() on other systems like Windows |
||||
*/ |
||||
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) |
||||
#include <windows.h> |
||||
/*
 * Load a DLL by file name and return its module handle as an opaque pointer
 * (the value produced by LoadLibraryA, cast to void *).
 * The ANSI entry point is named explicitly because the argument is a narrow
 * string: the generic LoadLibrary macro resolves to LoadLibraryW in UNICODE
 * builds, which does not accept a const char * path.
 */
static void *wrap_dlopen(const char *filename) {
	return (void *)LoadLibraryA(filename);
}
||||
/*
 * Look up the exported symbol `sym` in the module `h` and hand back
 * whatever GetProcAddress yields, converted to a plain void pointer.
 */
static void *wrap_dlsym(void *h, const char *sym) {
	HINSTANCE module = (HINSTANCE)h;

	return (void *)GetProcAddress(module, sym);
}
||||
/*
 * Unload the module `h`.
 * FreeLibrary returns nonzero on success, so the result is negated:
 * this wrapper returns 0 on success and 1 on failure.
 */
static int wrap_dlclose(void *h) {
	BOOL released = FreeLibrary((HINSTANCE)h);

	return !released;
}
||||
#else |
||||
/* assume we can use dlopen itself... */ |
||||
#include <dlfcn.h> |
||||
/*
 * Open the shared object `filename` with dlopen(), resolving all symbols
 * immediately (RTLD_NOW), and return the resulting handle unchanged.
 */
static void *wrap_dlopen(const char *filename) {
	void *handle = dlopen(filename, RTLD_NOW);

	return handle;
}
||||
/* Resolve `sym` in the library handle `h`; the dlsym() result is returned as is. */
static void *wrap_dlsym(void *h, const char *sym) {
	void *address = dlsym(h, sym);

	return address;
}
||||
/* Close the library handle `h`; the dlclose() status is passed through. */
static int wrap_dlclose(void *h) {
	int status = dlclose(h);

	return status;
}
||||
#endif |
||||
|
||||
#if defined(__cplusplus) |
||||
extern "C" { |
||||
#endif |
||||
|
||||
wrap_nvml_handle * wrap_nvml_create() |
||||
{ |
||||
int i=0; |
||||
wrap_nvml_handle *nvmlh = NULL; |
||||
|
||||
/*
|
||||
* We use hard-coded library installation locations for the time being... |
||||
* No idea where or if libnvidia-ml.so is installed on MacOS X, a |
||||
* deep scouring of the filesystem on one of the Mac CUDA build boxes |
||||
* I used turned up nothing, so for now it's not going to work on OSX. |
||||
*/ |
||||
#if defined(_WIN64) |
||||
/* 64-bit Windows */ |
||||
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll" |
||||
#elif defined(_WIN32) || defined(_MSC_VER) |
||||
/* 32-bit Windows */ |
||||
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll" |
||||
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__)) |
||||
/* 32-bit linux assumed */ |
||||
#define libnvidia_ml "/usr/lib32/libnvidia-ml.so" |
||||
#elif defined(__linux) |
||||
/* 64-bit linux assumed */ |
||||
#define libnvidia_ml "/usr/lib/libnvidia-ml.so" |
||||
#else |
||||
#error "Unrecognized platform: need NVML DLL path for this platform..." |
||||
#endif |
||||
|
||||
#if WIN32 |
||||
char tmp[512]; |
||||
ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp)); |
||||
#else |
||||
char tmp[512] = libnvidia_ml; |
||||
#endif |
||||
|
||||
void *nvml_dll = wrap_dlopen(tmp); |
||||
if (nvml_dll == NULL) { |
||||
#ifdef WIN32 |
||||
char lib[] = "nvml.dll"; |
||||
#else |
||||
char lib[64] = { '\0' }; |
||||
snprintf(lib, sizeof(lib), "%s", basename(tmp)); |
||||
/* try dlopen without path, here /usr/lib/nvidia-340/libnvidia-ml.so */ |
||||
#endif |
||||
nvml_dll = wrap_dlopen(lib); |
||||
if (opt_debug) |
||||
applog(LOG_DEBUG, "dlopen: %s=%p", lib, nvml_dll); |
||||
} |
||||
if (nvml_dll == NULL) { |
||||
if (opt_debug) |
||||
applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, tmp); |
||||
return NULL; |
||||
} |
||||
|
||||
nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle)); |
||||
|
||||
nvmlh->nvml_dll = nvml_dll; |
||||
|
||||
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); |
||||
if (!nvmlh->nvmlInit) |
||||
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); |
||||
nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); |
||||
nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); |
||||
nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); |
||||
nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); |
||||
nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); |
||||
nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); |
||||
nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); |
||||
nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); |
||||
nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); |
||||
nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t)) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); |
||||
nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)()) |
||||
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); |
||||
|
||||
if (nvmlh->nvmlInit == NULL || |
||||
nvmlh->nvmlShutdown == NULL || |
||||
nvmlh->nvmlDeviceGetCount == NULL || |
||||
nvmlh->nvmlDeviceGetHandleByIndex == NULL || |
||||
nvmlh->nvmlDeviceGetPciInfo == NULL || |
||||
nvmlh->nvmlDeviceGetName == NULL || |
||||
nvmlh->nvmlDeviceGetTemperature == NULL || |
||||
nvmlh->nvmlDeviceGetFanSpeed == NULL || |
||||
nvmlh->nvmlDeviceGetPowerUsage == NULL) |
||||
{ |
||||
if (opt_debug) |
||||
applog(LOG_DEBUG, "Failed to obtain all required NVML function pointers"); |
||||
wrap_dlclose(nvmlh->nvml_dll); |
||||
free(nvmlh); |
||||
return NULL; |
||||
} |
||||
|
||||
nvmlh->nvmlInit(); |
||||
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); |
||||
|
||||
/* Query CUDA device count, in case it doesn't agree with NVML, since */ |
||||
/* CUDA will only report GPUs with compute capability greater than 1.0 */ |
||||
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { |
||||
if (opt_debug) |
||||
applog(LOG_DEBUG, "Failed to query CUDA device count!"); |
||||
wrap_dlclose(nvmlh->nvml_dll); |
||||
free(nvmlh); |
||||
return NULL; |
||||
} |
||||
|
||||
nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t)); |
||||
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
||||
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
||||
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); |
||||
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); |
||||
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); |
||||
|
||||
/* Obtain GPU device handles we're going to need repeatedly... */ |
||||
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
||||
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); |
||||
} |
||||
|
||||
/* Query PCI info for each NVML device, and build table for mapping of */ |
||||
/* CUDA device IDs to NVML device IDs and vice versa */ |
||||
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
||||
wrap_nvmlPciInfo_t pciinfo; |
||||
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); |
||||
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; |
||||
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; |
||||
nvmlh->nvml_pci_device_id[i] = pciinfo.device; |
||||
} |
||||
|
||||
/* build mapping of NVML device IDs to CUDA IDs */ |
||||
for (i=0; i<nvmlh->nvml_gpucount; i++) { |
||||
nvmlh->nvml_cuda_device_id[i] = -1; |
||||
} |
||||
for (i=0; i<nvmlh->cuda_gpucount; i++) { |
||||
cudaDeviceProp props; |
||||
nvmlh->cuda_nvml_device_id[i] = -1; |
||||
|
||||
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { |
||||
int j; |
||||
for (j=0; j<nvmlh->nvml_gpucount; j++) { |
||||
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && |
||||
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && |
||||
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { |
||||
if (opt_debug) |
||||
applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j); |
||||
nvmlh->nvml_cuda_device_id[j] = i; |
||||
nvmlh->cuda_nvml_device_id[i] = j; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return nvmlh; |
||||
} |
||||
|
||||
/*
 * Copy the NVML device count stored in the handle into *gpucount.
 * Always returns 0.
 */
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	const int nvml_devices = nvmlh->nvml_gpucount;

	*gpucount = nvml_devices;
	return 0;
}
||||
|
||||
/*
 * Copy the CUDA device count stored in the handle into *gpucount.
 * Always returns 0.
 */
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	const int cuda_devices = nvmlh->cuda_gpucount;

	*gpucount = cuda_devices;
	return 0;
}
||||
|
||||
/*
 * Fetch the NVML model name of the GPU identified by a CUDA device index.
 *
 * nvmlh     - handle from wrap_nvml_create()
 * cudaindex - CUDA device index; must be in [0, cuda_gpucount)
 * namebuf   - destination buffer, bufsize bytes long
 *
 * Returns 0 on success, -1 when the index is out of range, the CUDA device
 * has no NVML counterpart, or the NVML query fails.
 */
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
	int gpuindex;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
		return -1;

	return 0;
}
||||
|
||||
|
||||
/*
 * Read the GPU temperature sensor (sensor id 0, NVML_TEMPERATURE_GPU) of the
 * device identified by a CUDA device index and store it in *tempC.
 *
 * Returns 0 on success, -1 when cudaindex is outside [0, cuda_gpucount),
 * the CUDA device has no NVML counterpart, or the NVML query fails.
 */
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
	wrap_nvmlReturn_t rc;
	int gpuindex;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0 /* NVML_TEMPERATURE_GPU */, tempC);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}
||||
|
||||
|
||||
/*
 * Read the fan speed of the device identified by a CUDA device index and
 * store it in *fanpcnt.
 *
 * Returns 0 on success, -1 when cudaindex is outside [0, cuda_gpucount),
 * the CUDA device has no NVML counterpart, or the NVML query fails.
 */
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
	wrap_nvmlReturn_t rc;
	int gpuindex;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}
||||
|
||||
/* Not Supported on 750Ti 340.23 */ |
||||
/*
 * Query a clock frequency of the device identified by a CUDA device index.
 *
 * type - clock domain, one of the wrap_nvmlClockType_t values
 * freq - receives the value reported by nvmlDeviceGetClockInfo
 *
 * Returns 0 on success, -1 when cudaindex is outside [0, cuda_gpucount),
 * the CUDA device has no NVML counterpart, the loaded library does not
 * export nvmlDeviceGetClockInfo, or the NVML query fails.
 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq)
{
	int gpuindex;
	wrap_nvmlReturn_t res;

	/* this entry point is not in the required set, so it may be absent */
	if (nvmlh->nvmlDeviceGetClockInfo == NULL)
		return -1;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is optional too: fall back to the raw code */
			if (nvmlh->nvmlErrorString != NULL)
				applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
||||
|
||||
/* Not Supported on 750Ti 340.23 */ |
||||
/*
 * Query the power draw of the device identified by a CUDA device index and
 * store it in *milliwatts.
 *
 * Returns 0 on success, -1 when cudaindex is outside [0, cuda_gpucount),
 * the CUDA device has no NVML counterpart, or the NVML query fails.
 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
	int gpuindex;
	wrap_nvmlReturn_t res;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is an optional entry point: fall back to the raw code */
			if (nvmlh->nvmlErrorString != NULL)
				applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
||||
|
||||
/* Not Supported on 750Ti 340.23 */ |
||||
/*
 * Query the performance state of the device identified by a CUDA device
 * index and store it in *pstate.
 *
 * Returns 0 on success, -1 when cudaindex is outside [0, cuda_gpucount),
 * the CUDA device has no NVML counterpart, the loaded library does not
 * export nvmlDeviceGetPerformanceState, or the NVML query fails.
 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
{
	int gpuindex;
	wrap_nvmlReturn_t res;

	/* this entry point is not in the required set, so it may be absent */
	if (nvmlh->nvmlDeviceGetPerformanceState == NULL)
		return -1;

	/* cuda_nvml_device_id only holds cuda_gpucount entries */
	if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
	if (res != WRAPNVML_SUCCESS) {
		if (opt_debug) {
			/* nvmlErrorString is optional too: fall back to the raw code */
			if (nvmlh->nvmlErrorString != NULL)
				applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
			else
				applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: error %d", (int) res);
		}
		return -1;
	}

	return 0;
}
||||
|
||||
/*
 * Shut NVML down, unload the library and release the handle together with
 * the per-device tables allocated by wrap_nvml_create() (previously leaked).
 * A NULL handle is accepted and ignored. Always returns 0.
 */
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
{
	if (nvmlh == NULL)
		return 0;

	nvmlh->nvmlShutdown();

	wrap_dlclose(nvmlh->nvml_dll);

	/* free(NULL) is a no-op, so tables that were never allocated are fine */
	free(nvmlh->devs);
	free(nvmlh->nvml_pci_domain_id);
	free(nvmlh->nvml_pci_bus_id);
	free(nvmlh->nvml_pci_device_id);
	free(nvmlh->nvml_cuda_device_id);
	free(nvmlh->cuda_nvml_device_id);

	free(nvmlh);
	return 0;
}
||||
|
||||
/* api functions */ |
||||
|
||||
extern wrap_nvml_handle *nvmlh; |
||||
extern int device_map[8]; |
||||
|
||||
unsigned int gpu_fanpercent(struct cgpu_info *gpu) |
||||
{ |
||||
unsigned int pct = 0; |
||||
if (nvmlh) { |
||||
wrap_nvml_get_fanpcnt(nvmlh, device_map[gpu->thr_id], &pct); |
||||
} |
||||
return pct; |
||||
} |
||||
|
||||
double gpu_temp(struct cgpu_info *gpu) |
||||
{ |
||||
double tc = 0.0; |
||||
if (nvmlh) { |
||||
unsigned int tmp = 0; |
||||
wrap_nvml_get_tempC(nvmlh, device_map[gpu->thr_id], &tmp); |
||||
tc = (double) tmp; |
||||
} |
||||
return tc; |
||||
} |
||||
|
||||
unsigned int gpu_clock(struct cgpu_info *gpu) |
||||
{ |
||||
unsigned int freq = 0; |
||||
if (nvmlh) { |
||||
wrap_nvml_get_clock(nvmlh, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq); |
||||
} |
||||
return freq; |
||||
} |
||||
|
||||
unsigned int gpu_power(struct cgpu_info *gpu) |
||||
{ |
||||
unsigned int mw = 0; |
||||
if (nvmlh) { |
||||
wrap_nvml_get_power_usage(nvmlh, device_map[gpu->thr_id], &mw); |
||||
} |
||||
return mw; |
||||
} |
||||
|
||||
int gpu_pstate(struct cgpu_info *gpu) |
||||
{ |
||||
int pstate = 0; |
||||
if (nvmlh) { |
||||
wrap_nvml_get_pstate(nvmlh, device_map[gpu->thr_id], &pstate); |
||||
//gpu->gpu_pstate = pstate;
|
||||
} |
||||
return pstate; |
||||
} |
||||
|
||||
#if defined(__cplusplus) |
||||
} |
||||
#endif |
||||
|
||||
#endif /* USE_WRAPNVML */ |
||||
|
||||
/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
|
||||
|
||||
nvmlDeviceGetAccountingBufferSize |
||||
nvmlDeviceGetAccountingMode |
||||
nvmlDeviceGetAccountingPids |
||||
nvmlDeviceGetAccountingStats |
||||
nvmlDeviceGetAPIRestriction |
||||
nvmlDeviceGetApplicationsClock |
||||
nvmlDeviceGetAutoBoostedClocksEnabled |
||||
nvmlDeviceGetBAR1MemoryInfo |
||||
nvmlDeviceGetBoardId |
||||
nvmlDeviceGetBrand |
||||
nvmlDeviceGetBridgeChipInfo |
||||
* nvmlDeviceGetClockInfo |
||||
nvmlDeviceGetComputeMode |
||||
nvmlDeviceGetComputeRunningProcesses |
||||
nvmlDeviceGetCount |
||||
nvmlDeviceGetCount_v2 |
||||
nvmlDeviceGetCpuAffinity |
||||
nvmlDeviceGetCurrentClocksThrottleReasons |
||||
nvmlDeviceGetCurrPcieLinkGeneration |
||||
nvmlDeviceGetCurrPcieLinkWidth |
||||
nvmlDeviceGetDecoderUtilization |
||||
nvmlDeviceGetDefaultApplicationsClock |
||||
nvmlDeviceGetDetailedEccErrors |
||||
nvmlDeviceGetDisplayActive |
||||
nvmlDeviceGetDisplayMode |
||||
nvmlDeviceGetDriverModel |
||||
nvmlDeviceGetEccMode |
||||
nvmlDeviceGetEncoderUtilization |
||||
nvmlDeviceGetEnforcedPowerLimit |
||||
* nvmlDeviceGetFanSpeed |
||||
nvmlDeviceGetGpuOperationMode |
||||
nvmlDeviceGetHandleByIndex |
||||
nvmlDeviceGetHandleByIndex_v2 |
||||
nvmlDeviceGetHandleByPciBusId |
||||
nvmlDeviceGetHandleByPciBusId_v2 |
||||
nvmlDeviceGetHandleBySerial |
||||
nvmlDeviceGetHandleByUUID |
||||
nvmlDeviceGetIndex |
||||
nvmlDeviceGetInforomConfigurationChecksum |
||||
nvmlDeviceGetInforomImageVersion |
||||
nvmlDeviceGetInforomVersion |
||||
nvmlDeviceGetMaxClockInfo |
||||
nvmlDeviceGetMaxPcieLinkGeneration |
||||
nvmlDeviceGetMaxPcieLinkWidth |
||||
nvmlDeviceGetMemoryErrorCounter |
||||
nvmlDeviceGetMemoryInfo |
||||
nvmlDeviceGetMinorNumber |
||||
nvmlDeviceGetMultiGpuBoard |
||||
nvmlDeviceGetName |
||||
nvmlDeviceGetPciInfo |
||||
nvmlDeviceGetPciInfo_v2 |
||||
* nvmlDeviceGetPerformanceState |
||||
nvmlDeviceGetPersistenceMode |
||||
nvmlDeviceGetPowerManagementDefaultLimit |
||||
nvmlDeviceGetPowerManagementLimit |
||||
nvmlDeviceGetPowerManagementLimitConstraints |
||||
nvmlDeviceGetPowerManagementMode |
||||
nvmlDeviceGetPowerState (deprecated) |
||||
* nvmlDeviceGetPowerUsage |
||||
nvmlDeviceGetRetiredPages |
||||
nvmlDeviceGetRetiredPagesPendingStatus |
||||
nvmlDeviceGetSamples |
||||
nvmlDeviceGetSerial |
||||
nvmlDeviceGetSupportedClocksThrottleReasons |
||||
nvmlDeviceGetSupportedEventTypes |
||||
nvmlDeviceGetSupportedGraphicsClocks |
||||
nvmlDeviceGetSupportedMemoryClocks |
||||
nvmlDeviceGetTemperature |
||||
nvmlDeviceGetTemperatureThreshold |
||||
nvmlDeviceGetTotalEccErrors |
||||
nvmlDeviceGetUtilizationRates |
||||
nvmlDeviceGetUUID |
||||
nvmlDeviceGetVbiosVersion |
||||
nvmlDeviceGetViolationStatus |
||||
|
||||
*/ |
@ -0,0 +1,146 @@
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* A trivial little dlopen()-based wrapper library for the |
||||
* NVIDIA NVML library, to allow runtime discovery of NVML on an |
||||
* arbitrary system. This is all very hackish and simple-minded, but |
||||
* it serves my immediate needs in the short term until NVIDIA provides |
||||
* a static NVML wrapper library themselves, hopefully in |
||||
* CUDA 6.5 or maybe sometime shortly after. |
||||
* |
||||
* This trivial code is made available under the "new" 3-clause BSD license, |
||||
* and/or any of the GPL licenses you prefer. |
||||
* Feel free to use the code and modify as you see fit. |
||||
* |
||||
* John E. Stone - john.stone@gmail.com |
||||
* |
||||
*/ |
||||
|
||||
#if defined(__cplusplus) |
||||
extern "C" { |
||||
#endif |
||||
|
||||
/*
|
||||
* Ugly hacks to avoid dependencies on the real nvml.h until it starts |
||||
* getting included with the CUDA toolkit or a GDK that's got a known |
||||
* install location, etc. |
||||
*/ |
||||
/*
 * Status code type used for every wrapped NVML entry point.
 * Only the success value is mirrored here; any other value is a failure.
 */
typedef enum wrap_nvmlReturn_enum {
	WRAPNVML_SUCCESS = 0
} wrap_nvmlReturn_t;
||||
|
||||
/* Opaque per-device handle: obtained from NVML and passed back to it untouched. */
typedef void *wrap_nvmlDevice_t;
||||
|
||||
/*
 * Local stand-in for NVML's PCI info structure, filled in by
 * nvmlDeviceGetPciInfo. Field order and sizes must stay exactly as they
 * are, because the library writes into this memory directly.
 */
typedef struct {
	char bus_id_str[16];             /* string form of bus info */
	unsigned int domain;             /* PCI domain number */
	unsigned int bus;                /* PCI bus number */
	unsigned int device;             /* PCI device number */
	unsigned int pci_device_id;      /* combined device and vendor id */
	unsigned int pci_subsystem_id;
	unsigned int res0;               /* res0..res3: NVML internal use only */
	unsigned int res1;
	unsigned int res2;
	unsigned int res3;
} wrap_nvmlPciInfo_t;
||||
|
||||
/* Clock domains that can be passed to nvmlDeviceGetClockInfo. */
typedef enum nvmlClockType_t {
	NVML_CLOCK_GRAPHICS = 0,
	NVML_CLOCK_SM       = 1,
	NVML_CLOCK_MEM      = 2
} wrap_nvmlClockType_t;
||||
|
||||
/*
|
||||
* Handle to hold the function pointers for the entry points we need, |
||||
* and the shared library itself. |
||||
*/ |
||||
typedef struct { |
||||
void *nvml_dll; |
||||
int nvml_gpucount; |
||||
int cuda_gpucount; |
||||
unsigned int *nvml_pci_domain_id; |
||||
unsigned int *nvml_pci_bus_id; |
||||
unsigned int *nvml_pci_device_id; |
||||
int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */ |
||||
int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */ |
||||
wrap_nvmlDevice_t *devs; |
||||
wrap_nvmlReturn_t (*nvmlInit)(void); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetCount)(int *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, wrap_nvmlDevice_t *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetClockInfo)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetPciInfo)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetName)(wrap_nvmlDevice_t, char *, int); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetTemperature)(wrap_nvmlDevice_t, int, unsigned int *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetFanSpeed)(wrap_nvmlDevice_t, unsigned int *); |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetPerformanceState)(wrap_nvmlDevice_t, int *); /* enum */ |
||||
wrap_nvmlReturn_t (*nvmlDeviceGetPowerUsage)(wrap_nvmlDevice_t, unsigned int *); |
||||
char* (*nvmlErrorString)(wrap_nvmlReturn_t); |
||||
wrap_nvmlReturn_t (*nvmlShutdown)(void); |
||||
} wrap_nvml_handle; |
||||
|
||||
|
||||
/*
 * Load NVML and build a wrapper handle; returns NULL on failure.
 * Declared with (void) so that C callers get a real prototype.
 */
wrap_nvml_handle * wrap_nvml_create(void);

/*
 * Shut NVML down, unload the library and free the handle.
 */
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh);
||||
|
||||
/*
|
||||
* Query the number of GPUs seen by NVML |
||||
*/ |
||||
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount); |
||||
|
||||
/*
|
||||
* Query the number of GPUs seen by CUDA |
||||
*/ |
||||
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount); |
||||
|
||||
|
||||
/*
|
||||
* query the name of the GPU model from the CUDA device ID |
||||
* |
||||
*/ |
||||
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, |
||||
int gpuindex, |
||||
char *namebuf, |
||||
int bufsize); |
||||
|
||||
/*
|
||||
* Query the current GPU temperature (Celsius), from the CUDA device ID |
||||
*/ |
||||
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, |
||||
int gpuindex, unsigned int *tempC); |
||||
|
||||
/*
|
||||
* Query the current GPU fan speed (percent) from the CUDA device ID |
||||
*/ |
||||
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, |
||||
int gpuindex, unsigned int *fanpcnt); |
||||
|
||||
/*
|
||||
* Query the current GPU speed from the CUDA device ID |
||||
*/ |
||||
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, |
||||
int gpuindex, int clktype, unsigned int *freq); |
||||
|
||||
/*
|
||||
* Query the current GPU power usage in millwatts from the CUDA device ID |
||||
* |
||||
* This feature is only available on recent GPU generations and may be |
||||
* limited in some cases only to Tesla series GPUs. |
||||
* If the query is run on an unsupported GPU, this routine will return -1. |
||||
*/ |
||||
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, |
||||
int gpuindex, |
||||
unsigned int *milliwatts); |
||||
|
||||
/* api functions */ |
||||
|
||||
#include "miner.h" |
||||
|
||||
/*
 * Per-GPU monitoring helpers used by the API code. Each one yields 0
 * when NVML is not available or the underlying query fails.
 */
unsigned int gpu_fanpercent(struct cgpu_info *gpu);  /* fan speed */
double gpu_temp(struct cgpu_info *gpu);              /* temperature */
unsigned int gpu_clock(struct cgpu_info *gpu);       /* SM clock */
unsigned int gpu_power(struct cgpu_info *gpu);       /* power usage */
int gpu_pstate(struct cgpu_info *gpu);               /* performance state */
||||
|
||||
#if defined(__cplusplus) |
||||
} |
||||
#endif |
||||
|
Loading…
Reference in new issue