/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
* Tanguy Pruvot - tpruvot@github
*
*/
#ifdef USE_WRAPNVML
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _MSC_VER
#include <libgen.h>
#endif
#include "miner.h"
#include "cuda_runtime.h"
#include "nvml.h"
extern wrap_nvml_handle *hnvml;
extern int device_map[8];
/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
HMODULE h = LoadLibrary(filename);
if (!h && opt_debug) {
applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
GetLastError(), filename);
}
return (void*)h;
}
static void *wrap_dlsym(void *h, const char *sym) {
return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
/* FreeLibrary returns nonzero on success; invert so we mimic dlclose(), which returns 0 on success */
return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
#include <errno.h>
static void *wrap_dlopen(const char *filename) {
void *h = dlopen(filename, RTLD_NOW);
if (h == NULL && opt_debug) {
applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
errno, filename);
}
return (void*)h;
}
static void *wrap_dlsym(void *h, const char *sym) {
return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
return dlclose(h);
}
#endif
#ifdef __cplusplus
extern "C" {
#endif
wrap_nvml_handle * wrap_nvml_create()
{
int i=0;
wrap_nvml_handle *nvmlh = NULL;
#if defined(WIN32)
/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
#define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
#else
/* linux assumed */
#define libnvidia_ml "libnvidia-ml.so"
#endif
char tmp[512];
#ifdef WIN32
ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp));
#else
strcpy(tmp, libnvidia_ml);
#endif
void *nvml_dll = wrap_dlopen(tmp);
if (nvml_dll == NULL) {
#ifdef WIN32
nvml_dll = wrap_dlopen("nvml.dll");
if (nvml_dll == NULL)
#endif
return NULL;
}
nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
nvmlh->nvml_dll = nvml_dll;
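/* Resolve the NVML entry points via dlsym. nvmlInit_v2 is tried first with a
 * fallback to the legacy nvmlInit name; the remaining symbols are looked up once. */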
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
if (!nvmlh->nvmlInit) {
nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
}
nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState");
nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t))
wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL ||
nvmlh->nvmlDeviceGetCount == NULL ||
nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
nvmlh->nvmlDeviceGetPciInfo == NULL ||
nvmlh->nvmlDeviceGetName == NULL ||
nvmlh->nvmlDeviceGetTemperature == NULL ||
nvmlh->nvmlDeviceGetFanSpeed == NULL)
{
if (opt_debug)
applog(LOG_DEBUG, "Failed to obtain required NVML function pointers");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
nvmlh->nvmlInit();
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
if (opt_debug)
applog(LOG_DEBUG, "Failed to query CUDA device count!");
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}
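/* Per-device tables: NVML handles, PCI identifiers, and the CUDA<->NVML
 * index maps that are filled in below. */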
nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
/* Obtain GPU device handles we're going to need repeatedly... */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
}
/* Query PCI info for each NVML device, and build table for mapping of */
/* CUDA device IDs to NVML device IDs and vice versa */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
wrap_nvmlPciInfo_t pciinfo;
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
}
/* build mapping of NVML device IDs to CUDA IDs */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvml_cuda_device_id[i] = -1;
}
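/* -1 marks NVML devices without a matching CUDA device (e.g. GPUs CUDA does not report). */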
for (i=0; i<nvmlh->cuda_gpucount; i++) {
cudaDeviceProp props;
nvmlh->cuda_nvml_device_id[i] = -1;
if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
int j;
for (j=0; j<nvmlh->nvml_gpucount; j++) {
if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
(nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) &&
(nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
if (opt_debug)
applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j);
nvmlh->nvml_cuda_device_id[j] = i;
nvmlh->cuda_nvml_device_id[i] = j;
}
}
}
}
return nvmlh;
}
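/*
 * Minimal usage sketch (illustrative only): create the handle once at startup,
 * query per-CUDA-device values, then release everything on shutdown.
 *
 *   wrap_nvml_handle *h = wrap_nvml_create();
 *   if (h) {
 *     unsigned int tempC = 0;
 *     if (wrap_nvml_get_tempC(h, 0, &tempC) == 0)
 *       printf("GPU #0: %u C\n", tempC);
 *     wrap_nvml_destroy(h);
 *   }
 */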
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->nvml_gpucount;
return 0;
}
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
*gpucount = nvmlh->cuda_gpucount;
return 0;
}
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
return -1;
return 0;
}
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC)
{
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}
return 0;
}
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt)
{
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}
return 0;
}
/* Not supported on the 750 Ti with driver 340.23 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
/* Not supported on the 750 Ti with driver 340.23 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
/* Not supported on the 750 Ti with driver 340.23 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate)
{
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;
wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
if (res != WRAPNVML_SUCCESS) {
if (opt_debug)
applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res));
return -1;
}
return 0;
}
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
{
nvmlh->nvmlShutdown();
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return 0;
}
/**
* NVAPI alternative for Windows x86 binaries
* (the NVML API is not shipped as a 32-bit DLL)
*/
#ifdef WIN32
#include "nvapi/nvapi_ccminer.h"
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
static NvU32 nvapi_dev_cnt = 0;
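/* NVAPI path: used on Windows builds where nvml.dll could not be loaded
 * (notably 32-bit binaries). NvAPI_GPU_GetTachReading reports fan speed in
 * RPM, not percent; gpu_fanpercent() below does the conversion. */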
int nvapi_temperature(unsigned int devNum, unsigned int *temperature)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -1;
NV_GPU_THERMAL_SETTINGS thermal;
thermal.version = NV_GPU_THERMAL_SETTINGS_VER;
ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string);
return -1;
}
(*temperature) = (unsigned int) thermal.sensor[0].currentTemp;
return 0;
}
int nvapi_fanspeed(unsigned int devNum, unsigned int *speed)
{
NvAPI_Status ret;
if (devNum >= nvapi_dev_cnt)
return -1;
NvU32 fanspeed = 0;
ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string);
return -1;
}
(*speed) = (unsigned int) fanspeed;
return 0;
}
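/* wrap_nvapi_init() must be called once before nvapi_temperature() and
 * nvapi_fanspeed(): both validate devNum against nvapi_dev_cnt, which is
 * only populated there. */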
int wrap_nvapi_init()
{
NvAPI_Status ret = NvAPI_Initialize();
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string);
return -1;
}
ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt);
if (ret != NVAPI_OK) {
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
if (opt_debug)
applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string);
return -1;
}
#if 0
NvAPI_ShortString ver;
NvAPI_GetInterfaceVersionString(ver);
applog(LOG_DEBUG, "NVAPI Version: %s", ver);
NvAPI_ShortString name;
ret = NvAPI_GPU_GetFullName(phys[devNum], name);
if (ret != NVAPI_OK){
NvAPI_ShortString string;
NvAPI_GetErrorMessage(ret, string);
applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
}
#endif
return 0;
}
#endif
/* api functions */
static unsigned int fan_speed_max = 2000; /* assume 2000 rpm as default, auto-updated */
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
{
unsigned int pct = 0;
if (hnvml) {
wrap_nvml_get_fanpcnt(hnvml, device_map[gpu->thr_id], &pct);
}
#ifdef WIN32
else {
unsigned int rpm = 0;
nvapi_fanspeed(device_map[gpu->thr_id], &rpm);
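/* The tach reading is in RPM; scale it against the assumed maximum and let
 * fan_speed_max grow whenever a higher reading is seen. */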
pct = (rpm * 100) / fan_speed_max;
if (pct > 100) {
pct = 100;
fan_speed_max = rpm;
}
}
#endif
return pct;
}
double gpu_temp(struct cgpu_info *gpu)
{
double tc = 0.0;
unsigned int tmp = 0;
if (hnvml) {
wrap_nvml_get_tempC(hnvml, device_map[gpu->thr_id], &tmp);
tc = (double) tmp;
}
#ifdef WIN32
else {
nvapi_temperature(device_map[gpu->thr_id], &tmp);
tc = (double)tmp;
}
#endif
return tc;
}
unsigned int gpu_clock(struct cgpu_info *gpu)
{
unsigned int freq = 0;
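/* NVML_CLOCK_SM selects the SM (shader) clock; nvmlDeviceGetClockInfo reports MHz. */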
if (hnvml) {
wrap_nvml_get_clock(hnvml, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq);
}
return freq;
}
unsigned int gpu_power(struct cgpu_info *gpu)
{
unsigned int mw = 0;
if (hnvml) {
wrap_nvml_get_power_usage(hnvml, device_map[gpu->thr_id], &mw);
}
return mw;
}
int gpu_pstate(struct cgpu_info *gpu)
{
int pstate = 0;
if (hnvml) {
wrap_nvml_get_pstate(hnvml, device_map[gpu->thr_id], &pstate);
}
return pstate;
}
#if defined(__cplusplus)
}
#endif
#endif /* USE_WRAPNVML */
/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
nvmlDeviceGetAccountingBufferSize
nvmlDeviceGetAccountingMode
nvmlDeviceGetAccountingPids
nvmlDeviceGetAccountingStats
nvmlDeviceGetAPIRestriction
nvmlDeviceGetApplicationsClock
nvmlDeviceGetAutoBoostedClocksEnabled
nvmlDeviceGetBAR1MemoryInfo
nvmlDeviceGetBoardId
nvmlDeviceGetBrand
nvmlDeviceGetBridgeChipInfo
* nvmlDeviceGetClockInfo
nvmlDeviceGetComputeMode
nvmlDeviceGetComputeRunningProcesses
nvmlDeviceGetCount
nvmlDeviceGetCount_v2
nvmlDeviceGetCpuAffinity
nvmlDeviceGetCurrentClocksThrottleReasons
nvmlDeviceGetCurrPcieLinkGeneration
nvmlDeviceGetCurrPcieLinkWidth
nvmlDeviceGetDecoderUtilization
nvmlDeviceGetDefaultApplicationsClock
nvmlDeviceGetDetailedEccErrors
nvmlDeviceGetDisplayActive
nvmlDeviceGetDisplayMode
nvmlDeviceGetDriverModel
nvmlDeviceGetEccMode
nvmlDeviceGetEncoderUtilization
nvmlDeviceGetEnforcedPowerLimit
* nvmlDeviceGetFanSpeed
nvmlDeviceGetGpuOperationMode
nvmlDeviceGetHandleByIndex
* nvmlDeviceGetHandleByIndex_v2
nvmlDeviceGetHandleByPciBusId
nvmlDeviceGetHandleByPciBusId_v2
nvmlDeviceGetHandleBySerial
nvmlDeviceGetHandleByUUID
nvmlDeviceGetIndex
nvmlDeviceGetInforomConfigurationChecksum
nvmlDeviceGetInforomImageVersion
nvmlDeviceGetInforomVersion
nvmlDeviceGetMaxClockInfo
nvmlDeviceGetMaxPcieLinkGeneration
nvmlDeviceGetMaxPcieLinkWidth
nvmlDeviceGetMemoryErrorCounter
nvmlDeviceGetMemoryInfo
nvmlDeviceGetMinorNumber
nvmlDeviceGetMultiGpuBoard
nvmlDeviceGetName
* nvmlDeviceGetPciInfo
nvmlDeviceGetPciInfo_v2
* nvmlDeviceGetPerformanceState
nvmlDeviceGetPersistenceMode
nvmlDeviceGetPowerManagementDefaultLimit
nvmlDeviceGetPowerManagementLimit
nvmlDeviceGetPowerManagementLimitConstraints
nvmlDeviceGetPowerManagementMode
nvmlDeviceGetPowerState (deprecated)
* nvmlDeviceGetPowerUsage
nvmlDeviceGetRetiredPages
nvmlDeviceGetRetiredPagesPendingStatus
nvmlDeviceGetSamples
nvmlDeviceGetSerial
nvmlDeviceGetSupportedClocksThrottleReasons
nvmlDeviceGetSupportedEventTypes
nvmlDeviceGetSupportedGraphicsClocks
nvmlDeviceGetSupportedMemoryClocks
nvmlDeviceGetTemperature
nvmlDeviceGetTemperatureThreshold
nvmlDeviceGetTotalEccErrors
nvmlDeviceGetUtilizationRates
nvmlDeviceGetUUID
nvmlDeviceGetVbiosVersion
nvmlDeviceGetViolationStatus
*/