/* * A trivial little dlopen()-based wrapper library for the * NVIDIA NVML library, to allow runtime discovery of NVML on an * arbitrary system. This is all very hackish and simple-minded, but * it serves my immediate needs in the short term until NVIDIA provides * a static NVML wrapper library themselves, hopefully in * CUDA 6.5 or maybe sometime shortly after. * * This trivial code is made available under the "new" 3-clause BSD license, * and/or any of the GPL licenses you prefer. * Feel free to use the code and modify as you see fit. * * John E. Stone - john.stone@gmail.com * Tanguy Pruvot - tpruvot@github * */ #ifdef USE_WRAPNVML #include #include #include #ifndef _MSC_VER #include #endif #include "miner.h" #include "cuda_runtime.h" #include "nvml.h" extern wrap_nvml_handle *hnvml; extern int num_processors; // gpus extern int device_map[8]; extern char* device_name[8]; static uint32_t device_bus_ids[8] = { 0 }; /* * Wrappers to emulate dlopen() on other systems like Windows */ #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) #include static void *wrap_dlopen(const char *filename) { HMODULE h = LoadLibrary(filename); if (!h && opt_debug) { applog(LOG_DEBUG, "dlopen(%d): failed to load %s", GetLastError(), filename); } return (void*)h; } static void *wrap_dlsym(void *h, const char *sym) { return (void *)GetProcAddress((HINSTANCE)h, sym); } static int wrap_dlclose(void *h) { /* FreeLibrary returns nonzero on success */ return (!FreeLibrary((HINSTANCE)h)); } #else /* assume we can use dlopen itself... */ #include #include static void *wrap_dlopen(const char *filename) { void *h = dlopen(filename, RTLD_NOW); if (h == NULL && opt_debug) { applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, filename); } return (void*)h; } static void *wrap_dlsym(void *h, const char *sym) { return dlsym(h, sym); } static int wrap_dlclose(void *h) { return dlclose(h); } #endif #ifdef __cplusplus extern "C" { #endif wrap_nvml_handle * wrap_nvml_create() { int i=0; wrap_nvml_handle *nvmlh = NULL; #if defined(WIN32) /* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */ #define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll" #else /* linux assumed */ #define libnvidia_ml "libnvidia-ml.so" #endif char tmp[512]; #ifdef WIN32 ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp)); #else strcpy(tmp, libnvidia_ml); #endif void *nvml_dll = wrap_dlopen(tmp); if (nvml_dll == NULL) { #ifdef WIN32 nvml_dll = wrap_dlopen("nvml.dll"); if (nvml_dll == NULL) #endif return NULL; } nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle)); nvmlh->nvml_dll = nvml_dll; nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); if (!nvmlh->nvmlInit) { nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); } nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)()) wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); if (nvmlh->nvmlInit == NULL || nvmlh->nvmlShutdown == NULL || nvmlh->nvmlDeviceGetCount == NULL || nvmlh->nvmlDeviceGetHandleByIndex == NULL || nvmlh->nvmlDeviceGetPciInfo == NULL || nvmlh->nvmlDeviceGetName == NULL || nvmlh->nvmlDeviceGetTemperature == NULL || nvmlh->nvmlDeviceGetFanSpeed == NULL) { if (opt_debug) applog(LOG_DEBUG, "Failed to obtain required NVML function pointers"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } nvmlh->nvmlInit(); nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); /* Query CUDA device count, in case it doesn't agree with NVML, since */ /* CUDA will only report GPUs with compute capability greater than 1.0 */ if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { if (opt_debug) applog(LOG_DEBUG, "Failed to query CUDA device count!"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t)); nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); /* Obtain GPU device handles we're going to need repeatedly... */ for (i=0; invml_gpucount; i++) { nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); } /* Query PCI info for each NVML device, and build table for mapping of */ /* CUDA device IDs to NVML device IDs and vice versa */ for (i=0; invml_gpucount; i++) { wrap_nvmlPciInfo_t pciinfo; nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; nvmlh->nvml_pci_device_id[i] = pciinfo.device; } /* build mapping of NVML device IDs to CUDA IDs */ for (i=0; invml_gpucount; i++) { nvmlh->nvml_cuda_device_id[i] = -1; } for (i=0; icuda_gpucount; i++) { cudaDeviceProp props; nvmlh->cuda_nvml_device_id[i] = -1; if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { int j; device_bus_ids[i] = props.pciBusID; for (j = 0; jnvml_gpucount; j++) { if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { if (opt_debug) applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j); nvmlh->nvml_cuda_device_id[j] = i; nvmlh->cuda_nvml_device_id[i] = j; } } } } return nvmlh; } int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) { *gpucount = nvmlh->nvml_gpucount; return 0; } int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) { *gpucount = nvmlh->cuda_gpucount; return 0; } int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS) return -1; return 0; } int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *tempC) { wrap_nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC); if (rc != WRAPNVML_SUCCESS) { return -1; } return 0; } int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) { wrap_nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt); if (rc != WRAPNVML_SUCCESS) { return -1; } return 0; } /* Not Supported on 750Ti 340.23 */ int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh, int cudaindex, int type, unsigned int *freq) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq); if (res != WRAPNVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetClockInfo: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } /* Not Supported on 750Ti 340.23 */ int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); if (res != WRAPNVML_SUCCESS) { if (opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } /* Not Supported on 750Ti 340.23 */ int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh, int cudaindex, int *pstate) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -1; wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); if (res != WRAPNVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) { nvmlh->nvmlShutdown(); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return 0; } /** * nvapi alternative for windows x86 binaries * nvml api doesn't exists as 32bit dll :/// */ #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" static int nvapi_dev_map[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvU32 nvapi_dev_cnt = 0; int nvapi_temperature(unsigned int devNum, unsigned int *temperature) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -1; NV_GPU_THERMAL_SETTINGS thermal; thermal.version = NV_GPU_THERMAL_SETTINGS_VER; ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string); return -1; } (*temperature) = (unsigned int) thermal.sensor[0].currentTemp; return 0; } int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -1; NvU32 fanspeed = 0; ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string); return -1; } (*speed) = (unsigned int) fanspeed; return 0; } int nvapi_getclock(unsigned int devNum, unsigned int *freq) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -1; NV_GPU_CLOCK_FREQUENCIES_V2 clocks; clocks.version = NV_GPU_CLOCK_FREQUENCIES_VER_2; clocks.ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; // CURRENT/BASE/BOOST ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &clocks); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetAllClockFrequencies: %s", string); return -1; } else { // GRAPHICS/MEMORY (*freq) = (unsigned int)clocks.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; } return 0; } int nvapi_getpstate(unsigned int devNum, unsigned int *power) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -1; NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */ ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string); return -1; } else { // get pstate for the moment... often 0 = P0 (*power) = (unsigned int)CurrentPstate; } return 0; } int wrap_nvapi_init() { NvAPI_Status ret = NvAPI_Initialize(); if (!ret == NVAPI_OK){ NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); return -1; } ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); return -1; } for (NvU8 i = 0; i < nvapi_dev_cnt; i++) { NvAPI_ShortString name; nvapi_dev_map[i] = i; // default mapping ret = NvAPI_GPU_GetFullName(phys[i], name); if (ret == NVAPI_OK) { for (int g = 0; g < num_processors; g++) { //todo : device_bus_ids, could be wrong on rigs if (strcmp(device_name[g], name) == 0 && nvapi_dev_map[i] == i) { nvapi_dev_map[i] = g; break; } } if (opt_debug) applog(LOG_DEBUG, "NVAPI dev %d: %s - mapped to CUDA device %d", i, name, nvapi_dev_map[i]); } else { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); } } #if 0 NvAPI_ShortString ver; NvAPI_GetInterfaceVersionString(ver); applog(LOG_DEBUG, "NVAPI Version: %s", ver); #endif return 0; } #endif /* api functions -------------------------------------- */ // assume 2500 rpm as default, auto-updated if more static unsigned int fan_speed_max = 2500; int gpu_fanpercent(struct cgpu_info *gpu) { unsigned int pct = 0; if (hnvml) { wrap_nvml_get_fanpcnt(hnvml, device_map[gpu->thr_id], &pct); } #ifdef WIN32 else { unsigned int rpm = 0; nvapi_fanspeed(nvapi_dev_map[gpu->thr_id], &rpm); pct = (rpm * 100) / fan_speed_max; if (pct > 100) { pct = 100; fan_speed_max = rpm; } } #endif return (int) pct; } float gpu_temp(struct cgpu_info *gpu) { float tc = 0.0; unsigned int tmp = 0; if (hnvml) { wrap_nvml_get_tempC(hnvml, device_map[gpu->thr_id], &tmp); tc = (float)tmp; } #ifdef WIN32 else { nvapi_temperature(nvapi_dev_map[gpu->thr_id], &tmp); tc = (float)tmp; } #endif return tc; } int gpu_clock(struct cgpu_info *gpu) { unsigned int freq = 0; int support = -1; if (hnvml) { support = wrap_nvml_get_clock(hnvml, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq); } #ifdef WIN32 if (support == -1) { nvapi_getclock(nvapi_dev_map[gpu->thr_id], &freq); } #endif return (int) freq; } int gpu_pstate(struct cgpu_info *gpu) { int pstate = -1; int support = -1; if (hnvml) { support = wrap_nvml_get_pstate(hnvml, device_map[gpu->thr_id], &pstate); } #ifdef WIN32 if (support == -1) { unsigned int pst = 0; nvapi_getpstate(nvapi_dev_map[gpu->thr_id], &pst); //todo : convert ? pstate = (int) pst; } #endif return pstate; } unsigned int gpu_power(struct cgpu_info *gpu) { unsigned int mw = 0; int support = -1; if (hnvml) { support = wrap_nvml_get_power_usage(hnvml, device_map[gpu->thr_id], &mw); } return mw; } #if defined(__cplusplus) } #endif #endif /* USE_WRAPNVML */ /* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq nvmlDeviceGetAccountingBufferSize nvmlDeviceGetAccountingMode nvmlDeviceGetAccountingPids nvmlDeviceGetAccountingStats nvmlDeviceGetAPIRestriction nvmlDeviceGetApplicationsClock nvmlDeviceGetAutoBoostedClocksEnabled nvmlDeviceGetBAR1MemoryInfo nvmlDeviceGetBoardId nvmlDeviceGetBrand nvmlDeviceGetBridgeChipInfo * nvmlDeviceGetClockInfo nvmlDeviceGetComputeMode nvmlDeviceGetComputeRunningProcesses nvmlDeviceGetCount nvmlDeviceGetCount_v2 nvmlDeviceGetCpuAffinity nvmlDeviceGetCurrentClocksThrottleReasons nvmlDeviceGetCurrPcieLinkGeneration nvmlDeviceGetCurrPcieLinkWidth nvmlDeviceGetDecoderUtilization nvmlDeviceGetDefaultApplicationsClock nvmlDeviceGetDetailedEccErrors nvmlDeviceGetDisplayActive nvmlDeviceGetDisplayMode nvmlDeviceGetDriverModel nvmlDeviceGetEccMode nvmlDeviceGetEncoderUtilization nvmlDeviceGetEnforcedPowerLimit * nvmlDeviceGetFanSpeed nvmlDeviceGetGpuOperationMode nvmlDeviceGetHandleByIndex * nvmlDeviceGetHandleByIndex_v2 nvmlDeviceGetHandleByPciBusId nvmlDeviceGetHandleByPciBusId_v2 nvmlDeviceGetHandleBySerial nvmlDeviceGetHandleByUUID nvmlDeviceGetIndex nvmlDeviceGetInforomConfigurationChecksum nvmlDeviceGetInforomImageVersion nvmlDeviceGetInforomVersion nvmlDeviceGetMaxClockInfo nvmlDeviceGetMaxPcieLinkGeneration nvmlDeviceGetMaxPcieLinkWidth nvmlDeviceGetMemoryErrorCounter nvmlDeviceGetMemoryInfo nvmlDeviceGetMinorNumber nvmlDeviceGetMultiGpuBoard nvmlDeviceGetName * nvmlDeviceGetPciInfo nvmlDeviceGetPciInfo_v2 * nvmlDeviceGetPerformanceState nvmlDeviceGetPersistenceMode nvmlDeviceGetPowerManagementDefaultLimit nvmlDeviceGetPowerManagementLimit nvmlDeviceGetPowerManagementLimitConstraints nvmlDeviceGetPowerManagementMode nvmlDeviceGetPowerState (deprecated) * nvmlDeviceGetPowerUsage nvmlDeviceGetRetiredPages nvmlDeviceGetRetiredPagesPendingStatus nvmlDeviceGetSamples nvmlDeviceGetSerial nvmlDeviceGetSupportedClocksThrottleReasons nvmlDeviceGetSupportedEventTypes nvmlDeviceGetSupportedGraphicsClocks nvmlDeviceGetSupportedMemoryClocks nvmlDeviceGetTemperature nvmlDeviceGetTemperatureThreshold nvmlDeviceGetTotalEccErrors nvmlDeviceGetUtilizationRates nvmlDeviceGetUUID nvmlDeviceGetVbiosVersion nvmlDeviceGetViolationStatus */