/*
 * A trivial little dlopen()-based wrapper library for the
 * NVIDIA NVML library, to allow runtime discovery of NVML on an
 * arbitrary system.  This is all very hackish and simple-minded, but
 * it serves my immediate needs in the short term until NVIDIA provides
 * a static NVML wrapper library themselves, hopefully in
 * CUDA 6.5 or maybe sometime shortly after.
 *
 * This trivial code is made available under the "new" 3-clause BSD license,
 * and/or any of the GPL licenses you prefer.
 * Feel free to use the code and modify as you see fit.
 *
 * John E. Stone - john.stone@gmail.com
 * Tanguy Pruvot - tpruvot@github
 *
 */

#ifdef USE_WRAPNVML

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _MSC_VER
#include <libgen.h>
#endif

#include "miner.h"
#include "cuda_runtime.h"
#include "nvml.h"

extern wrap_nvml_handle *hnvml;
extern int device_map[8];

/*
 * Wrappers to emulate dlopen() on other systems like Windows
 */
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
	#include <windows.h>

	/* Load a DLL by name/path; returns NULL on failure. */
	static void *wrap_dlopen(const char *filename) {
		HMODULE h = LoadLibrary(filename);
		if (!h && opt_debug) {
			/* GetLastError() returns a DWORD, cast it to match %d */
			applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
				(int) GetLastError(), filename);
		}
		return (void*)h;
	}

	/* Resolve an exported symbol; returns NULL when it is missing. */
	static void *wrap_dlsym(void *h, const char *sym) {
		return (void *)GetProcAddress((HINSTANCE)h, sym);
	}

	/* Unload the library; returns 0 on success like dlclose(). */
	static int wrap_dlclose(void *h) {
		/* FreeLibrary returns nonzero on success */
		return (!FreeLibrary((HINSTANCE)h));
	}
#else
	/* assume we can use dlopen itself... */
	#include <dlfcn.h>
	#include <errno.h>

	/* Load a shared object by name/path; returns NULL on failure. */
	static void *wrap_dlopen(const char *filename) {
		void *h = dlopen(filename, RTLD_NOW);
		if (h == NULL && opt_debug) {
			applog(LOG_DEBUG, "dlopen(%d): failed to load %s",
				errno, filename);
		}
		return (void*)h;
	}

	/* Resolve an exported symbol; returns NULL when it is missing. */
	static void *wrap_dlsym(void *h, const char *sym) {
		return dlsym(h, sym);
	}

	/* Unload the library; returns 0 on success. */
	static int wrap_dlclose(void *h) {
		return dlclose(h);
	}
#endif

/*
 * Release everything a handle owns: the NVML session (only when
 * `initialized` is nonzero), the loaded library, the device/PCI mapping
 * tables and the handle itself. Tables that were never allocated are
 * NULL (the handle comes from calloc) and free(NULL) is a no-op.
 */
static void wrap_nvml_release(wrap_nvml_handle *nvmlh, int initialized)
{
	if (nvmlh == NULL)
		return;

	if (initialized && nvmlh->nvmlShutdown != NULL)
		nvmlh->nvmlShutdown();
	if (nvmlh->nvml_dll != NULL)
		wrap_dlclose(nvmlh->nvml_dll);

	free(nvmlh->devs);
	free(nvmlh->nvml_pci_domain_id);
	free(nvmlh->nvml_pci_bus_id);
	free(nvmlh->nvml_pci_device_id);
	free(nvmlh->nvml_cuda_device_id);
	free(nvmlh->cuda_nvml_device_id);
	free(nvmlh);
}

/*
 * Translate a CUDA device index into the matching NVML device index.
 * Returns -1 when the handle is NULL, the CUDA index is out of range,
 * or no NVML device was matched to that CUDA device.
 */
static int wrap_nvml_index(const wrap_nvml_handle *nvmlh, int cudaindex)
{
	int gpuindex;

	if (nvmlh == NULL || cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
		return -1;

	gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
	if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
		return -1;

	return gpuindex;
}

/*
 * Debug-log a failed NVML call. nvmlErrorString is an optional symbol,
 * so fall back to the numeric code when it could not be resolved.
 */
static void wrap_nvml_log_error(const wrap_nvml_handle *nvmlh,
	const char *func, wrap_nvmlReturn_t res)
{
	if (!opt_debug)
		return;

	if (nvmlh->nvmlErrorString != NULL)
		applog(LOG_DEBUG, "%s: %s", func, nvmlh->nvmlErrorString(res));
	else
		applog(LOG_DEBUG, "%s: error %d", func, (int) res);
}

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Load the NVML library, resolve the entry points used by this wrapper,
 * initialize NVML and build the CUDA <-> NVML device index tables by
 * comparing PCI domain/bus/device ids.
 *
 * Returns a handle to be released with wrap_nvml_destroy(), or NULL when
 * the library cannot be loaded, a required symbol is missing, NVML fails
 * to initialize, the CUDA device count cannot be queried, or memory
 * allocation fails.
 */
wrap_nvml_handle * wrap_nvml_create()
{
	int i=0;
	wrap_nvml_handle *nvmlh = NULL;
	/* marks NVML devices whose PCI info is unknown so they never match */
	const unsigned int pci_unknown = 0xFFFFFFFFu;

#if defined(WIN32)
	/* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */
#define  libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll"
#else
	/* linux assumed */
#define  libnvidia_ml "libnvidia-ml.so"
#endif

	/* zeroed so a failed expansion yields an empty path, not garbage */
	char tmp[512] = { 0 };
#ifdef WIN32
	ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp));
#else
	strcpy(tmp, libnvidia_ml);
#endif

	void *nvml_dll = wrap_dlopen(tmp);
	if (nvml_dll == NULL) {
#ifdef WIN32
		/* fall back to the default DLL search path */
		nvml_dll = wrap_dlopen("nvml.dll");
		if (nvml_dll == NULL)
#endif
		return NULL;
	}

	nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
	if (nvmlh == NULL) {
		wrap_dlclose(nvml_dll);
		return NULL;
	}

	nvmlh->nvml_dll = nvml_dll;

	/* prefer the _v2 init entry point, fall back to the legacy name */
	nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2");
	if (!nvmlh->nvmlInit) {
		nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
			wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
	}
	nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
	nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
	nvmlh->nvmlDeviceGetClockInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlClockType_t, unsigned int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo");
	nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
	nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
	nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
	nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
	/* was resolved from "nvmlDeviceGetPowerUsage" by mistake */
	nvmlh->nvmlDeviceGetPerformanceState = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState");
	nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
	nvmlh->nvmlErrorString = (char* (*)(wrap_nvmlReturn_t))
		wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString");
	nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
		wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");

	/* clock, power, pstate and error-string lookups stay optional */
	if (nvmlh->nvmlInit == NULL ||
		nvmlh->nvmlShutdown == NULL ||
		nvmlh->nvmlDeviceGetCount == NULL ||
		nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
		nvmlh->nvmlDeviceGetPciInfo == NULL ||
		nvmlh->nvmlDeviceGetName == NULL ||
		nvmlh->nvmlDeviceGetTemperature == NULL ||
		nvmlh->nvmlDeviceGetFanSpeed == NULL)
	{
		if (opt_debug)
			applog(LOG_DEBUG, "Failed to obtain required NVML function pointers");
		wrap_nvml_release(nvmlh, 0);
		return NULL;
	}

	if (nvmlh->nvmlInit() != WRAPNVML_SUCCESS) {
		if (opt_debug)
			applog(LOG_DEBUG, "Failed to initialize NVML");
		wrap_nvml_release(nvmlh, 0);
		return NULL;
	}

	/* a failed or nonsensical count is treated as "no NVML devices" */
	if (nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount) != WRAPNVML_SUCCESS ||
		nvmlh->nvml_gpucount < 0)
	{
		nvmlh->nvml_gpucount = 0;
	}

	/* Query CUDA device count, in case it doesn't agree with NVML, since  */
	/* CUDA will only report GPUs with compute capability greater than 1.0 */
	if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
		if (opt_debug)
			applog(LOG_DEBUG, "Failed to query CUDA device count!");
		wrap_nvml_release(nvmlh, 1);
		return NULL;
	}
	if (nvmlh->cuda_gpucount < 0)
		nvmlh->cuda_gpucount = 0;

	nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
	nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
	nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
	nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
	nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
	nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));

	/* calloc(0, n) may legitimately return NULL, so only test non-empty tables */
	if ((nvmlh->nvml_gpucount > 0 &&
			(nvmlh->devs == NULL ||
			 nvmlh->nvml_pci_domain_id == NULL ||
			 nvmlh->nvml_pci_bus_id == NULL ||
			 nvmlh->nvml_pci_device_id == NULL ||
			 nvmlh->nvml_cuda_device_id == NULL)) ||
		(nvmlh->cuda_gpucount > 0 && nvmlh->cuda_nvml_device_id == NULL))
	{
		if (opt_debug)
			applog(LOG_DEBUG, "Failed to allocate NVML device tables");
		wrap_nvml_release(nvmlh, 1);
		return NULL;
	}

	/* Obtain GPU device handles we're going to need repeatedly, */
	/* and query PCI info for each NVML device                   */
	for (i=0; i < nvmlh->nvml_gpucount; i++) {
		wrap_nvmlPciInfo_t pciinfo;

		nvmlh->nvml_cuda_device_id[i] = -1;
		nvmlh->nvml_pci_domain_id[i] = pci_unknown;
		nvmlh->nvml_pci_bus_id[i] = pci_unknown;
		nvmlh->nvml_pci_device_id[i] = pci_unknown;

		if (nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]) != WRAPNVML_SUCCESS)
			continue;
		/* on failure pciinfo is not trustworthy, keep the sentinel ids */
		if (nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo) != WRAPNVML_SUCCESS)
			continue;

		nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
		nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
		nvmlh->nvml_pci_device_id[i] = pciinfo.device;
	}

	/* build mapping of CUDA device IDs to NVML device IDs and vice versa */
	for (i=0; i < nvmlh->cuda_gpucount; i++) {
		cudaDeviceProp props;
		nvmlh->cuda_nvml_device_id[i] = -1;

		if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
			int j;
			for (j=0; j < nvmlh->nvml_gpucount; j++) {
				if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) &&
				    (nvmlh->nvml_pci_bus_id[j]    == (uint32_t) props.pciBusID) &&
				    (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) {
					if (opt_debug)
						applog(LOG_DEBUG, "CUDA GPU[%d] matches NVML GPU[%d]", i, j);
					nvmlh->nvml_cuda_device_id[j] = i;
					nvmlh->cuda_nvml_device_id[i] = j;
				}
			}
		}
	}

	return nvmlh;
}

/* Store the number of devices reported by NVML in *gpucount; returns 0. */
int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	*gpucount = nvmlh->nvml_gpucount;
	return 0;
}

/* Store the number of devices reported by CUDA in *gpucount; returns 0. */
int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount)
{
	*gpucount = nvmlh->cuda_gpucount;
	return 0;
}

/*
 * Copy the NVML name of the given CUDA device into namebuf (bufsize bytes).
 * Returns 0 on success, -1 when the device is unmapped or the query fails.
 */
int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                           int cudaindex,
                           char *namebuf,
                           int bufsize)
{
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	if (gpuindex < 0)
		return -1;

	if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
		return -1;

	return 0;
}

/*
 * Query the temperature of the given CUDA device into *tempC.
 * Returns 0 on success, -1 when the device is unmapped or the query fails.
 */
int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                        int cudaindex, unsigned int *tempC)
{
	wrap_nvmlReturn_t rc;
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	if (gpuindex < 0)
		return -1;

	rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}

/*
 * Query the fan speed of the given CUDA device into *fanpcnt.
 * Returns 0 on success, -1 when the device is unmapped or the query fails.
 */
int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                          int cudaindex, unsigned int *fanpcnt)
{
	wrap_nvmlReturn_t rc;
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	if (gpuindex < 0)
		return -1;

	rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
	if (rc != WRAPNVML_SUCCESS) {
		return -1;
	}

	return 0;
}

/*
 * Query a clock of the given CUDA device into *freq; `type` is passed to
 * NVML as a wrap_nvmlClockType_t. Returns 0 on success, -1 when the device
 * is unmapped, the entry point is unavailable or the query fails.
 */
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_clock(wrap_nvml_handle *nvmlh,
                        int cudaindex, int type, unsigned int *freq)
{
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	/* optional symbol: may be NULL when the library does not export it */
	if (gpuindex < 0 || nvmlh->nvmlDeviceGetClockInfo == NULL)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetClockInfo(nvmlh->devs[gpuindex], (wrap_nvmlClockType_t) type, freq);
	if (res != WRAPNVML_SUCCESS) {
		wrap_nvml_log_error(nvmlh, "nvmlDeviceGetClockInfo", res);
		return -1;
	}

	return 0;
}

/*
 * Query the power usage of the given CUDA device into *milliwatts.
 * Returns 0 on success, -1 when the device is unmapped, the entry point
 * is unavailable or the query fails.
 */
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int cudaindex,
                              unsigned int *milliwatts)
{
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	/* optional symbol: may be NULL when the library does not export it */
	if (gpuindex < 0 || nvmlh->nvmlDeviceGetPowerUsage == NULL)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts);
	if (res != WRAPNVML_SUCCESS) {
		wrap_nvml_log_error(nvmlh, "nvmlDeviceGetPowerUsage", res);
		return -1;
	}

	return 0;
}

/*
 * Query the performance state of the given CUDA device into *pstate.
 * Returns 0 on success, -1 when the device is unmapped, the entry point
 * is unavailable or the query fails.
 */
/* Not Supported on 750Ti 340.23 */
int wrap_nvml_get_pstate(wrap_nvml_handle *nvmlh,
                         int cudaindex,
                         int *pstate)
{
	int gpuindex = wrap_nvml_index(nvmlh, cudaindex);
	/* optional symbol: may be NULL when the library does not export it */
	if (gpuindex < 0 || nvmlh->nvmlDeviceGetPerformanceState == NULL)
		return -1;

	wrap_nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate);
	if (res != WRAPNVML_SUCCESS) {
		wrap_nvml_log_error(nvmlh, "nvmlDeviceGetPerformanceState", res);
		return -1;
	}

	return 0;
}

/*
 * Shut NVML down, unload the library and free the handle together with
 * its device tables. A NULL handle is ignored. Always returns 0.
 */
int wrap_nvml_destroy(wrap_nvml_handle *nvmlh)
{
	if (nvmlh == NULL)
		return 0;

	wrap_nvml_release(nvmlh, 1);
	return 0;
}

/* api functions */

/* Fan speed for the device mapped to gpu->thr_id; 0 when unavailable. */
unsigned int gpu_fanpercent(struct cgpu_info *gpu)
{
	unsigned int pct = 0;
	if (hnvml) {
		wrap_nvml_get_fanpcnt(hnvml, device_map[gpu->thr_id], &pct);
	}
	return pct;
}

/* Temperature for the device mapped to gpu->thr_id; 0.0 when unavailable. */
double gpu_temp(struct cgpu_info *gpu)
{
	double tc = 0.0;
	if (hnvml) {
		unsigned int tmp = 0;
		wrap_nvml_get_tempC(hnvml, device_map[gpu->thr_id], &tmp);
		tc = (double) tmp;
	}
	return tc;
}

/* NVML_CLOCK_SM clock for the device mapped to gpu->thr_id; 0 when unavailable. */
unsigned int gpu_clock(struct cgpu_info *gpu)
{
	unsigned int freq = 0;
	if (hnvml) {
		wrap_nvml_get_clock(hnvml, device_map[gpu->thr_id], NVML_CLOCK_SM, &freq);
	}
	return freq;
}

/* Power usage for the device mapped to gpu->thr_id; 0 when unavailable. */
unsigned int gpu_power(struct cgpu_info *gpu)
{
	unsigned int mw = 0;
	if (hnvml) {
		wrap_nvml_get_power_usage(hnvml, device_map[gpu->thr_id], &mw);
	}
	return mw;
}

/* Performance state for the device mapped to gpu->thr_id; 0 when unavailable. */
int gpu_pstate(struct cgpu_info *gpu)
{
	int pstate = 0;
	if (hnvml) {
		wrap_nvml_get_pstate(hnvml, device_map[gpu->thr_id], &pstate);
	}
	return pstate;
}

#if defined(__cplusplus)
}
#endif

#endif /* USE_WRAPNVML */

/* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq

  nvmlDeviceGetAccountingBufferSize
  nvmlDeviceGetAccountingMode
  nvmlDeviceGetAccountingPids
  nvmlDeviceGetAccountingStats
  nvmlDeviceGetAPIRestriction
  nvmlDeviceGetApplicationsClock
  nvmlDeviceGetAutoBoostedClocksEnabled
  nvmlDeviceGetBAR1MemoryInfo
  nvmlDeviceGetBoardId
  nvmlDeviceGetBrand
  nvmlDeviceGetBridgeChipInfo
* nvmlDeviceGetClockInfo
  nvmlDeviceGetComputeMode
  nvmlDeviceGetComputeRunningProcesses
  nvmlDeviceGetCount
  nvmlDeviceGetCount_v2
  nvmlDeviceGetCpuAffinity
  nvmlDeviceGetCurrentClocksThrottleReasons
  nvmlDeviceGetCurrPcieLinkGeneration
  nvmlDeviceGetCurrPcieLinkWidth
  nvmlDeviceGetDecoderUtilization
  nvmlDeviceGetDefaultApplicationsClock
  nvmlDeviceGetDetailedEccErrors
  nvmlDeviceGetDisplayActive
  nvmlDeviceGetDisplayMode
  nvmlDeviceGetDriverModel
  nvmlDeviceGetEccMode
  nvmlDeviceGetEncoderUtilization
  nvmlDeviceGetEnforcedPowerLimit
* nvmlDeviceGetFanSpeed
  nvmlDeviceGetGpuOperationMode
  nvmlDeviceGetHandleByIndex
* nvmlDeviceGetHandleByIndex_v2
  nvmlDeviceGetHandleByPciBusId
  nvmlDeviceGetHandleByPciBusId_v2
  nvmlDeviceGetHandleBySerial
  nvmlDeviceGetHandleByUUID
  nvmlDeviceGetIndex
  nvmlDeviceGetInforomConfigurationChecksum
  nvmlDeviceGetInforomImageVersion
  nvmlDeviceGetInforomVersion
  nvmlDeviceGetMaxClockInfo
  nvmlDeviceGetMaxPcieLinkGeneration
  nvmlDeviceGetMaxPcieLinkWidth
  nvmlDeviceGetMemoryErrorCounter
  nvmlDeviceGetMemoryInfo
  nvmlDeviceGetMinorNumber
  nvmlDeviceGetMultiGpuBoard
  nvmlDeviceGetName
* nvmlDeviceGetPciInfo
  nvmlDeviceGetPciInfo_v2
* nvmlDeviceGetPerformanceState
  nvmlDeviceGetPersistenceMode
  nvmlDeviceGetPowerManagementDefaultLimit
  nvmlDeviceGetPowerManagementLimit
  nvmlDeviceGetPowerManagementLimitConstraints
  nvmlDeviceGetPowerManagementMode
  nvmlDeviceGetPowerState (deprecated)
* nvmlDeviceGetPowerUsage
  nvmlDeviceGetRetiredPages
  nvmlDeviceGetRetiredPagesPendingStatus
  nvmlDeviceGetSamples
  nvmlDeviceGetSerial
  nvmlDeviceGetSupportedClocksThrottleReasons
  nvmlDeviceGetSupportedEventTypes
  nvmlDeviceGetSupportedGraphicsClocks
  nvmlDeviceGetSupportedMemoryClocks
  nvmlDeviceGetTemperature
  nvmlDeviceGetTemperatureThreshold
  nvmlDeviceGetTotalEccErrors
  nvmlDeviceGetUtilizationRates
  nvmlDeviceGetUUID
  nvmlDeviceGetVbiosVersion
  nvmlDeviceGetViolationStatus

*/