/* * A trivial little dlopen()-based wrapper library for the * NVIDIA NVML library, to allow runtime discovery of NVML on an * arbitrary system. This is all very hackish and simple-minded, but * it serves my immediate needs in the short term until NVIDIA provides * a static NVML wrapper library themselves, hopefully in * CUDA 6.5 or maybe sometime shortly after. * * This trivial code is made available under the "new" 3-clause BSD license, * and/or any of the GPL licenses you prefer. * Feel free to use the code and modify as you see fit. * * John E. Stone - john.stone@gmail.com * Tanguy Pruvot - tpruvot@github * */ #include #include #include #include #include #include "miner.h" #include "nvml.h" #include "cuda_runtime.h" #ifdef USE_WRAPNVML extern nvml_handle *hnvml; extern char driver_version[32]; static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; extern uint32_t device_gpu_clocks[MAX_GPUS]; extern uint32_t device_mem_clocks[MAX_GPUS]; extern int32_t device_mem_offsets[MAX_GPUS]; extern uint8_t device_tlimit[MAX_GPUS]; extern int8_t device_pstate[MAX_GPUS]; extern int32_t device_led[MAX_GPUS]; int32_t device_led_state[MAX_GPUS] = { 0 }; static __thread bool has_rgb_ok = false; uint32_t clock_prev[MAX_GPUS] = { 0 }; uint32_t clock_prev_mem[MAX_GPUS] = { 0 }; uint32_t limit_prev[MAX_GPUS] = { 0 }; static bool nvml_plimit_set = false; extern bool need_memclockrst; /* * Wrappers to emulate dlopen() on other systems like Windows */ #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64) #include static void *wrap_dlopen(const char *filename) { HMODULE h = LoadLibrary(filename); if (!h && opt_debug) { applog(LOG_DEBUG, "dlopen(%d): failed to load %s", GetLastError(), filename); } return (void*)h; } static void *wrap_dlsym(void *h, const char *sym) { return (void *)GetProcAddress((HINSTANCE)h, sym); } static int wrap_dlclose(void *h) { /* FreeLibrary returns nonzero on success */ return (!FreeLibrary((HINSTANCE)h)); } #else /* assume we can use dlopen itself... */ #include #include static void *wrap_dlopen(const char *filename) { void *h = dlopen(filename, RTLD_NOW); if (h == NULL && opt_debug) { applog(LOG_DEBUG, "dlopen(%d): failed to load %s", errno, filename); } return (void*)h; } static void *wrap_dlsym(void *h, const char *sym) { return dlsym(h, sym); } static int wrap_dlclose(void *h) { return dlclose(h); } #endif nvml_handle * nvml_create() { int i=0; nvml_handle *nvmlh = NULL; #ifdef WIN32 /* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */ #define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll" #else /* linux assumed */ #define libnvidia_ml "libnvidia-ml.so" #endif char tmp[512]; #ifdef WIN32 ExpandEnvironmentStrings(libnvidia_ml, tmp, sizeof(tmp)); #else strcpy(tmp, libnvidia_ml); #endif void *nvml_dll = wrap_dlopen(tmp); if (nvml_dll == NULL) { #ifdef WIN32 nvml_dll = wrap_dlopen("nvml.dll"); if (nvml_dll == NULL) #endif return NULL; } nvmlh = (nvml_handle *) calloc(1, sizeof(nvml_handle)); nvmlh->nvml_dll = nvml_dll; nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit_v2"); if (!nvmlh->nvmlInit) nvmlh->nvmlInit = (nvmlReturn_t (*)(void)) wrap_dlsym(nvmlh->nvml_dll, "nvmlInit"); nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2"); if (!nvmlh->nvmlDeviceGetCount) nvmlh->nvmlDeviceGetCount = (nvmlReturn_t (*)(int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount"); nvmlh->nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(int, nvmlDevice_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2"); nvmlh->nvmlDeviceGetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAPIRestriction"); nvmlh->nvmlDeviceSetAPIRestriction = (nvmlReturn_t (*)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAPIRestriction"); nvmlh->nvmlDeviceGetDefaultApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetDefaultApplicationsClock"); nvmlh->nvmlDeviceGetApplicationsClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clocks)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetApplicationsClock"); nvmlh->nvmlDeviceSetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int mem, unsigned int gpu)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetApplicationsClocks"); nvmlh->nvmlDeviceResetApplicationsClocks = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceResetApplicationsClocks"); nvmlh->nvmlDeviceGetSupportedGraphicsClocks = (nvmlReturn_t (*)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedGraphicsClocks"); nvmlh->nvmlDeviceGetSupportedMemoryClocks = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSupportedMemoryClocks"); nvmlh->nvmlDeviceGetClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClockInfo"); nvmlh->nvmlDeviceGetMaxClockInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t, unsigned int *clock)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxClockInfo"); nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo_v2"); if (!nvmlh->nvmlDeviceGetPciInfo) nvmlh->nvmlDeviceGetPciInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPciInfo_t *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo"); nvmlh->nvmlDeviceGetCurrPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkGeneration"); nvmlh->nvmlDeviceGetCurrPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCurrPcieLinkWidth"); nvmlh->nvmlDeviceGetMaxPcieLinkGeneration = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *gen)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkGeneration"); nvmlh->nvmlDeviceGetMaxPcieLinkWidth = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *width)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxPcieLinkWidth"); nvmlh->nvmlDeviceGetPowerUsage = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage"); nvmlh->nvmlDeviceGetPowerManagementDefaultLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementDefaultLimit"); nvmlh->nvmlDeviceGetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimit"); nvmlh->nvmlDeviceGetPowerManagementLimitConstraints = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *min, unsigned int *max)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerManagementLimitConstraints"); nvmlh->nvmlDeviceSetPowerManagementLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetPowerManagementLimit"); nvmlh->nvmlDeviceGetName = (nvmlReturn_t (*)(nvmlDevice_t, char *, int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName"); nvmlh->nvmlDeviceGetTemperature = (nvmlReturn_t (*)(nvmlDevice_t, int, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature"); nvmlh->nvmlDeviceGetFanSpeed = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed"); nvmlh->nvmlDeviceGetPerformanceState = (nvmlReturn_t (*)(nvmlDevice_t, int *)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPerformanceState"); /* or nvmlDeviceGetPowerState */ nvmlh->nvmlDeviceGetSerial = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetSerial"); nvmlh->nvmlDeviceGetUUID = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetUUID"); nvmlh->nvmlDeviceGetVbiosVersion = (nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetVbiosVersion"); nvmlh->nvmlSystemGetDriverVersion = (nvmlReturn_t (*)(char *, unsigned int)) wrap_dlsym(nvmlh->nvml_dll, "nvmlSystemGetDriverVersion"); nvmlh->nvmlErrorString = (char* (*)(nvmlReturn_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlErrorString"); nvmlh->nvmlShutdown = (nvmlReturn_t (*)()) wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown"); // v331 nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); // v340 #ifdef __linux__ nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); #endif // v346 nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); // v36x (API 8 / Pascal) nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock"); if (nvmlh->nvmlInit == NULL || nvmlh->nvmlShutdown == NULL || nvmlh->nvmlErrorString == NULL || nvmlh->nvmlDeviceGetCount == NULL || nvmlh->nvmlDeviceGetHandleByIndex == NULL || nvmlh->nvmlDeviceGetPciInfo == NULL || nvmlh->nvmlDeviceGetName == NULL) { if (opt_debug) applog(LOG_DEBUG, "Failed to obtain required NVML function pointers"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } nvmlh->nvmlInit(); if (nvmlh->nvmlSystemGetDriverVersion) nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); /* Query CUDA device count, in case it doesn't agree with NVML, since */ /* CUDA will only report GPUs with compute capability greater than 1.0 */ if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) { if (opt_debug) applog(LOG_DEBUG, "Failed to query CUDA device count!"); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh); return NULL; } nvmlh->devs = (nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(nvmlDevice_t)); nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_vendor_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); /* Obtain GPU device handles we're going to need repeatedly... */ for (i=0; invml_gpucount; i++) { nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]); } /* Query PCI info for each NVML device, and build table for mapping of */ /* CUDA device IDs to NVML device IDs and vice versa */ for (i=0; invml_gpucount; i++) { nvmlPciInfo_t pciinfo; nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo); nvmlh->nvml_pci_domain_id[i] = pciinfo.domain; nvmlh->nvml_pci_bus_id[i] = pciinfo.bus; nvmlh->nvml_pci_device_id[i] = pciinfo.device; nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id; nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id; nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN; if (nvmlh->nvmlDeviceSetAPIRestriction) { nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, NVML_FEATURE_ENABLED); /* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */ } if (nvmlh->nvmlDeviceGetAPIRestriction) { nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &nvmlh->app_clocks[i]); } } /* build mapping of NVML device IDs to CUDA IDs */ for (i=0; invml_gpucount; i++) { nvmlh->nvml_cuda_device_id[i] = -1; } for (i=0; icuda_gpucount; i++) { cudaDeviceProp props; nvmlh->cuda_nvml_device_id[i] = -1; if (cudaGetDeviceProperties(&props, i) == cudaSuccess) { device_bus_ids[i] = props.pciBusID; for (int j = 0; j < nvmlh->nvml_gpucount; j++) { if ((nvmlh->nvml_pci_domain_id[j] == (uint32_t) props.pciDomainID) && (nvmlh->nvml_pci_bus_id[j] == (uint32_t) props.pciBusID) && (nvmlh->nvml_pci_device_id[j] == (uint32_t) props.pciDeviceID)) { if (opt_debug) applog(LOG_DEBUG, "CUDA GPU %d matches NVML GPU %d by busId %u", i, j, (uint32_t) props.pciBusID); nvmlh->nvml_cuda_device_id[j] = i; nvmlh->cuda_nvml_device_id[i] = j; } } } } return nvmlh; } /* apply config clocks to an used device */ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; //if (need_nvsettings) /* prefer later than init time */ // nvs_set_clocks(dev_id); if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; if (!device_gpu_clocks[dev_id] && !device_mem_clocks[dev_id]) return 0; // nothing to do if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { applog(LOG_WARNING, "GPU #%d: NVML application clock feature is not allowed!", dev_id); return -EPERM; } uint32_t mem_prev = clock_prev_mem[dev_id]; if (!mem_prev) nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev); uint32_t gpu_prev = clock_prev[dev_id]; if (!gpu_prev) nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev); nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); return -EINVAL; } if (opt_debug) applog(LOG_DEBUG, "GPU #%d: default application clocks are %u/%u", dev_id, mem_clk, gpu_clk); // get application config values if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ uint32_t nclocks = 0, mem_clocks[32] = { 0 }; nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); nclocks = min(nclocks, 32); if (nclocks) nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest is first memory clock - P0) if (mem_clocks[u] <= mem_clk) { mem_clk = mem_clocks[u]; break; } } uint32_t* gpu_clocks = NULL; nclocks = 0; nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); if (nclocks) { if (opt_debug) applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk); gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); for (uint8_t u=0; u < nclocks; u++) { // ordered desc, so get first if (gpu_clocks[u] <= gpu_clk) { gpu_clk = gpu_clocks[u]; break; } } free(gpu_clocks); } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); if (rc == NVML_SUCCESS) applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk); else { applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } // store previous clocks for reset on exit (or during wait...) clock_prev[dev_id] = gpu_prev; clock_prev_mem[dev_id] = mem_prev; return 1; } /* reset default app clocks and limits on exit */ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) { int ret = 0; nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; if (need_nvsettings) nvs_reset_clocks(dev_id); if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; if (clock_prev[dev_id]) { rc = nvmlh->nvmlDeviceResetApplicationsClocks(nvmlh->devs[n]); if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to reset application clocks", dev_id); } clock_prev[dev_id] = 0; ret = 1; } if (limit_prev[dev_id]) { uint32_t plimit = limit_prev[dev_id]; if (nvmlh->nvmlDeviceGetPowerManagementDefaultLimit && !plimit) { rc = nvmlh->nvmlDeviceGetPowerManagementDefaultLimit(nvmlh->devs[n], &plimit); } else if (plimit) { rc = NVML_SUCCESS; } if (rc == NVML_SUCCESS) nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); ret = 1; } return ret; } /** * Set power state of a device (9xx) * Code is similar as clocks one, which allow the change of the pstate */ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; if (device_pstate[dev_id] < 0) return 0; if (nvmlh->app_clocks[n] != NVML_FEATURE_ENABLED) { applog(LOG_WARNING, "GPU #%d: NVML app. clock feature is not allowed!", dev_id); return -EPERM; } nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); return -EINVAL; } // get application config values if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ uint32_t nclocks = 0, mem_clocks[32] = { 0 }; int8_t wanted_pstate = device_pstate[dev_id]; nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); nclocks = min(nclocks, 32); if (nclocks) nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); if ((uint32_t) wanted_pstate+1 > nclocks) { applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks); } for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest P0 first) if (u == wanted_pstate) { mem_clk = mem_clocks[u]; break; } } uint32_t* gpu_clocks = NULL; nclocks = 0; nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); if (nclocks) { gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); if (rc == NVML_SUCCESS) { // ordered desc, get the max app clock (do not limit) gpu_clk = gpu_clocks[0]; } free(gpu_clocks); } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } if (!opt_quiet) applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk); clock_prev[dev_id] = 1; return 1; } int nvml_set_plimit(nvml_handle *nvmlh, int dev_id) { nvmlReturn_t rc = NVML_ERROR_UNKNOWN; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; if (!device_plimit[dev_id]) return 0; // nothing to do if (!nvmlh->nvmlDeviceSetPowerManagementLimit) return -ENOSYS; uint32_t plimit = device_plimit[dev_id] * 1000; uint32_t pmin = 1000, pmax = 0, prev_limit = 0; if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax); if (rc != NVML_SUCCESS) { if (!nvmlh->nvmlDeviceGetPowerManagementLimit) return -ENOSYS; } nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit); if (!pmax) pmax = prev_limit; plimit = min(plimit, pmax); plimit = max(plimit, pmin); rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); if (rc != NVML_SUCCESS) { #ifndef WIN32 applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc)); #endif return -1; } else { device_plimit[dev_id] = plimit / 1000; nvml_plimit_set = true; } if (!opt_quiet) { applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)", dev_id, plimit/1000U, pmin/1000U, pmax/1000U); } limit_prev[dev_id] = prev_limit; return 1; } uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) { uint32_t plimit = 0; int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1; if (n < 0 || n >= nvmlh->nvml_gpucount) return 0; if (nvmlh->nvmlDeviceGetPowerManagementLimit) { nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit); } return plimit; } // ccminer -D -n #define LSTDEV_PFX " " void nvml_print_device_info(int dev_id) { if (!hnvml) return; int n = hnvml->cuda_nvml_device_id[dev_id]; if (n < 0 || n >= hnvml->nvml_gpucount) return; nvmlReturn_t rc; // fprintf(stderr, "------ Hardware ------\n"); int gvid = hnvml->nvml_pci_vendor_id[n] & 0xFFFF; int gpid = hnvml->nvml_pci_vendor_id[n] >> 16; int svid = hnvml->nvml_pci_subsys_id[n] & 0xFFFF; int spid = hnvml->nvml_pci_subsys_id[n] >> 16; fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid, (int) hnvml->nvml_pci_domain_id[n], (int) hnvml->nvml_pci_bus_id[n], (int) hnvml->nvml_pci_device_id[n]); if (hnvml->nvmlDeviceGetClock) { uint32_t gpu_clk = 0, mem_clk = 0; // fprintf(stderr, "------- Clocks -------\n"); hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk); if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk); if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk); rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk); if (rc == NVML_SUCCESS) { fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); } } } int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount) { *gpucount = nvmlh->nvml_gpucount; return 0; } int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount) { *gpucount = nvmlh->cuda_gpucount; return 0; } int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetName) return -ENOSYS; if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != NVML_SUCCESS) return -1; return 0; } int nvml_get_tempC(nvml_handle *nvmlh, int cudaindex, unsigned int *tempC) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetTemperature) return -ENOSYS; rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC); if (rc != NVML_SUCCESS) { return -1; } return 0; } int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) { nvmlReturn_t rc; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetFanSpeed) return -ENOSYS; rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt); if (rc != NVML_SUCCESS) { return -1; } return 0; } int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock) { nvmlReturn_t rc; int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); if (rc != NVML_SUCCESS) return -1; rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); if (rc != NVML_SUCCESS) return -1; return 0; } /* Not Supported on 750Ti 340.23 */ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetPowerUsage) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts); if (res != NVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPowerUsage: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } /* Not Supported on 750Ti 340.23 */ int nvml_get_pstate(nvml_handle *nvmlh, int cudaindex, int *pstate) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetPerformanceState) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetPerformanceState(nvmlh->devs[gpuindex], pstate); if (res != NVML_SUCCESS) { //if (opt_debug) // applog(LOG_DEBUG, "nvmlDeviceGetPerformanceState: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } int nvml_get_busid(nvml_handle *nvmlh, int cudaindex, int *busid) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; (*busid) = nvmlh->nvml_pci_bus_id[gpuindex]; return 0; } int nvml_get_serial(nvml_handle *nvmlh, int cudaindex, char *sn, int maxlen) { uint32_t subids = 0; char uuid[NVML_DEVICE_UUID_BUFFER_SIZE]; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; nvmlReturn_t res; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (nvmlh->nvmlDeviceGetSerial) { res = nvmlh->nvmlDeviceGetSerial(nvmlh->devs[gpuindex], sn, maxlen); if (res == NVML_SUCCESS) return 0; } if (!nvmlh->nvmlDeviceGetUUID) return -ENOSYS; // nvmlDeviceGetUUID: GPU-f2bd642c-369f-5a14-e0b4-0d22dfe9a1fc // use a part of uuid to generate an unique serial // todo: check if there is vendor id is inside memset(uuid, 0, sizeof(uuid)); res = nvmlh->nvmlDeviceGetUUID(nvmlh->devs[gpuindex], uuid, sizeof(uuid)-1); if (res != NVML_SUCCESS) { if (opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetUUID: %s", nvmlh->nvmlErrorString(res)); return -1; } strncpy(sn, &uuid[4], min((int) strlen(uuid), maxlen)); sn[maxlen-1] = '\0'; return 0; } int nvml_get_bios(nvml_handle *nvmlh, int cudaindex, char *desc, int maxlen) { uint32_t subids = 0; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; if (!nvmlh->nvmlDeviceGetVbiosVersion) return -ENOSYS; nvmlReturn_t res = nvmlh->nvmlDeviceGetVbiosVersion(nvmlh->devs[gpuindex], desc, maxlen); if (res != NVML_SUCCESS) { if (opt_debug) applog(LOG_DEBUG, "nvmlDeviceGetVbiosVersion: %s", nvmlh->nvmlErrorString(res)); return -1; } return 0; } int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pid) { uint32_t subids = 0; int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount) return -ENODEV; subids = nvmlh->nvml_pci_subsys_id[gpuindex]; if (!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex]; pid = subids >> 16; vid = subids & 0xFFFF; // Colorful and Inno3D if (pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16; return 0; } int nvml_destroy(nvml_handle *nvmlh) { nvmlh->nvmlShutdown(); wrap_dlclose(nvmlh->nvml_dll); free(nvmlh->nvml_pci_bus_id); free(nvmlh->nvml_pci_device_id); free(nvmlh->nvml_pci_domain_id); free(nvmlh->nvml_pci_vendor_id); free(nvmlh->nvml_pci_subsys_id); free(nvmlh->nvml_cuda_device_id); free(nvmlh->cuda_nvml_device_id); free(nvmlh->app_clocks); free(nvmlh->devs); free(nvmlh); return 0; } // ---------------------------------------------------------------------------- /** * nvapi alternative for windows x86 binaries * nvml api doesn't exists as 32bit dll :/// */ #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 }; static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvU32 nvapi_dev_cnt = 0; extern bool nvapi_dll_loaded; int nvapi_temperature(unsigned int devNum, unsigned int *temperature) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_THERMAL_SETTINGS thermal; thermal.version = NV_GPU_THERMAL_SETTINGS_VER; ret = NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &thermal); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetThermalSettings: %s", string); return -1; } (*temperature) = (unsigned int) thermal.sensor[0].currentTemp; return 0; } int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; NvU32 fanspeed = 0; ret = NvAPI_GPU_GetTachReading(phys[devNum], &fanspeed); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetTachReading: %s", string); return -1; } (*speed) = (unsigned int) fanspeed; return 0; } int nvapi_getpstate(unsigned int devNum, unsigned int *pstate) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_PERF_PSTATE_ID CurrentPstate = NVAPI_GPU_PERF_PSTATE_UNDEFINED; /* 16 */ ret = NvAPI_GPU_GetCurrentPstate(phys[devNum], &CurrentPstate); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetCurrentPstate: %s", string); return -1; } else { // get pstate for the moment... often 0 = P0 (*pstate) = (unsigned int)CurrentPstate; } return 0; } #define UTIL_DOMAIN_GPU 0 int nvapi_getusage(unsigned int devNum, unsigned int *pct) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_DYNAMIC_PSTATES_INFO_EX info; info.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; ret = NvAPI_GPU_GetDynamicPstatesInfoEx(phys[devNum], &info); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI GetDynamicPstatesInfoEx: %s", string); return -1; } else { if (info.utilization[UTIL_DOMAIN_GPU].bIsPresent) (*pct) = info.utilization[UTIL_DOMAIN_GPU].percentage; } return 0; } int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid) { NvAPI_Status ret; NvU32 pDeviceId, pSubSystemId, pRevisionId, pExtDeviceId; if (devNum >= nvapi_dev_cnt) return -ENODEV; ret = NvAPI_GPU_GetPCIIdentifiers(phys[devNum], &pDeviceId, &pSubSystemId, &pRevisionId, &pExtDeviceId); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI GetPCIIdentifiers: %s", string); return -1; } pid = pDeviceId >> 16; vid = pDeviceId & 0xFFFF; if (vid == 0x10DE && pSubSystemId) { vid = pSubSystemId & 0xFFFF; pid = pSubSystemId >> 16; // Colorful and Inno3D if (pid == 0) pid = pDeviceId >> 16; } return 0; } int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; memset(serial, 0, maxlen); if (maxlen < 11) return -EINVAL; NvAPI_ShortString ser = { 0 }; ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string); return -1; } uint8_t *bytes = (uint8_t*) ser; for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]); return 0; } int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen) { NvAPI_Status ret; if (devNum >= nvapi_dev_cnt) return -ENODEV; if (maxlen < 64) // Short String return -1; ret = NvAPI_GPU_GetVbiosVersionString(phys[devNum], desc); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI GetVbiosVersionString: %s", string); return -1; } return 0; } static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState) { NvAPI_Status ret = NVAPI_OK; NV_I2C_INFO_EX* i2cInfo; int delay1 = 20000; int delay2 = 0; uchar4 rgb = { 0 }; memcpy(&rgb, &RGB, 4); uchar4 prgb = { 0 }; int32_t prev = device_led_state[nvapi_devid(devNum)]; memcpy(&prgb, &prev, 4); NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); if (i2cInfo == NULL) return -ENOMEM; NvU32 data[5] = { 0 }; NvU32 datv[2] = { 0, 1 }; NvU32 datw[2] = { 1, 0 }; if (rgb.z != prgb.z || ignorePrevState) { data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo) data[3] = 1; datv[0] = rgb.z | 0x13384000; i2cInfo->i2cDevAddress = 0x52; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); usleep(delay1); has_rgb_ok = (ret == NVAPI_OK); } if (rgb.y != prgb.y || ignorePrevState) { data[2] = 5; data[3] = 1; datv[0] = rgb.y | 0x4000; i2cInfo->i2cDevAddress = 0x52; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); usleep(delay1); has_rgb_ok = (ret == NVAPI_OK); } if (rgb.y != prgb.y || ignorePrevState) { data[2] = 6; data[3] = 1; datv[0] = rgb.x | 0x4000; i2cInfo->i2cDevAddress = 0x52; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); usleep(delay1); has_rgb_ok = (ret == NVAPI_OK); } if (rgb.w && ignorePrevState) { data[2] = 7; data[3] = 1; datv[0] = rgb.w | 0x4000; i2cInfo->i2cDevAddress = 0x52; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; i2cInfo->pbData = (NvU8*) datv; i2cInfo->cbRead = 5; i2cInfo->cbSize = 1; i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); usleep(delay1); has_rgb_ok = (ret == NVAPI_OK); } usleep(delay2); free(i2cInfo); return (int) ret; } static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) { NvAPI_Status ret; NV_I2C_INFO_EX* i2cInfo; NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); if (i2cInfo == NULL) return -ENOMEM; NvU32 readBuf[25] = { 0 }; NvU32 data[5] = { 0 }; data[0] = 1; data[2] = swab32(RGB & 0xfcfcfcU) | 0x40; i2cInfo->i2cDevAddress = 0x48 << 1; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS i2cInfo->pbData = (NvU8*) readBuf; i2cInfo->cbRead = 2; i2cInfo->cbSize = sizeof(readBuf); i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; //ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data); ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data); usleep(20000); free(i2cInfo); return (int) ret; } static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) { NvAPI_Status ret; NV_I2C_INFO* i2cInfo; NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo); if (i2cInfo == NULL) return -ENOMEM; NvU32 buf[25] = { 0 }; NvU32 data[5] = { 0 }; uint32_t color = 0, level = 0x40; uchar4 rgb = { 0 }; memcpy(&rgb, &RGB, 4); level = rgb.x & 0xF0; level |= rgb.y & 0xF0; level |= rgb.z & 0xF0; //applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x); // Not really RGB custom, only some basic colors, so convert // 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; else if (RGB & 0xFF) color = 4; else if (RGB & 0xFF00) color = 2; buf[0] = 0xF0; // F0 set colors buf[0] |= (color << 8); // logo buf[0] |= (1 << 16); // top if (RGB != 0) // level : 0x10 to 0xF0 buf[0] |= (level << 24); else buf[0] |= (0x10U << 24); // todo: i2c data crc ? i2cInfo->displayMask = 1; i2cInfo->bIsDDCPort = 1; i2cInfo->i2cDevAddress = 0x48 << 1; i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); i2cInfo->regAddrSize = 1; i2cInfo->pbData = (NvU8*) buf; i2cInfo->cbSize = 4; i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED; i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4 i2cInfo->portId = 1; i2cInfo->bIsPortIdSet = 1; ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); // required to prevent i2c lock usleep(20000); #if 0 buf[0] = 0xF7; // F7 toggle leds if (RGB == 0) buf[0] |= (1 << 8); // 0 logo on, 1 off buf[0] |= (1 << 16); // 1 top off ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); usleep(20000); #endif // other modes: // 0xF1 breathing green (0x070202F1) // 0xF2 strobe green (0x070202F2) // 0xF3 cycle (0x000000F3) free(i2cInfo); return (int) ret; } int nvapi_set_led(unsigned int devNum, int RGB, char *device_name) { uint16_t vid = 0, pid = 0; NvAPI_Status ret; if (strstr(device_name, "Gigabyte GTX 10")) { if (opt_debug) applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); return SetGigabyteRGBLogo(devNum, (uint32_t) RGB); } else if (strstr(device_name, "ASUS GTX 10")) { if (opt_debug) applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok); } else if (strstr(device_name, "Zotac GTX 10")) { if (opt_debug) applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); return SetZotacRGBLogo(devNum, (uint32_t) RGB); } else { NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu; NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu); illu->hPhysicalGpu = phys[devNum]; illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; ret = NvAPI_GPU_QueryIlluminationSupport(illu); if (!ret && illu->bSupported) { NV_GPU_GET_ILLUMINATION_PARM *led; NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led); led->hPhysicalGpu = phys[devNum]; led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; NvAPI_GPU_GetIllumination(led); if (opt_debug) applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB); led->Value = (uint32_t) RGB; ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led); free(led); } free(illu); return ret; } } int nvapi_pstateinfo(unsigned int devNum) { uint32_t n; NvAPI_Status ret; uint32_t* mem = (uint32_t*) calloc(1, 0x4000); if (!mem) return -ENOMEM; unsigned int current = 0xFF; // useless on init but... nvapi_getpstate(devNum, ¤t); #if 0 // try :p uint32_t* buf = (uint32_t*) calloc(1, 0x8000); for (int i=8; i < 0x8000 && buf; i+=4) { buf[0] = 0x10000 + i; NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0; ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf); if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string); for (int n=0; n < i/32; n++) applog_hex(&buf[n*(32/4)], 32); break; } } free(buf); #endif #if 0 // Unsure of the meaning of these values NVAPI_GPU_POWER_TOPO topo = { 0 }; topo.version = NVAPI_GPU_POWER_TOPO_VER; if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) { if (topo.count) applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?", (double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000); // Ok on 970, not pascal NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 }; pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2; pset2.ov.numVoltages = 1; pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv; ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2); #endif NV_GPU_PERF_PSTATES20_INFO* info; NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem); if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_RAW, "NVAPI GetPstates20: %s", string); return -1; } for (n=0; n < info->numPstates; n++) { NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks; applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d", info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId, clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ", (double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ", info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ", info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000); if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) { applog(LOG_RAW, " OC %+4d MHz %+6.1f MHz", clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000); } } // boost over volting (GTX 9xx only ?) for (n=0; n < info->ov.numVoltages; n++) { applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d", info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? "*":" ", info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000); } NV_GPU_CLOCK_FREQUENCIES *freqs; NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); // Other clock values ?? NVAPI_GPU_PERF_CLOCKS *pcl; NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); int numClock=0; ret = NVAPI_OK; while (ret == NVAPI_OK) { if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) { applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, (double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000); // ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error } numClock++; } // Pascal only NVAPI_VOLTBOOST_PERCENT *pvb; NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) { NVAPI_VOLTAGE_STATUS *pvdom; NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); if (pvdom && pvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent); else if (pvdom) applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000); free(pvdom); } else { // Maxwell 9xx NVAPI_VOLT_STATUS *mvdom, *mvstep; NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) { NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", (double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000); free(mvstep); } free(mvdom); } uint32_t plim = nvapi_get_plimit(devNum); double min_pw = 0, max_pw = 0; // percent NVAPI_GPU_POWER_INFO nfo = { 0 }; nfo.version = NVAPI_GPU_POWER_INFO_VER; ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); if (ret == NVAPI_OK && nfo.valid) { min_pw = (double)nfo.entries[0].min_power / 1000; max_pw = (double)nfo.entries[0].max_power / 1000; } applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); #if 0 NVAPI_COOLER_SETTINGS *cooler; NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); if (ret == NVAPI_OK) { applog(LOG_RAW, " Fan level is set to %u%%", cooler->level); // wrong val, seems 1 (auto ?) NVAPI_COOLER_LEVEL *fan; NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); fan->level = 100; fan->count = 1; ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); free(fan); sleep(10); ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); } #endif NV_GPU_THERMAL_SETTINGS *tset; NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); NVAPI_GPU_THERMAL_INFO *tnfo; NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); NVAPI_GPU_THERMAL_LIMIT *tlim; NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) { applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); } free(tnfo); free(tlim); #if 1 // Read pascal Clocks Table, Empty on 9xx //NVAPI_CLOCKS_RANGE* ranges; //NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); //ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); NVAPI_CLOCK_MASKS* boost; NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); int gpuClocks = 0, memClocks = 0; for (n=0; n < 80+23; n++) { if (boost->clocks[n].memDelta) memClocks++; if (boost->clocks[n].gpuDelta) gpuClocks++; } // PASCAL GTX ONLY if (gpuClocks || memClocks) { NVAPI_CLOCK_TABLE *table; NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); memcpy(table->mask, boost->mask, 12); ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); gpuClocks = 0, memClocks = 0; for (n=0; n < 12; n++) { if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); } for (n=0; n < 80; n++) { if (table->gpuDeltas[n].freqDelta) { // note: gpu delta value seems to be x2, not the memory //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); gpuClocks++; } } for (n=0; n < 23; n++) { if (table->memFilled[n]) { //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); memClocks++; } } for (n=0; n < 1529; n++) { if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]); } applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); free(table); NVAPI_VFP_CURVE *curve; NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); memcpy(curve->mask, boost->mask, 12); ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); gpuClocks = 0, memClocks = 0; for (n=0; n < 80; n++) { if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) { // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); gpuClocks++; } } for (n=0; n < 23; n++) { if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) { // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); memClocks++; } } for (n=0; n < 1064; n++) { if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); } applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); free(curve); } // Maxwell else { NVAPI_VOLTAGES_TABLE* volts; NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); int entries = 0; ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); for (n=0; n < 128; n++) { if (volts->entries[n].volt_uV) entries++; } applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); free(volts); } NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) { applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024, (double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024); } #if 0 /* some undetermined stats */ NVAPI_GPU_PERF_INFO pi = { 0 }; pi.version = NVAPI_GPU_PERF_INFO_VER; ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); NVAPI_GPU_PERF_STATUS ps = { 0 }; ps.version = NVAPI_GPU_PERF_STATUS_VER; ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); #endif #endif free(mem); return 0; } // workaround for buggy driver 378.49 unsigned int nvapi_get_gpu_clock(unsigned int devNum) { NvAPI_Status ret = NVAPI_OK; unsigned int freq = 0; NV_GPU_CLOCK_FREQUENCIES *freqs; NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); if (ret == NVAPI_OK) { freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; } free(freqs); return freq; // in MHz } uint8_t nvapi_get_plimit(unsigned int devNum) { NvAPI_Status ret = NVAPI_OK; NVAPI_GPU_POWER_STATUS pol = { 0 }; pol.version = NVAPI_GPU_POWER_STATUS_VER; if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); return 0; } return (uint8_t) (pol.entries[0].power / 1000); // in percent } int nvapi_set_plimit(unsigned int devNum, uint16_t percent) { NvAPI_Status ret = NVAPI_OK; uint32_t val = percent * 1000; NVAPI_GPU_POWER_INFO nfo = { 0 }; nfo.version = NVAPI_GPU_POWER_INFO_VER; ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); if (ret == NVAPI_OK) { if (val == 0) val = nfo.entries[0].def_power; else if (val < nfo.entries[0].min_power) val = nfo.entries[0].min_power; else if (val > nfo.entries[0].max_power) val = nfo.entries[0].max_power; } NVAPI_GPU_POWER_STATUS pol = { 0 }; pol.version = NVAPI_GPU_POWER_STATUS_VER; pol.flags = 1; pol.entries[0].power = val; if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); return -1; } return ret; } int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) { NvAPI_Status ret; uint32_t val = limit; if (devNum >= nvapi_dev_cnt) return -ENODEV; NV_GPU_THERMAL_SETTINGS tset = { 0 }; NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; tset.version = NV_GPU_THERMAL_SETTINGS_VER; NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) { tlim.entries[0].value = val << 8; tlim.flags = 1; ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); if (ret == NVAPI_OK) { applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", devNum, val, tset.sensor[0].currentTemp, tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); } else { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); } } return (int) ret; } int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) { NvAPI_Status ret; NvS32 delta = 0; if (devNum >= nvapi_dev_cnt) return -ENODEV; #if 0 // wrong api to get default base clock when modified, cuda props seems fine NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); if (ret == NVAPI_OK) { delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; } NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! if (ret == NVAPI_OK) { if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2; } #endif cudaDeviceProp props = { 0 }; NvU32 busId = 0xFFFF; ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); for (int d=0; d < (int) nvapi_dev_cnt; d++) { // unsure about devNum, so be safe cudaGetDeviceProperties(&props, d); if (props.pciBusID == busId) { delta = (clock * 1000) - props.clockRate; break; } } if (delta == (clock * 1000)) return ret; NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; pset1.numPstates = 1; pset1.numClocks = 1; // Ok on both 1080 and 970 pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); if (ret == NVAPI_OK) { applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000); } return ret; } int nvapi_set_memclock(unsigned int devNum, uint32_t clock) { NvAPI_Status ret; NvS32 delta = 0; if (devNum >= nvapi_dev_cnt) return -ENODEV; // wrong to get default base clock (when modified) on maxwell (same as cuda props one) NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless if (ret == NVAPI_OK) { delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; } // seems ok on maxwell and pascal for the mem clocks NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks if (ret == NVAPI_OK) { if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; } if (delta == (clock * 1000)) return ret; // todo: bounds check with GetPstates20 NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; pset1.numPstates = 1; pset1.numClocks = 1; pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); if (ret == NVAPI_OK) { applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000); } return ret; } static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true) { NvAPI_Status ret; NvS32 deltaKHz = delta * 1000; if (devNum >= nvapi_dev_cnt) return -ENODEV; // todo: bounds check with GetPstates20 NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; pset1.numPstates = 1; pset1.numClocks = 1; pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); if (ret == NVAPI_OK) { if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); need_memclockrst = true; } return ret; } // Replacement for WIN32 CUDA 6.5 on pascal int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) { NvAPI_Status ret = NVAPI_OK; NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 }; mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { *total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; *free = (uint64_t) mem.curAvailableDedicatedVideoMemory; } return (int) ret; } int nvapi_init() { int num_gpus = cuda_num_devices(); NvAPI_Status ret = NvAPI_Initialize(); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_Initialize: %s", string); return -1; } ret = NvAPI_EnumPhysicalGPUs(phys, &nvapi_dev_cnt); if (ret != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); if (opt_debug) applog(LOG_DEBUG, "NVAPI NvAPI_EnumPhysicalGPUs: %s", string); return -1; } for (int g = 0; g < num_gpus; g++) { cudaDeviceProp props; if (cudaGetDeviceProperties(&props, g) == cudaSuccess) { device_bus_ids[g] = props.pciBusID; } nvapi_dev_map[g] = g; // default mapping } for (NvU8 i = 0; i < nvapi_dev_cnt; i++) { NvAPI_ShortString name; ret = NvAPI_GPU_GetFullName(phys[i], name); if (ret == NVAPI_OK) { for (int g = 0; g < num_gpus; g++) { NvU32 busId; ret = NvAPI_GPU_GetBusId(phys[i], &busId); if (ret == NVAPI_OK && busId == device_bus_ids[g]) { nvapi_dev_map[g] = i; if (opt_debug) applog(LOG_DEBUG, "CUDA GPU %d matches NVAPI GPU %d by busId %u", g, i, busId); break; } } } else { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string); } } #if 0 if (opt_debug) { NvAPI_ShortString ver; NvAPI_GetInterfaceVersionString(ver); applog(LOG_DEBUG, "%s", ver); } #endif NvU32 udv; NvAPI_ShortString str; ret = NvAPI_SYS_GetDriverAndBranchVersion(&udv, str); if (ret == NVAPI_OK) { sprintf(driver_version,"%d.%02d", udv / 100, udv % 100); } return 0; } int nvapi_init_settings() { // nvapi.dll int ret = nvapi_dll_init(); if (ret != NVAPI_OK) return ret; if (!opt_n_threads) { opt_n_threads = active_gpus; } for (int n=0; n < opt_n_threads; n++) { int dev_id = device_map[n % MAX_GPUS]; if (device_plimit[dev_id] && !nvml_plimit_set) { if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) { uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]); gpulog(LOG_INFO, n, "Power limit is set to %u%%", res); } } if (device_tlimit[dev_id]) { nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]); } if (device_gpu_clocks[dev_id]) { ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]); if (ret) { NvAPI_ShortString string; NvAPI_GetErrorMessage((NvAPI_Status) ret, string); gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string); } } if (device_mem_offsets[dev_id]) { ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]); if (ret) { NvAPI_ShortString string; NvAPI_GetErrorMessage((NvAPI_Status)ret, string); gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string); } } else if (device_mem_clocks[dev_id]) { ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]); if (ret) { NvAPI_ShortString string; NvAPI_GetErrorMessage((NvAPI_Status) ret, string); gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string); } } if (device_pstate[dev_id]) { // dunno how via nvapi or/and pascal } if (device_led[dev_id] != -1) { int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]); if (err != 0) { gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err); } device_led_state[dev_id] = device_led[dev_id]; } } return ret; } void nvapi_toggle_clocks(int thr_id, bool enable) { int dev_id = device_map[thr_id % MAX_GPUS]; if (device_mem_offsets[dev_id]) { nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false); } } unsigned int nvapi_devnum(int dev_id) { return nvapi_dev_map[dev_id]; } int nvapi_devid(unsigned int devNum) { for (int i=0; i < opt_n_threads; i++) { int dev_id = device_map[i % MAX_GPUS]; if (nvapi_dev_map[dev_id] = devNum) return dev_id; } return 0; } #endif /* WIN32 : Windows specific (nvapi) */ /* api functions -------------------------------------- */ // assume 2500 rpm as default, auto-updated if more static unsigned int fan_speed_max = 2500; unsigned int gpu_fanpercent(struct cgpu_info *gpu) { unsigned int pct = 0; if (hnvml) { nvml_get_fanpcnt(hnvml, gpu->gpu_id, &pct); } #ifdef WIN32 else { unsigned int rpm = 0; nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); pct = (rpm * 100) / fan_speed_max; if (pct > 100) { pct = 100; fan_speed_max = rpm; } } #endif return pct; } unsigned int gpu_fanrpm(struct cgpu_info *gpu) { unsigned int rpm = 0; #ifdef WIN32 nvapi_fanspeed(nvapi_dev_map[gpu->gpu_id], &rpm); #endif return rpm; } float gpu_temp(struct cgpu_info *gpu) { float tc = 0.0; unsigned int tmp = 0; if (hnvml) { nvml_get_tempC(hnvml, gpu->gpu_id, &tmp); tc = (float)tmp; } #ifdef WIN32 else { nvapi_temperature(nvapi_dev_map[gpu->gpu_id], &tmp); tc = (float)tmp; } #endif return tc; } int gpu_pstate(struct cgpu_info *gpu) { int pstate = -1; int support = -1; if (hnvml) { support = nvml_get_pstate(hnvml, gpu->gpu_id, &pstate); } #ifdef WIN32 if (support == -1) { unsigned int pst = 0; nvapi_getpstate(nvapi_dev_map[gpu->gpu_id], &pst); pstate = (int) pst; } #endif return pstate; } int gpu_busid(struct cgpu_info *gpu) { int busid = -1; int support = -1; if (hnvml) { support = nvml_get_busid(hnvml, gpu->gpu_id, &busid); } #ifdef WIN32 if (support == -1) { busid = device_bus_ids[gpu->gpu_id]; } #endif return busid; } unsigned int gpu_power(struct cgpu_info *gpu) { unsigned int mw = 0; int support = -1; if (hnvml) { support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw); } #ifdef WIN32 if (support == -1) { unsigned int pct = 0; nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct); pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); pct /= 100; mw = pct; // to fix } #endif if (gpu->gpu_power > 0) { // average mw = (gpu->gpu_power + mw) / 2; } return mw; } unsigned int gpu_plimit(struct cgpu_info *gpu) { unsigned int mw = 0; int support = -1; if (hnvml) { mw = nvml_get_plimit(hnvml, gpu->gpu_id); support = (mw > 0); } #ifdef WIN32 // NVAPI value is in % (< 100 so) if (support == -1) { mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]); } #endif return mw; } static int translate_vendor_id(uint16_t vid, char *vendorname) { struct VENDORS { const uint16_t vid; const char *name; } vendors[] = { { 0x1043, "ASUS" }, { 0x1048, "Elsa" }, { 0x107D, "Leadtek" }, { 0x10B0, "Gainward" }, // { 0x10DE, "NVIDIA" }, { 0x1458, "Gigabyte" }, { 0x1462, "MSI" }, { 0x154B, "PNY" }, // maybe storage devices { 0x1569, "Palit" }, { 0x1682, "XFX" }, { 0x196D, "Club3D" }, { 0x196E, "PNY" }, { 0x19DA, "Zotac" }, { 0x19F1, "BFG" }, { 0x1ACC, "PoV" }, { 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards { 0x3842, "EVGA" }, { 0x7377, "Colorful" }, { 0, "" } }; if (!vendorname) return -EINVAL; for(int v=0; v < ARRAY_SIZE(vendors); v++) { if (vid == vendors[v].vid) { strcpy(vendorname, vendors[v].name); return vid; } } if (opt_debug && vid != 0x10DE) applog(LOG_DEBUG, "nvml: Unknown vendor %04x\n", vid); return 0; } int gpu_vendor(uint8_t pci_bus_id, char *vendorname) { uint16_t vid = 0, pid = 0; if (hnvml) { // may not be initialized on start... for (int id=0; id < hnvml->nvml_gpucount; id++) { if (hnvml->nvml_pci_bus_id[id] == pci_bus_id) { int dev_id = hnvml->nvml_cuda_device_id[id]; nvml_get_info(hnvml, dev_id, vid, pid); } } } else { #ifdef WIN32 for (unsigned id = 0; id < nvapi_dev_cnt; id++) { if (device_bus_ids[id] == pci_bus_id) { nvapi_getinfo(nvapi_dev_map[id], vid, pid); break; } } #endif } return translate_vendor_id(vid, vendorname); } int gpu_info(struct cgpu_info *gpu) { char vendorname[32] = { 0 }; int id = gpu->gpu_id; uint8_t bus_id = 0; gpu->nvml_id = -1; gpu->nvapi_id = -1; if (id < 0) return -1; if (hnvml) { gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id]; nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid); nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn)); nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc)); } #ifdef WIN32 gpu->nvapi_id = (int8_t) nvapi_dev_map[id]; nvapi_getinfo(nvapi_dev_map[id], gpu->gpu_vid, gpu->gpu_pid); nvapi_getserial(nvapi_dev_map[id], gpu->gpu_sn, sizeof(gpu->gpu_sn)); nvapi_getbios(nvapi_dev_map[id], gpu->gpu_desc, sizeof(gpu->gpu_desc)); #endif return 0; } #endif /* USE_WRAPNVML */ static int rgb_percent(int RGB, int percent) { uint8_t* comp = (uint8_t*) &RGB; int res = ((percent*comp[2]) / 100) << 16; res += ((percent*comp[1]) / 100) << 8; return res + ((percent*comp[0]) / 100); } void gpu_led_on(int dev_id) { #if defined(WIN32) && defined(USE_WRAPNVML) int value = device_led[dev_id]; if (device_led_state[dev_id] != value) { if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) device_led_state[dev_id] = value; } #endif } void gpu_led_percent(int dev_id, int percent) { #if defined(WIN32) && defined(USE_WRAPNVML) int value = rgb_percent(device_led[dev_id], percent); if (device_led_state[dev_id] != value) { if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0) device_led_state[dev_id] = value; } #endif } void gpu_led_off(int dev_id) { #if defined(WIN32) && defined(USE_WRAPNVML) if (device_led_state[dev_id]) { if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0) device_led_state[dev_id] = 0; } #endif } #ifdef USE_WRAPNVML extern double thr_hashrates[MAX_GPUS]; extern bool opt_debug_threads; extern bool opt_hwmonitor; extern int num_cpus; void *monitor_thread(void *userdata) { int thr_id = -1; while (!abort_flag && !opt_quiet) { // This thread monitors card's power lazily during scans, one at a time... thr_id = (thr_id + 1) % opt_n_threads; struct cgpu_info *cgpu = &thr_info[thr_id].gpu; int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id); if (hnvml != NULL && cgpu) { char khw[32] = { 0 }; uint64_t clock = 0, mem_clock = 0; uint32_t fanpercent = 0, power = 0; double tempC = 0, khs_per_watt = 0; uint32_t counter = 0; int max_loops = 1000; pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock); do { unsigned int tmp_clock=0, tmp_memclock=0; nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock); #ifdef WIN32 if (tmp_clock < 200) { // workaround for buggy drivers 378.x (real clock) tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]); } #endif if (tmp_clock < 200) { // some older cards only report a base clock with cuda props. if (cuda_gpu_info(cgpu) == 0) { tmp_clock = cgpu->gpu_clock/1000; tmp_memclock = cgpu->gpu_memclock/1000; } } clock += tmp_clock; mem_clock += tmp_memclock; tempC += gpu_temp(cgpu); fanpercent += gpu_fanpercent(cgpu); power += gpu_power(cgpu); counter++; usleep(50000); if (abort_flag) goto abort; } while (cgpu->monitor.sampling_flag && (--max_loops)); cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter); cgpu->monitor.gpu_fan = fanpercent/counter; cgpu->monitor.gpu_power = power/counter; cgpu->monitor.gpu_clock = (uint32_t) (clock/counter); cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter); if (power) { khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]); khs_per_watt = khs_per_watt / ((double)power / counter); format_hashrate(khs_per_watt * 1000, khw); if (strlen(khw)) sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000); } if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) { gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%", cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/, khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan ); cgpu->monitor.tm_displayed = (uint32_t)time(NULL); } pthread_mutex_unlock(&cgpu->monitor.lock); } usleep(500); // safety } abort: if (opt_debug_threads) applog(LOG_DEBUG, "%s() died", __func__); return NULL; } #endif