From fd757f2180e6706038cc5c37d85145ab0852cf5e Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Mon, 20 Jun 2016 07:32:26 +0200 Subject: [PATCH] nvml: add new getclock api (v8) + some new functions to test on pascal (if supported) --- cuda.cpp | 3 +++ nvml.cpp | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ nvml.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 121 insertions(+), 10 deletions(-) diff --git a/cuda.cpp b/cuda.cpp index 2f3c67f..42e8fd9 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -106,6 +106,9 @@ void cuda_print_devices() cudaGetDeviceProperties(&props, dev_id); if (!opt_n_threads || n < opt_n_threads) { fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]); +#ifdef USE_WRAPNVML + if (opt_debug) nvml_print_device_info(dev_id); +#endif } } } diff --git a/nvml.cpp b/nvml.cpp index fa8f119..0063dab 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -188,6 +188,14 @@ nvml_handle * nvml_create() nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); // v340 +#ifdef __linux__ + nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); + nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); + nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); +#endif /* NVML_ERROR_NOT_SUPPORTED nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled"); @@ -196,6 +204,11 @@ nvml_handle * nvml_create() // v346 nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, 
unsigned int *value))
 		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput");
+	// v36x (API 8 / Pascal)
+	nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz))
+		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock");
+	nvmlh->nvmlDeviceGetMaxCustomerBoostClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, unsigned int *clockMHz))
+		wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxCustomerBoostClock");
 
 	if (nvmlh->nvmlInit == NULL ||
 		nvmlh->nvmlShutdown == NULL ||
@@ -528,6 +541,62 @@ int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
 	return 1;
 }
 
+// ccminer -D -n
+#define LSTDEV_PFX " "
+
+/**
+ * Debug listing: print the clocks NVML reports for one CUDA device to stderr.
+ *
+ * dev_id is the CUDA device index, mapped to the NVML device through
+ * hnvml->cuda_nvml_device_id[]. Returns silently when NVML is not loaded,
+ * when the device has no valid NVML index, or when nvmlDeviceGetClock was
+ * not resolved from the driver library.
+ */
+void nvml_print_device_info(int dev_id)
+{
+	if (!hnvml || dev_id < 0) return;
+
+	int n = hnvml->cuda_nvml_device_id[dev_id];
+	if (n < 0 || n >= hnvml->nvml_gpucount)
+		return;
+
+	if (!hnvml->nvmlDeviceGetClock)
+		return;
+
+	static const struct {
+		nvmlClockId_t id;
+		const char *label;
+	} clocks[] = {
+		{ NVML_CLOCK_ID_APP_CLOCK_DEFAULT, "DEFAULT" },
+		{ NVML_CLOCK_ID_APP_CLOCK_TARGET, "TARGET" },
+		{ NVML_CLOCK_ID_CURRENT, "CURRENT" },
+		// NVML_ERROR_NOT_SUPPORTED on Maxwell (361.62)
+		{ NVML_CLOCK_ID_CUSTOMER_BOOST_MAX, "BOOSTED" },
+	};
+
+	fprintf(stderr, "------- Clocks -------\n");
+
+	for (size_t i = 0; i < sizeof(clocks) / sizeof(clocks[0]); i++) {
+		unsigned int gpu_clk = 0, mem_clk = 0;
+		// a line is printed only when BOTH queries succeed, so a stale or zero clock is never shown
+		if (hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, clocks[i].id, &gpu_clk) != NVML_SUCCESS)
+			continue;
+		if (hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, clocks[i].id, &mem_clk) != NVML_SUCCESS)
+			continue;
+		fprintf(stderr, LSTDEV_PFX "%s MEM %4u GPU %4u MHz\n", clocks[i].label, mem_clk, gpu_clk);
+	}
+
+	// looked up separately in nvml_create(), so it may be NULL even when nvmlDeviceGetClock is available
+	// NVML_ERROR_NOT_SUPPORTED on Maxwell (361.62)
+	if (hnvml->nvmlDeviceGetMaxCustomerBoostClock) {
+		unsigned int gpu_clk = 0, mem_clk = 0;
+		if (hnvml->nvmlDeviceGetMaxCustomerBoostClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk) == NVML_SUCCESS &&
+		    hnvml->nvmlDeviceGetMaxCustomerBoostClock(hnvml->devs[n], NVML_CLOCK_MEM, &mem_clk) == NVML_SUCCESS) {
+			fprintf(stderr, LSTDEV_PFX "MXBOOST MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
+		}
+	}
+}
+
 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
 {
 	*gpucount = nvmlh->nvml_gpucount;
diff --git a/nvml.h b/nvml.h
index 4e1df9f..2603603 100644
--- a/nvml.h
+++ b/nvml.h
@@ -57,13 +57,32 @@ enum nvmlReturn_t {
 	NVML_ERROR_INSUFFICIENT_POWER = 8,
 	NVML_ERROR_DRIVER_NOT_LOADED = 9,
 	NVML_ERROR_TIMEOUT = 10,
+	NVML_ERROR_IRQ_ISSUE = 11,
+	NVML_ERROR_LIBRARY_NOT_FOUND = 12,
+	NVML_ERROR_FUNCTION_NOT_FOUND = 13,
+	NVML_ERROR_CORRUPTED_INFOROM = 14,
+	NVML_ERROR_GPU_IS_LOST = 15,
+	NVML_ERROR_RESET_REQUIRED = 16,
+	NVML_ERROR_OPERATING_SYSTEM = 17,
+	NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,
+	NVML_ERROR_IN_USE = 19,
 	NVML_ERROR_UNKNOWN = 999
 };
 
 enum nvmlClockType_t {
 	NVML_CLOCK_GRAPHICS = 0,
 	NVML_CLOCK_SM = 1,
-	NVML_CLOCK_MEM = 2
+	NVML_CLOCK_MEM = 2,
+	NVML_CLOCK_VIDEO = 3,
+	NVML_CLOCK_COUNT
+};
+
+enum nvmlClockId_t {
+	NVML_CLOCK_ID_CURRENT = 0,
+	NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,
+	NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,
+	NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,
+	NVML_CLOCK_ID_COUNT
 };
 
 enum nvmlPcieUtilCounter_t {
@@ -80,6 +99,11 @@ enum nvmlValueType_t {
 	NVML_VALUE_TYPE_COUNT
 };
 
+typedef int nvmlGpuTopologyLevel_t;
+typedef int nvmlNvLinkCapability_t;
+typedef int nvmlNvLinkErrorCounter_t;
+typedef int nvmlNvLinkUtilizationControl_t;
+
 #define 
NVML_DEVICE_SERIAL_BUFFER_SIZE 30 #define NVML_DEVICE_UUID_BUFFER_SIZE 80 #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32 @@ -136,21 +160,44 @@ typedef struct { // v331 nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit); // v340 - //nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet); - //nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t); - //nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled); - //nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled); +#ifdef __linux__ + nvmlReturn_t (*nvmlDeviceClearCpuAffinity)(nvmlDevice_t); + nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet); + nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t); +#endif + nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled); + nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled); // v346 nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value); -} nvml_handle; + // v36x (API 8) + nvmlReturn_t (*nvmlDeviceGetClock)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz); + nvmlReturn_t (*nvmlDeviceGetMaxCustomerBoostClock)(nvmlDevice_t, nvmlClockType_t clockType, unsigned int *clockMHz); +#ifdef __linux__ + nvmlReturn_t (*nvmlSystemGetTopologyGpuSet)(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray); + nvmlReturn_t (*nvmlDeviceGetTopologyNearestGpus)(nvmlDevice_t, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray); + nvmlReturn_t (*nvmlDeviceGetTopologyCommonAncestor)(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo); +#endif + nvmlReturn_t 
(*nvmlDeviceGetNvLinkState)(nvmlDevice_t, unsigned int link, nvmlEnableState_t *isActive); + nvmlReturn_t (*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t, unsigned int link, unsigned int *version); + nvmlReturn_t (*nvmlDeviceGetNvLinkCapability)(nvmlDevice_t, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult); + nvmlReturn_t (*nvmlDeviceGetNvLinkRemotePciInfo)(nvmlDevice_t, unsigned int link, nvmlPciInfo_t *pci); + nvmlReturn_t (*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue); + nvmlReturn_t (*nvmlDeviceResetNvLinkErrorCounters)(nvmlDevice_t, unsigned int link); + nvmlReturn_t (*nvmlDeviceSetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset); + nvmlReturn_t (*nvmlDeviceGetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control); + nvmlReturn_t (*nvmlDeviceGetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter); + nvmlReturn_t (*nvmlDeviceFreezeNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlEnableState_t freeze); + nvmlReturn_t (*nvmlDeviceResetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter); +} nvml_handle; nvml_handle * nvml_create(); int nvml_destroy(nvml_handle *nvmlh); -/* - * Query the number of GPUs seen by NVML - */ +// Debug informations +void nvml_print_device_info(int dev_id); + +// Query the number of GPUs seen by NVML int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount); int nvml_set_plimit(nvml_handle *nvmlh, int dev_id); @@ -168,8 +215,9 @@ unsigned int gpu_power(struct cgpu_info *gpu); int gpu_pstate(struct cgpu_info *gpu); int gpu_busid(struct cgpu_info *gpu); -/* pid/vid, sn and bios rev */ +// pid/vid, sn and bios rev int 
gpu_info(struct cgpu_info *gpu); + int gpu_vendor(uint8_t pci_bus_id, char *vendorname);