Browse Source

nvml: add new getclock api (v8)

+ some new functions to test on Pascal (if supported)
master
Tanguy Pruvot 9 years ago
parent
commit
fd757f2180
  1. 3
      cuda.cpp
  2. 60
      nvml.cpp
  3. 68
      nvml.h

3
cuda.cpp

@ -106,6 +106,9 @@ void cuda_print_devices()
cudaGetDeviceProperties(&props, dev_id); cudaGetDeviceProperties(&props, dev_id);
if (!opt_n_threads || n < opt_n_threads) { if (!opt_n_threads || n < opt_n_threads) {
fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]); fprintf(stderr, "GPU #%d: SM %d.%d %s\n", dev_id, props.major, props.minor, device_name[dev_id]);
#ifdef USE_WRAPNVML
if (opt_debug) nvml_print_device_info(dev_id);
#endif
} }
} }
} }

60
nvml.cpp

@ -188,6 +188,14 @@ nvml_handle * nvml_create()
nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit");
// v340 // v340
#ifdef __linux__
nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity");
nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");
nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");
#endif
/* NVML_ERROR_NOT_SUPPORTED /* NVML_ERROR_NOT_SUPPORTED
nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled)) nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled"); wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled");
@ -196,6 +204,11 @@ nvml_handle * nvml_create()
// v346 // v346
nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput");
// v36x (API 8 / Pascal)
nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock");
nvmlh->nvmlDeviceGetMaxCustomerBoostClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, unsigned int *clockMHz))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetMaxCustomerBoostClock");
if (nvmlh->nvmlInit == NULL || if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL || nvmlh->nvmlShutdown == NULL ||
@ -528,6 +541,53 @@ int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
return 1; return 1;
} }
// ccminer -D -n
#define LSTDEV_PFX " "
void nvml_print_device_info(int dev_id)
{
if (!hnvml) return;
int n = hnvml->cuda_nvml_device_id[dev_id];
if (n < 0 || n >= hnvml->nvml_gpucount)
return;
nvmlReturn_t rc;
if (hnvml->nvmlDeviceGetClock) {
uint32_t gpu_clk = 0, mem_clk = 0;
fprintf(stderr, "------- Clocks -------\n");
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk);
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk);
if (rc == NVML_SUCCESS) {
fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
}
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk);
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk);
if (rc == NVML_SUCCESS) {
fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
}
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk);
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk);
if (rc == NVML_SUCCESS) {
fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
}
// NVML_ERROR_NOT_SUPPORTED on Maxwell (361.62)
hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CUSTOMER_BOOST_MAX, &gpu_clk);
rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CUSTOMER_BOOST_MAX, &mem_clk);
if (rc == NVML_SUCCESS) {
fprintf(stderr, LSTDEV_PFX "BOOSTED MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
}
// NVML_ERROR_NOT_SUPPORTED on Maxwell (361.62)
hnvml->nvmlDeviceGetMaxCustomerBoostClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk);
rc = hnvml->nvmlDeviceGetMaxCustomerBoostClock(hnvml->devs[n], NVML_CLOCK_MEM, &mem_clk);
if (rc == NVML_SUCCESS) {
fprintf(stderr, LSTDEV_PFX "MXBOOST MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk);
}
}
}
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount) int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount)
{ {
*gpucount = nvmlh->nvml_gpucount; *gpucount = nvmlh->nvml_gpucount;

68
nvml.h

@ -57,13 +57,32 @@ enum nvmlReturn_t {
NVML_ERROR_INSUFFICIENT_POWER = 8, NVML_ERROR_INSUFFICIENT_POWER = 8,
NVML_ERROR_DRIVER_NOT_LOADED = 9, NVML_ERROR_DRIVER_NOT_LOADED = 9,
NVML_ERROR_TIMEOUT = 10, NVML_ERROR_TIMEOUT = 10,
NVML_ERROR_IRQ_ISSUE = 11,
NVML_ERROR_LIBRARY_NOT_FOUND = 12,
NVML_ERROR_FUNCTION_NOT_FOUND = 13,
NVML_ERROR_CORRUPTED_INFOROM = 14,
NVML_ERROR_GPU_IS_LOST = 15,
NVML_ERROR_RESET_REQUIRED = 16,
NVML_ERROR_OPERATING_SYSTEM = 17,
NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,
NVML_ERROR_IN_USE = 19,
NVML_ERROR_UNKNOWN = 999 NVML_ERROR_UNKNOWN = 999
}; };
enum nvmlClockType_t { enum nvmlClockType_t {
NVML_CLOCK_GRAPHICS = 0, NVML_CLOCK_GRAPHICS = 0,
NVML_CLOCK_SM = 1, NVML_CLOCK_SM = 1,
NVML_CLOCK_MEM = 2 NVML_CLOCK_MEM = 2,
NVML_CLOCK_VIDEO = 3,
NVML_CLOCK_COUNT
};
enum nvmlClockId_t {
NVML_CLOCK_ID_CURRENT = 0,
NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,
NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,
NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,
NVML_CLOCK_ID_COUNT
}; };
enum nvmlPcieUtilCounter_t { enum nvmlPcieUtilCounter_t {
@ -80,6 +99,11 @@ enum nvmlValueType_t {
NVML_VALUE_TYPE_COUNT NVML_VALUE_TYPE_COUNT
}; };
typedef int nvmlGpuTopologyLevel_t;
typedef int nvmlNvLinkCapability_t;
typedef int nvmlNvLinkErrorCounter_t;
typedef int nvmlNvLinkUtilizationControl_t;
#define NVML_DEVICE_SERIAL_BUFFER_SIZE 30 #define NVML_DEVICE_SERIAL_BUFFER_SIZE 30
#define NVML_DEVICE_UUID_BUFFER_SIZE 80 #define NVML_DEVICE_UUID_BUFFER_SIZE 80
#define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32 #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32
@ -136,21 +160,44 @@ typedef struct {
// v331 // v331
nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit); nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit);
// v340 // v340
//nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet); #ifdef __linux__
//nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t); nvmlReturn_t (*nvmlDeviceClearCpuAffinity)(nvmlDevice_t);
//nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled); nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet);
//nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled); nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t);
#endif
nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled);
nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled);
// v346 // v346
nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value); nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value);
} nvml_handle; // v36x (API 8)
nvmlReturn_t (*nvmlDeviceGetClock)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz);
nvmlReturn_t (*nvmlDeviceGetMaxCustomerBoostClock)(nvmlDevice_t, nvmlClockType_t clockType, unsigned int *clockMHz);
#ifdef __linux__
nvmlReturn_t (*nvmlSystemGetTopologyGpuSet)(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray);
nvmlReturn_t (*nvmlDeviceGetTopologyNearestGpus)(nvmlDevice_t, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray);
nvmlReturn_t (*nvmlDeviceGetTopologyCommonAncestor)(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo);
#endif
nvmlReturn_t (*nvmlDeviceGetNvLinkState)(nvmlDevice_t, unsigned int link, nvmlEnableState_t *isActive);
nvmlReturn_t (*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t, unsigned int link, unsigned int *version);
nvmlReturn_t (*nvmlDeviceGetNvLinkCapability)(nvmlDevice_t, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult);
nvmlReturn_t (*nvmlDeviceGetNvLinkRemotePciInfo)(nvmlDevice_t, unsigned int link, nvmlPciInfo_t *pci);
nvmlReturn_t (*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue);
nvmlReturn_t (*nvmlDeviceResetNvLinkErrorCounters)(nvmlDevice_t, unsigned int link);
nvmlReturn_t (*nvmlDeviceSetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset);
nvmlReturn_t (*nvmlDeviceGetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control);
nvmlReturn_t (*nvmlDeviceGetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter);
nvmlReturn_t (*nvmlDeviceFreezeNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlEnableState_t freeze);
nvmlReturn_t (*nvmlDeviceResetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter);
} nvml_handle;
nvml_handle * nvml_create(); nvml_handle * nvml_create();
int nvml_destroy(nvml_handle *nvmlh); int nvml_destroy(nvml_handle *nvmlh);
/* // Debug information
* Query the number of GPUs seen by NVML void nvml_print_device_info(int dev_id);
*/
// Query the number of GPUs seen by NVML
int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount); int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
int nvml_set_plimit(nvml_handle *nvmlh, int dev_id); int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
@ -168,8 +215,9 @@ unsigned int gpu_power(struct cgpu_info *gpu);
int gpu_pstate(struct cgpu_info *gpu); int gpu_pstate(struct cgpu_info *gpu);
int gpu_busid(struct cgpu_info *gpu); int gpu_busid(struct cgpu_info *gpu);
/* pid/vid, sn and bios rev */ // pid/vid, sn and bios rev
int gpu_info(struct cgpu_info *gpu); int gpu_info(struct cgpu_info *gpu);
int gpu_vendor(uint8_t pci_bus_id, char *vendorname); int gpu_vendor(uint8_t pci_bus_id, char *vendorname);

Loading…
Cancel
Save