Browse Source

nvml: force 64bits types for mem sizes

size_t can be a bit... unpredictable on x86
master
Tanguy Pruvot 8 years ago
parent
commit
f84c83afe5
  1. 14
      cuda.cpp
  2. 10
      miner.h
  3. 6
      nvml.cpp
  4. 2
      nvml.h

14
cuda.cpp

@ -218,16 +218,18 @@ void cuda_reset_device(int thr_id, bool *init) @@ -218,16 +218,18 @@ void cuda_reset_device(int thr_id, bool *init)
// Return the free memory of the GPU mapped to thr_id, as the queried "free"
// value divided by (1024 * 1024) and truncated to int.
//
// thr_id is reduced modulo MAX_GPUS and looked up in device_map to obtain the
// device id used for the query.
//
// NOTE(review): this listing is a rendered diff that interleaves the removed
// and the added lines without +/- markers; that is why declarations, the
// nvapiMemGetInfo call, the return statement and #endif each appear twice.
int cuda_available_memory(int thr_id)
{
int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal = 0, mfree = 0;
// Windows builds with the NVML/NVAPI wrapper query NVAPI instead of CUDA.
#if defined(_WIN32) && defined(USE_WRAPNVML)
// 64-bit holders: size_t is too small for large byte counts on 32-bit builds.
uint64_t tot64 = 0, free64 = 0;
// The CUDA (6.5) query can crash on Pascal and does not handle 8GB cards
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
nvapiMemGetInfo(dev_id, &free64, &tot64);
// NOTE(review): a comment elsewhere in this diff marks nvapiMemGetInfo values
// as kB; if so, dividing by 1024 * 1024 does not yield the same unit as the
// CUDA path below - confirm the intended unit.
return (int) (free64 / (1024 * 1024));
#else
size_t mtotal = 0, mfree = 0;
// Select the device and wait for pending work before querying its memory.
cudaSetDevice(dev_id);
cudaDeviceSynchronize();
cudaMemGetInfo(&mfree, &mtotal);
#endif
return (int) (mfree / (1024 * 1024));
#endif
}
// Check (and reset) last cuda error, and report it in logs
@ -252,9 +254,9 @@ int cuda_gpu_info(struct cgpu_info *gpu) @@ -252,9 +254,9 @@ int cuda_gpu_info(struct cgpu_info *gpu)
{
cudaDeviceProp props;
if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
gpu->gpu_clock = props.clockRate;
gpu->gpu_memclock = props.memoryClockRate;
gpu->gpu_mem = (props.totalGlobalMem / 1024); // kB
gpu->gpu_clock = (uint32_t) props.clockRate;
gpu->gpu_memclock = (uint32_t) props.memoryClockRate;
gpu->gpu_mem = (uint64_t) (props.totalGlobalMem / 1024); // kB
#if defined(_WIN32) && defined(USE_WRAPNVML)
// required to get mem size > 4GB (size_t too small for bytes on 32bit)
nvapiMemGetInfo(gpu->gpu_id, &gpu->gpu_memfree, &gpu->gpu_mem); // kB

10
miner.h

@ -367,10 +367,10 @@ struct cgpu_info { @@ -367,10 +367,10 @@ struct cgpu_info {
uint16_t gpu_fan;
uint16_t gpu_fan_rpm;
uint16_t gpu_arch;
int gpu_clock;
int gpu_memclock;
size_t gpu_mem;
size_t gpu_memfree;
uint32_t gpu_clock;
uint32_t gpu_memclock;
uint64_t gpu_mem;
uint64_t gpu_memfree;
uint32_t gpu_power;
double gpu_vddc;
int16_t gpu_pstate;
@ -383,7 +383,7 @@ struct cgpu_info { @@ -383,7 +383,7 @@ struct cgpu_info {
char gpu_sn[64];
char gpu_desc[64];
float intensity;
double intensity;
uint32_t throughput;
};

6
nvml.cpp

@ -1657,15 +1657,15 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock) @@ -1657,15 +1657,15 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
}
// Replacement for WIN32 CUDA 6.5 on pascal
//
// Queries NvAPI_GPU_GetMemoryInfo for the physical GPU handle selected through
// nvapi_dev_map[dev_id % MAX_GPUS]. On NVAPI_OK, *total receives
// dedicatedVideoMemory and *free receives curAvailableDedicatedVideoMemory;
// on any other status both outputs are left untouched.
// Returns the NvAPI status cast to int.
//
// NOTE(review): this listing is a rendered diff; the old (size_t) and the new
// (uint64_t) signature and assignment lines both appear below.
// NOTE(review): the unit of the reported values is not established here; a
// caller comment elsewhere in this diff says kB - confirm against NVAPI docs.
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
{
NvAPI_Status ret = NVAPI_OK;
// Zero the structure, then set the version field before the query.
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
*total = mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
*free = mem.curAvailableDedicatedVideoMemory;
*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
*free = (uint64_t) mem.curAvailableDedicatedVideoMemory;
}
return (int) ret;
}

2
nvml.h

@ -231,7 +231,7 @@ unsigned int nvapi_devnum(int dev_id); @@ -231,7 +231,7 @@ unsigned int nvapi_devnum(int dev_id);
int nvapi_devid(unsigned int devNum);
// cuda Replacement for 6.5 compat
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total);
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total);
#endif
#endif /* USE_WRAPNVML */

Loading…
Cancel
Save