Browse Source

api: change unit of device mem to MB

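Without this change, there is no way to report device memory sizes larger than 4 GB from 32-bit (x86) binaries, because a 32-bit size_t cannot hold the byte count.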
2upstream
Tanguy Pruvot 9 years ago
parent
commit
dbb9507d2b
  1. 9
      api.cpp
  2. 9
      cuda.cpp
  3. 4
      miner.h
  4. 2
      neoscrypt/cuda_neoscrypt.cu
  5. 2
      nvml.cpp
  6. 2
      util.cpp

9
api.cpp

@ -116,6 +116,7 @@ static void gpustatus(int thr_id)
char buf[512]; *buf = '\0'; char buf[512]; *buf = '\0';
char* card; char* card;
cuda_gpu_info(cgpu);
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
cgpu->has_monitoring = true; cgpu->has_monitoring = true;
cgpu->gpu_bus = gpu_busid(cgpu); cgpu->gpu_bus = gpu_busid(cgpu);
@ -124,7 +125,6 @@ static void gpustatus(int thr_id)
cgpu->gpu_fan_rpm = (uint16_t) gpu_fanrpm(cgpu); cgpu->gpu_fan_rpm = (uint16_t) gpu_fanrpm(cgpu);
cgpu->gpu_power = gpu_power(cgpu); // mWatts cgpu->gpu_power = gpu_power(cgpu); // mWatts
#endif #endif
cuda_gpu_clocks(cgpu);
// todo: per gpu // todo: per gpu
cgpu->accepted = p->accepted_count; cgpu->accepted = p->accepted_count;
@ -254,6 +254,7 @@ static void gpuhwinfos(int gpu_id)
if (cgpu == NULL) if (cgpu == NULL)
return; return;
cuda_gpu_info(cgpu);
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
cgpu->has_monitoring = true; cgpu->has_monitoring = true;
cgpu->gpu_bus = gpu_busid(cgpu); cgpu->gpu_bus = gpu_busid(cgpu);
@ -268,18 +269,16 @@ static void gpuhwinfos(int gpu_id)
#endif #endif
#endif #endif
cuda_gpu_clocks(cgpu);
memset(pstate, 0, sizeof(pstate)); memset(pstate, 0, sizeof(pstate));
if (cgpu->gpu_pstate != -1) if (cgpu->gpu_pstate != -1)
snprintf(pstate, sizeof(pstate), "P%d", (int) cgpu->gpu_pstate); snprintf(pstate, sizeof(pstate), "P%d", (int) cgpu->gpu_pstate);
card = device_name[gpu_id]; card = device_name[gpu_id];
snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%lu;" snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%u;"
"TEMP=%.1f;FAN=%hu;RPM=%hu;FREQ=%d;MEMFREQ=%d;PST=%s;POWER=%u;" "TEMP=%.1f;FAN=%hu;RPM=%hu;FREQ=%d;MEMFREQ=%d;PST=%s;POWER=%u;"
"VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|", "VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|",
gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem, gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, (uint32_t) cgpu->gpu_mem,
cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_fan_rpm, cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_fan_rpm,
cgpu->gpu_clock, cgpu->gpu_memclock, cgpu->gpu_clock, cgpu->gpu_memclock,
pstate, cgpu->gpu_power, pstate, cgpu->gpu_power,

9
cuda.cpp

@ -233,13 +233,18 @@ void cuda_clear_lasterror()
} /* extern "C" */ } /* extern "C" */
#endif #endif
int cuda_gpu_clocks(struct cgpu_info *gpu) int cuda_gpu_info(struct cgpu_info *gpu)
{ {
cudaDeviceProp props; cudaDeviceProp props;
if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) { if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
gpu->gpu_clock = props.clockRate; gpu->gpu_clock = props.clockRate;
gpu->gpu_memclock = props.memoryClockRate; gpu->gpu_memclock = props.memoryClockRate;
gpu->gpu_mem = props.totalGlobalMem; gpu->gpu_mem = (props.totalGlobalMem / 1024); // kB
#if defined(_WIN32) && defined(USE_WRAPNVML)
// required to get mem size > 4GB (size_t too small for bytes on 32bit)
nvapiMemGetInfo(gpu->gpu_id, &gpu->gpu_memfree, &gpu->gpu_mem); // kB
#endif
gpu->gpu_mem = gpu->gpu_mem / 1024; // MB
return 0; return 0;
} }
return -1; return -1;

4
miner.h

@ -365,6 +365,7 @@ struct cgpu_info {
int gpu_clock; int gpu_clock;
int gpu_memclock; int gpu_memclock;
size_t gpu_mem; size_t gpu_mem;
size_t gpu_memfree;
uint32_t gpu_power; uint32_t gpu_power;
double gpu_vddc; double gpu_vddc;
int16_t gpu_pstate; int16_t gpu_pstate;
@ -486,6 +487,7 @@ extern double net_diff;
extern double stratum_diff; extern double stratum_diff;
#define MAX_GPUS 16 #define MAX_GPUS 16
//#define MAX_THREADS 32 todo
extern char* device_name[MAX_GPUS]; extern char* device_name[MAX_GPUS];
extern short device_map[MAX_GPUS]; extern short device_map[MAX_GPUS];
extern long device_sm[MAX_GPUS]; extern long device_sm[MAX_GPUS];
@ -500,7 +502,7 @@ void cuda_shutdown();
int cuda_finddevice(char *name); int cuda_finddevice(char *name);
int cuda_version(); int cuda_version();
void cuda_print_devices(); void cuda_print_devices();
int cuda_gpu_clocks(struct cgpu_info *gpu); int cuda_gpu_info(struct cgpu_info *gpu);
int cuda_available_memory(int thr_id); int cuda_available_memory(int thr_id);
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount); uint32_t cuda_default_throughput(int thr_id, uint32_t defcount);

2
neoscrypt/cuda_neoscrypt.cu

@ -752,8 +752,8 @@ void fastkdf256_v1(const uint32_t thread, const uint32_t nonce, uint32_t* const
rbuf = idx & 3; rbuf = idx & 3;
bitbuf = rbuf << 3; bitbuf = rbuf << 3;
for(int i = 0; i<64; i++)
#if __CUDA_ARCH__ >= 320 #if __CUDA_ARCH__ >= 320
for(int i = 0; i<64; i++)
asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(((uint32_t*)output)[i]) : "r"(B[(qbuf + i) & 0x3f]), "r"(B[(qbuf + i + 1) & 0x3f4]), "r"(bitbuf)); asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(((uint32_t*)output)[i]) : "r"(B[(qbuf + i) & 0x3f]), "r"(B[(qbuf + i + 1) & 0x3f4]), "r"(bitbuf));
#endif #endif

2
nvml.cpp

@ -1483,7 +1483,7 @@ int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
*total = mem.availableDedicatedVideoMemory; *total = mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
*free = mem.curAvailableDedicatedVideoMemory; *free = mem.curAvailableDedicatedVideoMemory;
} }
return (int) ret; return (int) ret;

2
util.cpp

@ -1638,13 +1638,13 @@ static bool stratum_benchdata(json_t *result, json_t *params, int thr_id)
strcpy(os, is_windows() ? "win32" : "linux"); strcpy(os, is_windows() ? "win32" : "linux");
#endif #endif
cuda_gpu_info(cgpu);
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
cgpu->has_monitoring = true; cgpu->has_monitoring = true;
cgpu->gpu_power = gpu_power(cgpu); // mWatts cgpu->gpu_power = gpu_power(cgpu); // mWatts
watts = (cgpu->gpu_power >= 1000) ? cgpu->gpu_power / 1000 : 0; // ignore nvapi % watts = (cgpu->gpu_power >= 1000) ? cgpu->gpu_power / 1000 : 0; // ignore nvapi %
gpu_info(cgpu); gpu_info(cgpu);
#endif #endif
cuda_gpu_clocks(cgpu);
get_currentalgo(algo, sizeof(algo)); get_currentalgo(algo, sizeof(algo));
card = device_name[dev_id]; card = device_name[dev_id];

Loading…
Cancel
Save