From dbb9507d2b2d93f8a4e43c35c2d51571918338ba Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 10 Jul 2016 12:59:04 +0200 Subject: [PATCH] api: change unit of device mem to MB without that, no way to read sizes > 4GB on x86 binaries --- api.cpp | 9 ++++----- cuda.cpp | 9 +++++++-- miner.h | 4 +++- neoscrypt/cuda_neoscrypt.cu | 2 +- nvml.cpp | 2 +- util.cpp | 2 +- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/api.cpp b/api.cpp index 457155b..b103df5 100644 --- a/api.cpp +++ b/api.cpp @@ -116,6 +116,7 @@ static void gpustatus(int thr_id) char buf[512]; *buf = '\0'; char* card; + cuda_gpu_info(cgpu); #ifdef USE_WRAPNVML cgpu->has_monitoring = true; cgpu->gpu_bus = gpu_busid(cgpu); @@ -124,7 +125,6 @@ static void gpustatus(int thr_id) cgpu->gpu_fan_rpm = (uint16_t) gpu_fanrpm(cgpu); cgpu->gpu_power = gpu_power(cgpu); // mWatts #endif - cuda_gpu_clocks(cgpu); // todo: per gpu cgpu->accepted = p->accepted_count; @@ -254,6 +254,7 @@ static void gpuhwinfos(int gpu_id) if (cgpu == NULL) return; + cuda_gpu_info(cgpu); #ifdef USE_WRAPNVML cgpu->has_monitoring = true; cgpu->gpu_bus = gpu_busid(cgpu); @@ -268,18 +269,16 @@ static void gpuhwinfos(int gpu_id) #endif #endif - cuda_gpu_clocks(cgpu); - memset(pstate, 0, sizeof(pstate)); if (cgpu->gpu_pstate != -1) snprintf(pstate, sizeof(pstate), "P%d", (int) cgpu->gpu_pstate); card = device_name[gpu_id]; - snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%lu;" + snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%u;" "TEMP=%.1f;FAN=%hu;RPM=%hu;FREQ=%d;MEMFREQ=%d;PST=%s;POWER=%u;" "VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|", - gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem, + gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, (uint32_t) cgpu->gpu_mem, cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_fan_rpm, cgpu->gpu_clock, cgpu->gpu_memclock, pstate, cgpu->gpu_power, diff --git a/cuda.cpp b/cuda.cpp index 79be292..4575214 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -233,13 +233,18 @@ void cuda_clear_lasterror() } /* extern "C" */ #endif -int cuda_gpu_clocks(struct cgpu_info *gpu) +int cuda_gpu_info(struct cgpu_info *gpu) { cudaDeviceProp props; if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) { gpu->gpu_clock = props.clockRate; gpu->gpu_memclock = props.memoryClockRate; - gpu->gpu_mem = props.totalGlobalMem; + gpu->gpu_mem = (props.totalGlobalMem / 1024); // kB +#if defined(_WIN32) && defined(USE_WRAPNVML) + // required to get mem size > 4GB (size_t too small for bytes on 32bit) + nvapiMemGetInfo(gpu->gpu_id, &gpu->gpu_memfree, &gpu->gpu_mem); // kB +#endif + gpu->gpu_mem = gpu->gpu_mem / 1024; // MB return 0; } return -1; diff --git a/miner.h b/miner.h index 2d30aa0..6bbb3a8 100644 --- a/miner.h +++ b/miner.h @@ -365,6 +365,7 @@ struct cgpu_info { int gpu_clock; int gpu_memclock; size_t gpu_mem; + size_t gpu_memfree; uint32_t gpu_power; double gpu_vddc; int16_t gpu_pstate; @@ -486,6 +487,7 @@ extern double net_diff; extern double stratum_diff; #define MAX_GPUS 16 +//#define MAX_THREADS 32 todo extern char* device_name[MAX_GPUS]; extern short device_map[MAX_GPUS]; extern long device_sm[MAX_GPUS]; @@ -500,7 +502,7 @@ void cuda_shutdown(); int cuda_finddevice(char *name); int cuda_version(); void cuda_print_devices(); -int cuda_gpu_clocks(struct cgpu_info *gpu); +int cuda_gpu_info(struct cgpu_info *gpu); int cuda_available_memory(int thr_id); uint32_t cuda_default_throughput(int thr_id, uint32_t defcount); diff --git a/neoscrypt/cuda_neoscrypt.cu b/neoscrypt/cuda_neoscrypt.cu index 1fa45e8..42b3382 100644 --- a/neoscrypt/cuda_neoscrypt.cu +++ b/neoscrypt/cuda_neoscrypt.cu @@ -752,8 +752,8 @@ void fastkdf256_v1(const uint32_t thread, const uint32_t nonce, uint32_t* const rbuf = idx & 3; bitbuf = rbuf << 3; - for(int i = 0; i<64; i++) #if __CUDA_ARCH__ >= 320 + for(int i = 0; i<64; i++) asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(((uint32_t*)output)[i]) : "r"(B[(qbuf + i) & 0x3f]), "r"(B[(qbuf + i + 1) & 0x3f4]), "r"(bitbuf)); #endif diff --git a/nvml.cpp b/nvml.cpp index 2581874..0b32967 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -1483,7 +1483,7 @@ int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total) mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS]; if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) { - *total = mem.availableDedicatedVideoMemory; + *total = mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory; *free = mem.curAvailableDedicatedVideoMemory; } return (int) ret; diff --git a/util.cpp b/util.cpp index 1b107ca..e7b305a 100644 --- a/util.cpp +++ b/util.cpp @@ -1638,13 +1638,13 @@ static bool stratum_benchdata(json_t *result, json_t *params, int thr_id) strcpy(os, is_windows() ? "win32" : "linux"); #endif + cuda_gpu_info(cgpu); #ifdef USE_WRAPNVML cgpu->has_monitoring = true; cgpu->gpu_power = gpu_power(cgpu); // mWatts watts = (cgpu->gpu_power >= 1000) ? cgpu->gpu_power / 1000 : 0; // ignore nvapi % gpu_info(cgpu); #endif - cuda_gpu_clocks(cgpu); get_currentalgo(algo, sizeof(algo)); card = device_name[dev_id];