api: change unit of device mem to MB

Without this change, there is no way to report device memory sizes > 4GB from 32-bit (x86) binaries, because a byte count does not fit in a 32-bit size_t.
2025-08-26 05:42:02 +00:00 · 2016-07-10 12:59:04 +02:00 · 2016-07-10 12:59:04 +02:00 · dbb9507d2b
commit dbb9507d2b
parent 4ca7b5a404
6 changed files with 17 additions and 11 deletions
--- a/api.cpp
+++ b/api.cpp
@ -116,6 +116,7 @@ static void gpustatus(int thr_id)
 		char buf[512]; *buf = '\0';
 		char* card;

+		cuda_gpu_info(cgpu);
 #ifdef USE_WRAPNVML
 		cgpu->has_monitoring = true;
 		cgpu->gpu_bus = gpu_busid(cgpu);
@ -124,7 +125,6 @@ static void gpustatus(int thr_id)
 		cgpu->gpu_fan_rpm = (uint16_t) gpu_fanrpm(cgpu);
 		cgpu->gpu_power = gpu_power(cgpu); // mWatts
 #endif
-		cuda_gpu_clocks(cgpu);

 		// todo: per gpu
 		cgpu->accepted = p->accepted_count;
@ -254,6 +254,7 @@ static void gpuhwinfos(int gpu_id)
 	if (cgpu == NULL)
 		return;

+	cuda_gpu_info(cgpu);
 #ifdef USE_WRAPNVML
 	cgpu->has_monitoring = true;
 	cgpu->gpu_bus = gpu_busid(cgpu);
@ -268,18 +269,16 @@ static void gpuhwinfos(int gpu_id)
 #endif
 #endif

-	cuda_gpu_clocks(cgpu);
-
 	memset(pstate, 0, sizeof(pstate));
 	if (cgpu->gpu_pstate != -1)
 		snprintf(pstate, sizeof(pstate), "P%d", (int) cgpu->gpu_pstate);

 	card = device_name[gpu_id];

-	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%lu;"
+	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%hu;MEM=%u;"
 		"TEMP=%.1f;FAN=%hu;RPM=%hu;FREQ=%d;MEMFREQ=%d;PST=%s;POWER=%u;"
 		"VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|",
-		gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem,
+		gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, (uint32_t) cgpu->gpu_mem,
 		cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_fan_rpm,
 		cgpu->gpu_clock, cgpu->gpu_memclock,
 		pstate, cgpu->gpu_power,
--- a/cuda.cpp
+++ b/cuda.cpp
@ -233,13 +233,18 @@ void cuda_clear_lasterror()
 } /* extern "C" */
 #endif

-int cuda_gpu_clocks(struct cgpu_info *gpu)
+int cuda_gpu_info(struct cgpu_info *gpu)
 {
 	cudaDeviceProp props;
 	if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
 		gpu->gpu_clock = props.clockRate;
 		gpu->gpu_memclock = props.memoryClockRate;
-		gpu->gpu_mem = props.totalGlobalMem;
+		gpu->gpu_mem = (props.totalGlobalMem / 1024); // kB
+#if defined(_WIN32) && defined(USE_WRAPNVML)
+		// required to get mem size > 4GB (size_t too small for bytes on 32bit)
+		nvapiMemGetInfo(gpu->gpu_id, &gpu->gpu_memfree, &gpu->gpu_mem); // kB
+#endif
+		gpu->gpu_mem = gpu->gpu_mem / 1024; // MB
 		return 0;
 	}
 	return -1;
--- a/miner.h
+++ b/miner.h
@ -365,6 +365,7 @@ struct cgpu_info {
 	int gpu_clock;
 	int gpu_memclock;
 	size_t gpu_mem;
+	size_t gpu_memfree;
 	uint32_t gpu_power;
 	double gpu_vddc;
 	int16_t gpu_pstate;
@ -486,6 +487,7 @@ extern double net_diff;
 extern double stratum_diff;

 #define MAX_GPUS 16
+//#define MAX_THREADS 32 todo
 extern char* device_name[MAX_GPUS];
 extern short device_map[MAX_GPUS];
 extern long  device_sm[MAX_GPUS];
@ -500,7 +502,7 @@ void cuda_shutdown();
 int cuda_finddevice(char *name);
 int cuda_version();
 void cuda_print_devices();
-int cuda_gpu_clocks(struct cgpu_info *gpu);
+int cuda_gpu_info(struct cgpu_info *gpu);
 int cuda_available_memory(int thr_id);

 uint32_t cuda_default_throughput(int thr_id, uint32_t defcount);
--- a/neoscrypt/cuda_neoscrypt.cu
+++ b/neoscrypt/cuda_neoscrypt.cu
@ -752,8 +752,8 @@ void fastkdf256_v1(const uint32_t thread, const uint32_t nonce, uint32_t* const
 	rbuf = idx & 3;
 	bitbuf = rbuf << 3;

-	for(int i = 0; i<64; i++)
 #if __CUDA_ARCH__ >= 320
+	for(int i = 0; i<64; i++)
 		asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(((uint32_t*)output)[i]) : "r"(B[(qbuf + i) & 0x3f]), "r"(B[(qbuf + i + 1) & 0x3f4]), "r"(bitbuf));
 #endif

--- a/nvml.cpp
+++ b/nvml.cpp
@ -1483,7 +1483,7 @@ int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
 	mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
 	unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
 	if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
-		*total = mem.availableDedicatedVideoMemory;
+		*total = mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
 		*free  = mem.curAvailableDedicatedVideoMemory;
 	}
 	return (int) ret;
--- a/util.cpp
+++ b/util.cpp
@ -1638,13 +1638,13 @@ static bool stratum_benchdata(json_t *result, json_t *params, int thr_id)
 	strcpy(os, is_windows() ? "win32" : "linux");
 #endif

+	cuda_gpu_info(cgpu);
 #ifdef USE_WRAPNVML
 	cgpu->has_monitoring = true;
 	cgpu->gpu_power = gpu_power(cgpu); // mWatts
 	watts = (cgpu->gpu_power >= 1000) ? cgpu->gpu_power / 1000 : 0; // ignore nvapi %
 	gpu_info(cgpu);
 #endif
-	cuda_gpu_clocks(cgpu);
 	get_currentalgo(algo, sizeof(algo));

 	card = device_name[dev_id];