diff --git a/api.cpp b/api.cpp
index 6f096e6..0e8b5b3 100644
--- a/api.cpp
+++ b/api.cpp
@@ -128,9 +128,9 @@ static void gpustatus(int thr_id)
 		cgpu->gpu_bus = gpu_busid(cgpu);
 		cgpu->gpu_temp = gpu_temp(cgpu);
 		cgpu->gpu_fan = gpu_fanpercent(cgpu);
-		cgpu->gpu_clock = gpu_clock(cgpu);
 		cgpu->gpu_pstate = gpu_pstate(cgpu);
 #endif
+		gpu_clocks(cgpu);
 
 		// todo: can be 0 if set by algo (auto)
 		if (opt_intensity == 0 && opt_work_size) {
@@ -149,9 +149,9 @@ static void gpustatus(int thr_id)
 
 		cgpu->khashes = stats_get_speed(cgpu->gpu_id, 0.0) / 1000.0;
 
-		snprintf(pstate, sizeof(pstate), "P%hu", cgpu->gpu_pstate);
-		if (cgpu->gpu_pstate == -1)
-			(*pstate) = '\0';
+		memset(pstate, 0, sizeof(pstate));
+		if (cgpu->gpu_pstate != -1)
+			snprintf(pstate, sizeof(pstate), "P%hu", cgpu->gpu_pstate);
 
 		card = device_name[gpuid];
 
@@ -196,7 +196,7 @@ static char *getsummary(char *params)
 		"ALGO=%s;GPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;"
 		"ACCMN=%.3f;DIFF=%.6f;UPTIME=%.0f;TS=%u|",
 		PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
-		algo, opt_n_threads, (double)global_hashrate / 1000.0,
+		algo, num_processors, (double)global_hashrate / 1000.0,
 		accepted_count, rejected_count,
 		accps, global_diff, uptime, (uint32_t) ts);
 	return buffer;
@@ -227,22 +227,23 @@ static void gpuhwinfos(int gpu_id)
 	cgpu->gpu_temp = gpu_temp(cgpu);
 	cgpu->gpu_fan = gpu_fanpercent(cgpu);
 	cgpu->gpu_pstate = gpu_pstate(cgpu);
-	cgpu->gpu_clock = gpu_clock(cgpu);
 	gpu_info(cgpu);
 #endif
 
-	snprintf(pstate, sizeof(pstate), "P%hu", cgpu->gpu_pstate);
-	if (cgpu->gpu_pstate == -1)
-		(*pstate) = '\0';
+	gpu_clocks(cgpu);
+
+	memset(pstate, 0, sizeof(pstate));
+	if (cgpu->gpu_pstate != -1)
+		snprintf(pstate, sizeof(pstate), "P%hu", cgpu->gpu_pstate);
 
 	card = device_name[gpu_id];
 
-	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;"
-		"TEMP=%.1f;FAN=%d;FREQ=%d;PST=%s;"
+	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;MEM=%llu;"
+		"TEMP=%.1f;FAN=%d;FREQ=%d;MEMFREQ=%d;PST=%s;"
 		"VID=%hx;PID=%hx;BIOS=%s|",
-		gpu_id, cgpu->gpu_bus, card,
-		cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, pstate,
-		cgpu->gpu_vid, cgpu->gpu_pid, cgpu->gpu_desc);
+		gpu_id, cgpu->gpu_bus, card, (unsigned long long) cgpu->gpu_mem, // gpu_mem is size_t: cast so %llu matches on every ABI (Win64 included)
+		cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, cgpu->gpu_memclock,
+		pstate,	cgpu->gpu_vid, cgpu->gpu_pid, cgpu->gpu_desc);
 
 	strcat(buffer, buf);
 }
diff --git a/miner.h b/miner.h
index 7b72642..2280071 100644
--- a/miner.h
+++ b/miner.h
@@ -380,6 +380,7 @@ struct cgpu_info {
 	int gpu_fan;
 	int gpu_clock;
 	int gpu_memclock;
+	size_t gpu_mem;
 	uint32_t gpu_usage;
 	double gpu_vddc;
 	int16_t gpu_pstate;
diff --git a/nvml.cpp b/nvml.cpp
index 167ad0d..33263c9 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -15,8 +15,6 @@
  *
  */
 
-#ifdef USE_WRAPNVML
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -26,6 +24,9 @@
 
 #include "miner.h"
 #include "cuda_runtime.h"
+
+#ifdef USE_WRAPNVML
+
 #include "nvml.h"
 
 extern wrap_nvml_handle *hnvml;
@@ -74,10 +75,6 @@ static uint32_t device_bus_ids[8] = { 0 };
 	}
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 wrap_nvml_handle * wrap_nvml_create()
 {
 	int i=0;
@@ -634,26 +631,6 @@ float gpu_temp(struct cgpu_info *gpu)
 	return tc;
 }
 
-int gpu_clock(struct cgpu_info *gpu)
-{
-	unsigned int freq = 0;
-	int support = -1;
-	if (hnvml) {
-		support = wrap_nvml_get_clock(hnvml, gpu->gpu_id, NVML_CLOCK_GRAPHICS, &freq);
-	}
-	if (support == -1) {
-#ifdef WIN32
-		nvapi_getclock(nvapi_dev_map[gpu->gpu_id], &freq);
-#else
-		cudaDeviceProp props;
-		if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
-			freq = props.clockRate;
-		}
-#endif
-	}
-	return (int) freq;
-}
-
 int gpu_pstate(struct cgpu_info *gpu)
 {
 	int pstate = -1;
@@ -714,13 +691,20 @@ int gpu_info(struct cgpu_info *gpu)
 	return 0;
 }
 
-#if defined(__cplusplus)
-}
-#endif
-
-
 #endif /* USE_WRAPNVML */
 
+int gpu_clocks(struct cgpu_info *gpu)
+{
+	/* fill gpu_clock, gpu_memclock and gpu_mem from the cuda device properties; 0 on success, -1 (fields untouched) on failure */
+	cudaDeviceProp devprop;
+	if (cudaGetDeviceProperties(&devprop, gpu->gpu_id) != cudaSuccess)
+		return -1;
+	gpu->gpu_clock = devprop.clockRate;
+	gpu->gpu_memclock = devprop.memoryClockRate;
+	gpu->gpu_mem = devprop.totalGlobalMem;
+	return 0;
+}
+
 /* strings /usr/lib/nvidia-340/libnvidia-ml.so | grep nvmlDeviceGet | grep -v : | sort | uniq
 
 	nvmlDeviceGetAccountingBufferSize
diff --git a/nvml.h b/nvml.h
index 3adf4e0..7c14850 100644
--- a/nvml.h
+++ b/nvml.h
@@ -14,10 +14,6 @@
  *
  */
 
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
 /*
  * Ugly hacks to avoid dependencies on the real nvml.h until it starts
  * getting included with the CUDA toolkit or a GDK that's got a known
@@ -141,9 +137,10 @@ int wrap_nvapi_init();
 
 #include "miner.h"
 
+#ifdef USE_WRAPNVML
+
 int gpu_fanpercent(struct cgpu_info *gpu);
 float gpu_temp(struct cgpu_info *gpu);
-int gpu_clock(struct cgpu_info *gpu);
 unsigned int gpu_power(struct cgpu_info *gpu);
 unsigned int gpu_usage(struct cgpu_info *gpu);
 int gpu_pstate(struct cgpu_info *gpu);
@@ -151,6 +148,7 @@ int gpu_busid(struct cgpu_info *gpu);
 
 int gpu_info(struct cgpu_info *gpu);
 
-#if defined(__cplusplus)
-}
 #endif
+
+// cuda api based
+int gpu_clocks(struct cgpu_info *gpu);