diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu
index aab71b6..20e73db 100644
--- a/JHA/cuda_jha_compactionTest.cu
+++ b/JHA/cuda_jha_compactionTest.cu
@@ -10,9 +10,6 @@ static uint32_t *h_numValid[8];
 
 static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 // True/False tester
 typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
 
diff --git a/JHA/cuda_jha_keccak512.cu b/JHA/cuda_jha_keccak512.cu
index e1a73d8..52ddd1f 100644
--- a/JHA/cuda_jha_keccak512.cu
+++ b/JHA/cuda_jha_keccak512.cu
@@ -3,9 +3,6 @@
 
 #include "cuda_helper.h"
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 __constant__ uint64_t c_State[25];
 __constant__ uint32_t c_PaddedMessage[18];
 
diff --git a/api.cpp b/api.cpp
index 26a6829..a83c7cd 100644
--- a/api.cpp
+++ b/api.cpp
@@ -103,11 +103,8 @@ static int bye = 0;
 
 extern char *opt_api_allow;
 extern int opt_api_listen; /* port */
-extern uint64_t global_hashrate;
 extern uint32_t accepted_count;
 extern uint32_t rejected_count;
-extern int device_map[8];
-extern char *device_name[8];
 extern int num_cpus;
 extern char driver_version[32];
 extern struct stratum_ctx stratum;
@@ -203,7 +200,7 @@ static char *getsummary(char *params)
 		"ALGO=%s;GPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;"
 		"ACCMN=%.3f;DIFF=%.6f;UPTIME=%.0f;TS=%u|",
 		PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
-		algo, num_processors, (double)global_hashrate / 1000.0,
+		algo, active_gpus, (double)global_hashrate / 1000.0,
 		accepted_count, rejected_count,
 		accps, global_diff, uptime, (uint32_t) ts);
 	return buffer;
@@ -277,10 +274,10 @@ static void gpuhwinfos(int gpu_id)
 
 	card = device_name[gpu_id];
 
-	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;MEM=%lu;"
+	snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%u;MEM=%lu;"
 		"TEMP=%.1f;FAN=%d;FREQ=%d;MEMFREQ=%d;PST=%s;"
 		"VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|",
-		gpu_id, cgpu->gpu_bus, card, cgpu->gpu_mem,
+		gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem,
 		cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, cgpu->gpu_memclock,
 		pstate, cgpu->gpu_vid, cgpu->gpu_pid, cgpu->nvml_id, cgpu->nvapi_id,
 		cgpu->gpu_sn, cgpu->gpu_desc);
@@ -304,12 +301,12 @@ static void syshwinfos()
 {
 	char buf[256];
 
-	float temp = cpu_temp(0);
-	uint32_t clock = cpu_clock(0);
+	int cputc = (int) cpu_temp(0);
+	uint32_t cpuclk = cpu_clock(0);
 
 	memset(buf, 0, sizeof(buf));
-	snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%.1f;CPUFREQ=%d|",
-		os_name(), driver_version, num_cpus, temp, clock);
+	snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%d;CPUFREQ=%d|",
+		os_name(), driver_version, num_cpus, cputc, cpuclk);
 	strcat(buffer, buf);
 }
 
@@ -341,9 +338,9 @@ static char *gethistory(char *params)
 	for (int i = 0; i < records; i++) {
 		time_t ts = data[i].tm_stat;
 		p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;"
-				"COUNT=%u;FOUND=%u;TS=%u|",
+				"COUNT=%u;FOUND=%u;ID=%u;TS=%u|",
 			data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty,
-			data[i].hashcount, data[i].hashfound, (uint32_t)ts);
+			data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts);
 	}
 	return buffer;
 }
diff --git a/blake32.cu b/blake32.cu
index 87bd1ca..75e656a 100644
--- a/blake32.cu
+++ b/blake32.cu
@@ -425,7 +425,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 	}
 
 	if (!init[thr_id]) {
-		if (num_processors > 1)
+		if (active_gpus > 1)
 			cudaSetDevice(device_map[thr_id]);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
diff --git a/ccminer.cpp b/ccminer.cpp
index ce68d13..3808c59 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -192,7 +192,6 @@ bool want_stratum = true;
 bool have_stratum = false;
 static bool submit_old = false;
 bool use_syslog = false;
-static char* opt_syslog_pfx = (char*) PACKAGE_NAME;
 bool use_colors = true;
 static bool opt_background = false;
 bool opt_quiet = false;
@@ -208,10 +207,10 @@ static double opt_difficulty = 1; // CH
 bool opt_trust_pool = false;
 uint16_t opt_vote = 9999;
 int num_cpus;
-int num_processors;
-int device_map[8] = {0,1,2,3,4,5,6,7}; // CB
-char *device_name[8]; // CB
-int device_sm[8];
+int active_gpus;
+char * device_name[8];
+short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+long  device_sm[8] = { 0 };
 char *rpc_user = NULL;
 static char *rpc_url;
 static char *rpc_userpass;
@@ -241,8 +240,9 @@ int opt_statsavg = 30;
 int opt_intensity = 0;
 uint32_t opt_work_size = 0; /* default */
 uint32_t opt_work_adds = 0;
-
-char *opt_api_allow = (char*) "127.0.0.1"; /* 0.0.0.0 for all ips */
+// strdup on char* to allow a common free() if used
+static char* opt_syslog_pfx = strdup(PROGRAM_NAME);
+char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
 int opt_api_listen = 4068; /* 0 to disable */
 
 #ifdef HAVE_GETOPT_LONG
@@ -409,6 +409,8 @@ void proper_exit(int reason)
 	if (hnvml)
 		wrap_nvml_destroy(hnvml);
 #endif
+	free(opt_syslog_pfx);
+	free(opt_api_allow);
 	exit(reason);
 }
 
@@ -1671,6 +1673,7 @@ static void parse_arg(int key, char *arg)
 		if (p) {
 			/* ip:port */
 			if (p - arg > 0) {
+				free(opt_api_allow);
 				opt_api_allow = strdup(arg);
 				opt_api_allow[p - arg] = '\0';
 			}
@@ -1872,8 +1875,10 @@ static void parse_arg(int key, char *arg)
 	case 1008:
 		applog(LOG_INFO, "Now logging to syslog...");
 		use_syslog = true;
-		if (arg && strlen(arg))
+		if (arg && strlen(arg)) {
+			free(opt_syslog_pfx);
 			opt_syslog_pfx = strdup(arg);
+		}
 		break;
 	case 'd': // CB
 		{
@@ -1899,7 +1904,7 @@ static void parse_arg(int key, char *arg)
 					}
 				}
 				// set number of active gpus
-				num_processors = opt_n_threads;
+				active_gpus = opt_n_threads;
 				pch = strtok (NULL, ",");
 			}
 		}
@@ -2079,7 +2084,7 @@ int main(int argc, char *argv[])
 		num_cpus = 1;
 
 	// number of gpus
-	num_processors = cuda_num_devices();
+	active_gpus = cuda_num_devices();
 	cuda_devicenames();
 
 	/* parse command line */
@@ -2133,12 +2138,12 @@ int main(int argc, char *argv[])
 	SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE);
 #endif
 
-	if (num_processors == 0) {
+	if (active_gpus == 0) {
 		applog(LOG_ERR, "No CUDA devices found! terminating.");
 		exit(1);
 	}
 	if (!opt_n_threads)
-		opt_n_threads = num_processors;
+		opt_n_threads = active_gpus;
 
 #ifdef HAVE_SYSLOG_H
 	if (use_syslog)
@@ -2208,7 +2213,7 @@ int main(int argc, char *argv[])
 
 #ifdef USE_WRAPNVML
 #ifndef WIN32
-	/* nvml is currently not usable on Windows (even for x64) */
+	/* nvml is currently not the best choice on Windows (only in x64) */
 	hnvml = wrap_nvml_create();
 	if (hnvml)
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
@@ -2241,8 +2246,9 @@ int main(int argc, char *argv[])
 		thr = &thr_info[i];
 
 		thr->id = i;
-		thr->gpu.gpu_id = device_map[i];
 		thr->gpu.thr_id = i;
+		thr->gpu.gpu_id = (uint8_t) device_map[i];
+		thr->gpu.gpu_arch = (uint16_t) device_sm[device_map[i]];
 		thr->q = tq_new();
 		if (!thr->q)
 			return 1;
@@ -2253,9 +2259,9 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	applog(LOG_INFO, "%d miner threads started, "
+	applog(LOG_INFO, "%d miner thread%s started, "
 		"using '%s' algorithm.",
-		opt_n_threads,
+		opt_n_threads, opt_n_threads > 1 ? "s":"",
 		algo_names[opt_algo]);
 
 #ifdef WIN32
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 1cf7791..e4ee1da 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -172,7 +172,7 @@
       <CInterleavedPTX>false</CInterleavedPTX>
       <MaxRegCount>80</MaxRegCount>
       <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
+      <Keep>false</Keep>
       <CodeGeneration>compute_30,sm_30;compute_50,sm_50</CodeGeneration>
       <AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions>
       <Defines>
@@ -215,7 +215,7 @@
       <CInterleavedPTX>false</CInterleavedPTX>
       <MaxRegCount>80</MaxRegCount>
       <PtxAsOptionV>true</PtxAsOptionV>
-      <Keep>true</Keep>
+      <Keep>false</Keep>
       <CodeGeneration>compute_50,sm_50</CodeGeneration>
       <Include>
       </Include>
@@ -366,7 +366,7 @@
     <CudaCompile Include="heavy\heavy.cu">
     </CudaCompile>
     <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes -O2" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
@@ -375,7 +375,7 @@
     </CudaCompile>
     <CudaCompile Include="blake32.cu">
       <MaxRegCount>64</MaxRegCount>
-      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
       <FastMath>true</FastMath>
     </CudaCompile>
     <CudaCompile Include="keccak\cuda_keccak256.cu">
@@ -384,7 +384,7 @@
     <CudaCompile Include="keccak\keccak256.cu" />
     <CudaCompile Include="pentablake.cu">
       <MaxRegCount>80</MaxRegCount>
-      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
       <FastMath>true</FastMath>
     </CudaCompile>
     <CudaCompile Include="quark\animecoin.cu">
@@ -402,7 +402,7 @@
       <CInterleavedPTX>false</CInterleavedPTX>
     </CudaCompile>
     <CudaCompile Include="quark\cuda_quark_compactionTest.cu">
-      <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes -O2" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="quark\cuda_quark_groestl512.cu">
diff --git a/cuda.cpp b/cuda.cpp
index 780b648..e409ca5 100644
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -25,10 +25,6 @@
 #include "compat.h" // sleep
 #endif
 
-extern char *device_name[8];
-extern int device_map[8];
-extern int device_sm[8];
-
 // CUDA Devices on the System
 int cuda_num_devices()
 {
@@ -74,7 +70,7 @@ void cuda_devicenames()
 		cudaGetDeviceProperties(&props, device_map[i]);
 
 		device_name[i] = strdup(props.name);
-		device_sm[i] = props.major * 100 + props.minor * 10;
+		device_sm[i] = (props.major * 100 + props.minor * 10);
 	}
 }
 
diff --git a/cuda_helper.h b/cuda_helper.h
index d94e72a..f22100e 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -13,8 +13,8 @@
 
 #include <stdint.h>
 
-extern int device_map[8];
-extern int device_sm[8];
+extern "C" short device_map[8];
+extern "C"  long device_sm[8];
 
 // common functions
 extern void cuda_check_cpu_init(int thr_id, int threads);
diff --git a/fuguecoin.cpp b/fuguecoin.cpp
index 3f31a16..ed928c2 100644
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@@ -13,9 +13,6 @@ extern "C" void my_fugue256(void *cc, const void *data, size_t len);
 extern "C" void my_fugue256_close(void *cc, void *dst);
 extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
 
-extern int device_map[8];
-extern int device_sm[8];
-
 // vorbereitete Kontexte nach den ersten 80 Bytes
 sph_fugue256_context  ctx_fugue_const[8];
 
diff --git a/miner.h b/miner.h
index 52d538c..90b9d52 100644
--- a/miner.h
+++ b/miner.h
@@ -386,6 +386,7 @@ struct cgpu_info {
 	uint8_t has_monitoring;
 	float gpu_temp;
 	int gpu_fan;
+	uint16_t gpu_arch;
 	int gpu_clock;
 	int gpu_memclock;
 	size_t gpu_mem;
@@ -410,6 +411,7 @@ struct thr_api {
 };
 
 struct stats_data {
+	uint32_t uid;
 	uint32_t tm_stat;
 	uint32_t hashcount;
 	uint32_t height;
@@ -454,7 +456,7 @@ extern bool opt_protocol;
 extern bool opt_tracegpu;
 extern int opt_intensity;
 extern int opt_n_threads;
-extern int num_processors;
+extern int active_gpus;
 extern int opt_timeout;
 extern bool want_longpoll;
 extern bool have_longpoll;
@@ -478,6 +480,10 @@ extern uint32_t opt_work_size;
 extern uint64_t global_hashrate;
 extern double   global_diff;
 
+extern char* device_name[8];
+extern short device_map[8];
+extern long  device_sm[8];
+
 #define CL_N    "\x1B[0m"
 #define CL_RED  "\x1B[31m"
 #define CL_GRN  "\x1B[32m"
diff --git a/pentablake.cu b/pentablake.cu
index 726ba5f..e1e1d96 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -511,7 +511,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
 		((uint32_t*)ptarget)[7] = 0x000F;
 
 	if (!init[thr_id]) {
-		if (num_processors > 1) {
+		if (active_gpus > 1) {
 			cudaSetDevice(device_map[thr_id]);
 		}
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu
index f037b04..ce4a773 100644
--- a/quark/cuda_bmw512.cu
+++ b/quark/cuda_bmw512.cu
@@ -3,9 +3,6 @@
 
 #include "cuda_helper.h"
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 // die Message it Padding zur Berechnung auf der GPU
 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
 
diff --git a/quark/cuda_jh512.cu b/quark/cuda_jh512.cu
index 07453e8..8f62810 100644
--- a/quark/cuda_jh512.cu
+++ b/quark/cuda_jh512.cu
@@ -1,8 +1,5 @@
 #include "cuda_helper.h"
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 typedef struct {
     uint32_t x[8][4];                     /*the 1024-bit state, ( x[i][0] || x[i][1] || x[i][2] || x[i][3] ) is the ith row of the state in the pseudocode*/
     uint32_t buffer[16];                  /*the 512-bit message block to be hashed;*/
diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu
index 6a0ec5f..cac7562 100644
--- a/quark/cuda_quark_compactionTest.cu
+++ b/quark/cuda_quark_compactionTest.cu
@@ -8,10 +8,7 @@ static uint32_t *d_tempBranch1Nonces[8];
 static uint32_t *d_numValid[8];
 static uint32_t *h_numValid[8];
 
-static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
-
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
 
 // True/False tester
 typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@@ -83,7 +80,7 @@ __global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *
 				inpHash = &inpHashes[id<<4];
 			}else
 			{
-				// Nonce-Liste verfügbar
+				// Nonce-Liste verfügbar
 				int nonce = d_validNonceTable[id] - startNounce;
 				inpHash = &inpHashes[nonce<<4];
 			}			
@@ -200,7 +197,7 @@ __global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cud
 			inpHash = &inpHashes[id<<4];
 		}else
 		{
-			// Nonce-Liste verfügbar
+			// Nonce-Liste verfügbar
 			int nonce = d_validNonceTable[id] - startNounce;
 			actNounce = nonce;
 			inpHash = &inpHashes[nonce<<4];
@@ -335,7 +332,7 @@ __host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
 											int order)
 {
 	// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
-	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
+	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
 	
 	quark_compactTest_cpu_dualCompaction(thr_id, threads,
 		h_numValid[thr_id], d_nonces1, d_nonces2,
@@ -351,7 +348,7 @@ __host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads
 											int order)
 {
 	// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
-	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
+	// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
 
 	quark_compactTest_cpu_singleCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
 
diff --git a/quark/cuda_quark_keccak512.cu b/quark/cuda_quark_keccak512.cu
index b205100..0ddbc05 100644
--- a/quark/cuda_quark_keccak512.cu
+++ b/quark/cuda_quark_keccak512.cu
@@ -3,9 +3,6 @@
 
 #include "cuda_helper.h"
 
-// heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 #define U32TO64_LE(p) \
     (((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))
 
diff --git a/stats.cpp b/stats.cpp
index 5a17273..6b87b05 100644
--- a/stats.cpp
+++ b/stats.cpp
@@ -19,10 +19,9 @@ static uint64_t uid = 0;
 
 extern uint64_t global_hashrate;
 extern int opt_statsavg;
-extern int device_map[8];
 
 /**
- * Store speed per thread (todo: compute vardiff ?)
+ * Store speed per thread
  */
 void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found, uint32_t height)
 {
@@ -38,6 +37,7 @@ void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8
 		return;
 
 	memset(&data, 0, sizeof(data));
+	data.uid = uid;
 	data.gpu_id = gpu;
 	data.thr_id = (uint8_t)thr_id;
 	data.tm_stat = (uint32_t) time(NULL);
diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu
index cd2ccef..0a570dc 100644
--- a/x11/cuda_x11_cubehash512.cu
+++ b/x11/cuda_x11_cubehash512.cu
@@ -1,8 +1,5 @@
 #include "cuda_helper.h"
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 typedef unsigned char BitSequence;
 
 #define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */
diff --git a/x13/cuda_x13_fugue512.cu b/x13/cuda_x13_fugue512.cu
index 8b4a72b..4360a0f 100644
--- a/x13/cuda_x13_fugue512.cu
+++ b/x13/cuda_x13_fugue512.cu
@@ -7,9 +7,6 @@
  */
 #include "cuda_helper.h"
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 /*
  * X13 kernel implementation.
  *
diff --git a/x15/cuda_x14_shabal512.cu b/x15/cuda_x14_shabal512.cu
index 3d55747..60ffb32 100644
--- a/x15/cuda_x14_shabal512.cu
+++ b/x15/cuda_x14_shabal512.cu
@@ -1,10 +1,8 @@
 /*
- * Shabal-512 for X14/X15 (STUB)
+ * Shabal-512 for X14/X15
  */
 #include "cuda_helper.h"
 
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 /* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
 /*
  * Shabal implementation.
diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu
index f15133d..3e84818 100644
--- a/x15/cuda_x15_whirlpool.cu
+++ b/x15/cuda_x15_whirlpool.cu
@@ -11,8 +11,6 @@
 
 #include "cuda_helper.h"
 
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
 __constant__ uint32_t pTarget[8];
 
diff --git a/x17/cuda_x17_haval512.cu b/x17/cuda_x17_haval512.cu
index ba01cc9..ff9912d 100644
--- a/x17/cuda_x17_haval512.cu
+++ b/x17/cuda_x17_haval512.cu
@@ -48,9 +48,6 @@
 
 #define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
 
-// in heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 static __constant__ uint32_t initVector[8];
 
 static const uint32_t c_initVector[8] = {
diff --git a/x17/cuda_x17_sha512.cu b/x17/cuda_x17_sha512.cu
index aeb72fd..8ef2a27 100644
--- a/x17/cuda_x17_sha512.cu
+++ b/x17/cuda_x17_sha512.cu
@@ -47,9 +47,6 @@
 #define SPH_T32(x)  ((x) & SPH_C32(0xFFFFFFFF))
 #define SPH_T64(x)  ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
 
-// in heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
 static __constant__ uint64_t H_512[8];
 
 static const uint64_t H512[8] = {