diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu
index 54689c3..24d2975 100644
--- a/quark/cuda_quark_blake512.cu
+++ b/quark/cuda_quark_blake512.cu
@@ -235,8 +235,8 @@ __global__ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounc
 
 // ---------------------------- END CUDA quark_blake512 functions ------------------------------------
 
-// Setup-Funktionen
-__host__ void quark_blake512_cpu_init(int thr_id, uint32_t threads)
+__host__ // Host-side init: copies the hash tables into GPU memory (c_sigma via cudaMemcpyToSymbol in the visible part).
+void quark_blake512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma,
@@ -246,7 +246,8 @@ __host__ void quark_blake512_cpu_init(int thr_id, uint32_t threads)
 }
 
 // Blake512 für 80 Byte grosse Eingangsdaten
-__host__ void quark_blake512_cpu_setBlock_80(void *pdata)
+__host__ // Host-side setup for the 80-byte input path: per the in-body comments, prepares the padded message; only the nonce (from byte 76) is still to be filled in.
+void quark_blake512_cpu_setBlock_80(void *pdata)
 {
 	// Message mit Padding bereitstellen
 	// lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen.
@@ -263,7 +264,8 @@ __host__ void quark_blake512_cpu_setBlock_80(void *pdata)
 	);
 }
 
-__host__ void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order)
+__host__ // Host launcher for quark_blake512_gpu_hash_64: 256 threads per block, grid size is threads/256 rounded up.
+void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order)
 {
 	const uint32_t threadsperblock = 256;
 
@@ -271,28 +273,19 @@ __host__ void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
 
-	// Größe des dynamischen Shared Memory Bereichs
-	size_t shared_size = 0;
-
-	quark_blake512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, (uint64_t*)d_outputHash);
+	quark_blake512_gpu_hash_64<<<grid, block>>>(threads, startNounce, d_nonceVector, (uint64_t*)d_outputHash); // shared-memory launch argument dropped; it was always 0 before
 
 	// Strategisches Sleep Kommando zur Senkung der CPU Last
-	MyStreamSynchronize(NULL, order, thr_id);
+	// MyStreamSynchronize(NULL, order, thr_id); -- disabled: the function now returns right after the launch with no host-side sync. NOTE(review): presumably the caller synchronizes before reading d_outputHash -- confirm.
 }
 
-__host__ void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+__host__ // Host launcher for quark_blake512_gpu_hash_80 (256 threads per block, grid rounded up); thr_id and order are unused in the new body but stay in the signature.
+void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
 {
 	const uint32_t threadsperblock = 256;
 
-	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
 
-	// Größe des dynamischen Shared Memory Bereichs
-	size_t shared_size = 0;
-
-	quark_blake512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
-
-	// Strategisches Sleep Kommando zur Senkung der CPU Last
-	MyStreamSynchronize(NULL, order, thr_id);
+	quark_blake512_gpu_hash_80<<<grid, block>>>(threads, startNounce, d_outputHash); // launch only: no shared-memory argument (was 0) and no MyStreamSynchronize afterwards. NOTE(review): presumably the caller synchronizes before reading d_outputHash -- confirm.
 }
diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu
index ae8566b..a3be27f 100644
--- a/qubit/qubit_luffa512.cu
+++ b/qubit/qubit_luffa512.cu
@@ -487,7 +487,6 @@ void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNoun
 	size_t shared_size = 0;
 
 	qubit_luffa512_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
-	MyStreamSynchronize(NULL, order, thr_id);
 }
 
 __host__
diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu
index ed6e63a..39ef7b2 100644
--- a/x11/cuda_x11_cubehash512.cu
+++ b/x11/cuda_x11_cubehash512.cu
@@ -306,6 +306,6 @@ void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNou
     size_t shared_size = 0;
 
     x11_cubehash512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
-    MyStreamSynchronize(NULL, order, thr_id);
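+    // MyStreamSynchronize(NULL, order, thr_id); -- disabled: no host-side sync after this launch. NOTE(review): presumably a later step synchronizes before d_hash is read -- confirm.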
 }
 
diff --git a/x11/cuda_x11_shavite512.cu b/x11/cuda_x11_shavite512.cu
index f324d86..2f5e806 100644
--- a/x11/cuda_x11_shavite512.cu
+++ b/x11/cuda_x11_shavite512.cu
@@ -1442,7 +1442,7 @@ void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNoun
 	dim3 block(threadsperblock);
 
 	x11_shavite512_gpu_hash_64<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
-	MyStreamSynchronize(NULL, order, thr_id);
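+	// MyStreamSynchronize(NULL, order, thr_id); -- disabled: no host-side sync after this launch. NOTE(review): presumably a later step synchronizes before d_hash is read -- confirm.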
 }
 
 __host__
@@ -1454,7 +1454,6 @@ void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNoun
 	dim3 block(threadsperblock);
 
 	x11_shavite512_gpu_hash_80<<<grid, block>>>(threads, startNounce, d_outputHash);
-	MyStreamSynchronize(NULL, order, thr_id);
 }
 
 __host__
diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu
index 9a78397..326ed12 100644
--- a/x11/cuda_x11_simd512.cu
+++ b/x11/cuda_x11_simd512.cu
@@ -697,5 +697,5 @@ void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce,
 
 	x11_simd512_gpu_final_64 <<<grid, block>>> (threads, d_hash, d_temp4[thr_id], d_state[thr_id]);
 
-	MyStreamSynchronize(NULL, order, thr_id);
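+	// MyStreamSynchronize(NULL, order, thr_id); -- disabled: no host-side sync after the final kernel launch. NOTE(review): presumably a later step synchronizes before d_hash is read -- confirm.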
 }