From e1c4b3042cd356f9ccf30734b13772ab5393e399 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Fri, 25 Sep 2015 07:51:09 +0200
Subject: [PATCH] algos: add functions to free allocated resources

Will be used later for algo switching

not really tested yet...
---
 Algo256/blake256.cu                | 16 ++++++++++++
 Algo256/bmw.cu                     | 16 ++++++++++++
 Algo256/cuda_bmw256.cu             |  7 +++++
 Algo256/cuda_fugue256.cu           |  7 +++++
 Algo256/cuda_groestl256.cu         |  8 +++++-
 Algo256/cuda_keccak256.cu          |  7 +++++
 Algo256/keccak256.cu               | 17 +++++++++++++
 JHA/jackpotcoin.cu                 | 22 ++++++++++++++++
 ccminer.cpp                        | 39 ++++++++++++++++++++++++++++
 cuda_checkhash.cu                  |  7 +++++
 cuda_fugue256.h                    |  1 +
 cuda_groestlcoin.cu                |  6 +++++
 cuda_groestlcoin.h                 |  1 +
 cuda_helper.h                      |  1 +
 cuda_myriadgroestl.cu              |  7 +++++
 cuda_nist5.cu                      | 41 +++++++++++++++++++++++++-----
 fuguecoin.cpp                      | 27 +++++++++++++++-----
 groestlcoin.cpp                    | 13 ++++++++++
 heavy/heavy.cu                     | 16 ++++++++++++
 lyra2/lyra2RE.cu                   | 17 +++++++++++++
 lyra2/lyra2REv2.cu                 | 18 +++++++++++++
 miner.h                            | 36 ++++++++++++++++++++++++++
 myriadgroestl.cpp                  | 14 ++++++++++
 neoscrypt/cuda_neoscrypt.cu        |  7 +++++
 neoscrypt/neoscrypt.cpp            | 15 +++++++++++
 pentablake.cu                      | 17 +++++++++++++
 quark/cuda_quark_compactionTest.cu | 14 +++++++++-
 quark/quarkcoin.cu                 | 25 ++++++++++++++++++
 qubit/deep.cu                      | 16 ++++++++++++
 qubit/luffa.cu                     | 16 ++++++++++++
 qubit/qubit.cu                     | 16 ++++++++++++
 scrypt-jane.cpp                    |  6 +++++
 scrypt.cpp                         |  6 +++++
 skein.cu                           | 16 ++++++++++++
 skein2.cpp                         | 16 ++++++++++++
 x11/c11.cu                         | 19 +++++++++++---
 x11/fresh.cu                       | 20 ++++++++++++---
 x11/s3.cu                          | 16 ++++++++++++
 x11/x11.cu                         | 20 ++++++++++++---
 x13/x13.cu                         | 19 +++++++++++---
 x15/cuda_whirlpoolx.cu             |  9 +++++++
 x15/whirlpoolx.cu                  | 17 +++++++++++++
 x15/x14.cu                         | 19 +++++++++++---
 x15/x15.cu                         | 19 +++++++++++---
 x17/x17.cu                         | 20 ++++++++++++---
 zr5.cu                             | 27 ++++++++++++++++++++
 46 files changed, 681 insertions(+), 38 deletions(-)

diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu
index edcaa8e..5378848 100644
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -489,3 +489,19 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 
 	return rc;
 }
+
+// cleanup
+extern "C" void free_blake256(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFreeHost(h_resNonce[thr_id]);
+	cudaFree(d_resNonce[thr_id]);
+
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu
index a71b092..100cece 100644
--- a/Algo256/bmw.cu
+++ b/Algo256/bmw.cu
@@ -101,3 +101,19 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
 	*hashes_done = pdata[19] - first_nonce;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_bmw(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/Algo256/cuda_bmw256.cu b/Algo256/cuda_bmw256.cu
index 6189606..0fde12e 100644
--- a/Algo256/cuda_bmw256.cu
+++ b/Algo256/cuda_bmw256.cu
@@ -269,6 +269,13 @@ void bmw256_cpu_init(int thr_id, uint32_t threads)
 	cudaMallocHost(&d_gnounce[thr_id], 2 * sizeof(uint32_t));
 }
 
+__host__
+void bmw256_cpu_free(int thr_id)
+{
+	cudaFree(d_GNonce[thr_id]);
+	cudaFreeHost(d_gnounce[thr_id]);
+}
+
 __host__
 void bmw256_setTarget(const void *pTargetIn)
 {
diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu
index 7bcbfcf..66d1c35 100644
--- a/Algo256/cuda_fugue256.cu
+++ b/Algo256/cuda_fugue256.cu
@@ -732,6 +732,13 @@ void fugue256_cpu_init(int thr_id, uint32_t threads)
 	cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
 }
 
+__host__
+void fugue256_cpu_free(int thr_id)
+{
+	cudaFree(d_fugue256_hashoutput[thr_id]);
+	cudaFree(d_resultNonce[thr_id]);
+}
+
 __host__
 void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 {
diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu
index 1e8bf61..b1ffa4f 100644
--- a/Algo256/cuda_groestl256.cu
+++ b/Algo256/cuda_groestl256.cu
@@ -262,7 +262,6 @@ void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *out
 __host__
 void groestl256_cpu_init(int thr_id, uint32_t threads)
 {
-
 	// Texturen mit obigem Makro initialisieren
 	texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
 	texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
@@ -277,6 +276,13 @@ void groestl256_cpu_init(int thr_id, uint32_t threads)
 	cudaMallocHost(&h_GNonces[thr_id], 2*sizeof(uint32_t));
 }
 
+__host__
+void groestl256_cpu_free(int thr_id)
+{
+	cudaFree(d_GNonces[thr_id]);
+	cudaFreeHost(h_GNonces[thr_id]);
+}
+
 __host__
 uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
 {
diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu
index cef2688..5ae7013 100644
--- a/Algo256/cuda_keccak256.cu
+++ b/Algo256/cuda_keccak256.cu
@@ -300,3 +300,10 @@ void keccak256_cpu_init(int thr_id, uint32_t threads)
 	CUDA_SAFE_CALL(cudaMalloc(&d_KNonce[thr_id], sizeof(uint32_t)));
 	CUDA_SAFE_CALL(cudaMallocHost(&d_nounce[thr_id], 1*sizeof(uint32_t)));
 }
+
+__host__
+void keccak256_cpu_free(int thr_id)
+{
+	cudaFree(d_KNonce[thr_id]);
+	cudaFreeHost(d_nounce[thr_id]);
+}
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
index 1b91ee1..bacacd1 100644
--- a/Algo256/keccak256.cu
+++ b/Algo256/keccak256.cu
@@ -17,6 +17,7 @@ extern "C"
 static uint32_t *d_hash[MAX_GPUS];
 
 extern void keccak256_cpu_init(int thr_id, uint32_t threads);
+extern void keccak256_cpu_free(int thr_id);
 extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
 extern uint32_t keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
 
@@ -93,3 +94,19 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
 
 	return 0;
 }
+
+// cleanup
+extern "C" void free_keccak256(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	keccak256_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
index 8edcd58..883706c 100644
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@@ -250,3 +250,25 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
 
 	return 0;
 }
+
+// cleanup
+extern "C" void free_jackpot(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cudaFree(d_branch1Nonces[thr_id]);
+	cudaFree(d_branch2Nonces[thr_id]);
+	cudaFree(d_branch3Nonces[thr_id]);
+
+	cudaFree(d_jackpotNonces[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/ccminer.cpp b/ccminer.cpp
index de79fbb..407c50d 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -1503,6 +1503,45 @@ static bool wanna_mine(int thr_id)
 	return state;
 }
 
+// required to switch algos
+void miner_free_device(int thr_id)
+{
+	// todo: some kind of algo "registration"
+	// to call automatically if needed
+	free_blake256(thr_id);
+	free_bmw(thr_id);
+	free_c11(thr_id);
+	free_deep(thr_id);
+	free_keccak256(thr_id);
+	free_fresh(thr_id);
+	free_fugue256(thr_id);
+	free_groestlcoin(thr_id);
+	free_heavy(thr_id);
+	free_jackpot(thr_id);
+	free_luffa(thr_id);
+	free_lyra2(thr_id);
+	free_lyra2v2(thr_id);
+	free_myriad(thr_id);
+	free_neoscrypt(thr_id);
+	free_nist5(thr_id);
+	free_pentablake(thr_id);
+	free_quark(thr_id);
+	free_qubit(thr_id);
+	free_skeincoin(thr_id);
+	free_skein2(thr_id);
+	free_s3(thr_id);
+	free_whirlx(thr_id);
+	free_x11(thr_id);
+	free_x13(thr_id);
+	free_x14(thr_id);
+	free_x15(thr_id);
+	free_x17(thr_id);
+	free_zr5(thr_id);
+	//free_sha256d(thr_id);
+	free_scrypt(thr_id);
+	free_scrypt_jane(thr_id);
+}
+
 static void *miner_thread(void *userdata)
 {
 	struct thr_info *mythr = (struct thr_info *)userdata;
diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
index 52ba432..20d9e45 100644
--- a/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@@ -23,6 +23,13 @@ void cuda_check_cpu_init(int thr_id, uint32_t threads)
     init_done = true;
 }
 
+__host__
+void cuda_check_cpu_free(int thr_id)
+{
+	cudaFree(d_resNonces[thr_id]);
+	cudaFreeHost(h_resNonces[thr_id]);
+}
+
 // Target Difficulty
 __host__
 void cuda_check_cpu_setTarget(const void *ptarget)
diff --git a/cuda_fugue256.h b/cuda_fugue256.h
index ec9b771..a4852b4 100644
--- a/cuda_fugue256.h
+++ b/cuda_fugue256.h
@@ -4,5 +4,6 @@
 void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce);
 void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
 void fugue256_cpu_init(int thr_id, uint32_t threads);
+void fugue256_cpu_free(int thr_id);
 
 #endif
diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu
index 27da418..fcef970 100644
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@@ -102,6 +102,12 @@ void groestlcoin_cpu_init(int thr_id, uint32_t threads)
 	cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
 }
 
+__host__
+void groestlcoin_cpu_free(int thr_id)
+{
+	cudaFree(d_resultNonce[thr_id]);
+}
+
 __host__
 void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 {
diff --git a/cuda_groestlcoin.h b/cuda_groestlcoin.h
index 7b95b59..d4fa42e 100644
--- a/cuda_groestlcoin.h
+++ b/cuda_groestlcoin.h
@@ -2,6 +2,7 @@
 #define _CUDA_GROESTLCOIN_H
 
 void groestlcoin_cpu_init(int thr_id, uint32_t threads);
+void groestlcoin_cpu_free(int thr_id);
 void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
 void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
 
diff --git a/cuda_helper.h b/cuda_helper.h
index 9c6b290..fd98bff 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -31,6 +31,7 @@ extern int cuda_arch[MAX_GPUS];
 extern int cuda_get_arch(int thr_id);
 extern void cuda_reset_device(int thr_id, bool *init);
 extern void cuda_check_cpu_init(int thr_id, uint32_t threads);
+extern void cuda_check_cpu_free(int thr_id);
 extern void cuda_check_cpu_setTarget(const void *ptarget);
 extern uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash);
 extern uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce);
diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu
index 28ea94c..681b6a0 100644
--- a/cuda_myriadgroestl.cu
+++ b/cuda_myriadgroestl.cu
@@ -327,6 +327,13 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
     cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads);
 }
 
+__host__
+void myriadgroestl_cpu_free(int thr_id)
+{
+    cudaFree(d_resultNonce[thr_id]);
+    cudaFree(d_outputHashes[thr_id]);
+}
+
 __host__
 void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 {
diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index 75582ea..1ed36b0 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -72,6 +72,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
 	uint32_t *pdata = work->data;
 	uint32_t *ptarget = work->target;
 	const uint32_t first_nonce = pdata[19];
+	int res = 0;
 
 	uint32_t throughput =  device_intensity(thr_id, __func__, 1 << 20); // 256*256*16
 	throughput = min(throughput,  (max_nonce - first_nonce));
@@ -83,19 +84,26 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
 	{
 		cudaSetDevice(device_map[thr_id]);
 
-		// Konstanten kopieren, Speicher belegen
+		// Constants copy/init (no device alloc in these algos)
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 
-		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
+		// char[64] work space for hashes results
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput));
 
 		cuda_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
 
+#ifdef USE_STREAMS
+	cudaStream_t stream[5];
+	for (int i = 0; i < 5; i++)
+		cudaStreamCreate(&stream[i]);
+#endif
+
 	for (int k=0; k < 20; k++)
 		be32enc(&endiandata[k], pdata[k]);
 
@@ -123,7 +131,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
 			nist5hash(vhash64, endiandata);
 
 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
-				int res = 1;
+				res = 1;
 				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
 				bn_store_hash_target_ratio(vhash64, ptarget, work);
 				if (secNonce != 0) {
@@ -135,7 +143,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
 					res++;
 				}
 				pdata[19] = foundNonce;
-				return res;
+				goto out;
 			}
 			else {
 				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
@@ -146,6 +154,27 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
 
 	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
 
-	*hashes_done = pdata[19] - first_nonce + 1;
-	return 0;
+out:
+#ifdef USE_STREAMS
+	for (int i = 0; i < 5; i++)
+		cudaStreamDestroy(stream[i]);
+#endif
+
+	return res;
 }
+
+// ressources cleanup
+extern "C" void free_nist5(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/fuguecoin.cpp b/fuguecoin.cpp
index 056f92d..1c51a50 100644
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@@ -20,6 +20,15 @@ extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n,
     ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
       (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
 
+void fugue256_hash(unsigned char* output, const unsigned char* input, int len)
+{
+	sph_fugue256_context ctx;
+
+	sph_fugue256_init(&ctx);
+	sph_fugue256(&ctx, input, len);
+	sph_fugue256_close(&ctx, (void *)output);
+}
+
 static bool init[MAX_GPUS] = { 0 };
 
 int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
@@ -91,11 +100,17 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne
 	return 0;
 }
 
-void fugue256_hash(unsigned char* output, const unsigned char* input, int len)
+// cleanup
+void free_fugue256(int thr_id)
 {
-	sph_fugue256_context ctx;
+	if (!init[thr_id])
+		return;
 
-	sph_fugue256_init(&ctx);
-	sph_fugue256(&ctx, input, len);
-	sph_fugue256_close(&ctx, (void *)output);
-}
+	cudaSetDevice(device_map[thr_id]);
+
+	fugue256_cpu_free(thr_id);
+
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/groestlcoin.cpp b/groestlcoin.cpp
index 9c81aa5..5a3957b 100644
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -91,3 +91,16 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
 	return 0;
 }
 
+// cleanup
+void free_groestlcoin(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	groestlcoin_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/heavy/heavy.cu b/heavy/heavy.cu
index 023f780..eeb1aba 100644
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@@ -300,6 +300,22 @@ exit:
     return rc;
 }
 
+// cleanup
+extern "C" void free_heavy(int thr_id)
+{
+    if (!init[thr_id])
+        return;
+
+    cudaSetDevice(device_map[thr_id]);
+
+    cudaFree(heavy_nonceVector[thr_id]);
+
+    // todo: free sub algos vectors
+
+    init[thr_id] = false;
+
+    cudaDeviceSynchronize();
+}
 __host__
 void heavycoin_hash(uchar* output, const uchar* input, int len)
 {
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
index ce95449..3f74484 100644
--- a/lyra2/lyra2RE.cu
+++ b/lyra2/lyra2RE.cu
@@ -24,6 +24,7 @@ extern void skein256_cpu_init(int thr_id, uint32_t threads);
 extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
 
 extern void groestl256_cpu_init(int thr_id, uint32_t threads);
+extern void groestl256_cpu_free(int thr_id);
 extern void groestl256_setTarget(const void *ptarget);
 extern uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
 extern uint32_t groestl256_getSecNonce(int thr_id, int num);
@@ -162,3 +163,19 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
 
 	return 0;
 }
+
+// cleanup
+extern "C" void free_lyra2(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	groestl256_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu
index 99fc95d..2a6cf50 100644
--- a/lyra2/lyra2REv2.cu
+++ b/lyra2/lyra2REv2.cu
@@ -28,6 +28,7 @@ extern void lyra2v2_cpu_init(int thr_id, uint32_t threads, uint64_t* matrix);
 
 extern void bmw256_setTarget(const void *ptarget);
 extern void bmw256_cpu_init(int thr_id, uint32_t threads);
+extern void bmw256_cpu_free(int thr_id);
 extern void bmw256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *resultnonces);
 
 void lyra2v2_hash(void *state, const void *input)
@@ -169,3 +170,20 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
 	MyStreamSynchronize(NULL, 0, device_map[thr_id]);
 	return 0;
 }
+
+// cleanup
+extern "C" void free_lyra2v2(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+	cudaFree(d_hash2[thr_id]);
+
+	bmw256_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/miner.h b/miner.h
index fdb8828..db868fa 100644
--- a/miner.h
+++ b/miner.h
@@ -300,6 +300,42 @@ extern int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, un
 extern int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done,
 	unsigned char *scratchbuf, struct timeval *tv_start, struct timeval *tv_end);
 
+/* free device allocated memory per algo */
+void miner_free_device(int thr_id);
+
+extern void free_blake256(int thr_id);
+extern void free_bmw(int thr_id);
+extern void free_c11(int thr_id);
+extern void free_deep(int thr_id);
+extern void free_keccak256(int thr_id);
+extern void free_fresh(int thr_id);
+extern void free_fugue256(int thr_id);
+extern void free_groestlcoin(int thr_id);
+extern void free_heavy(int thr_id);
+extern void free_jackpot(int thr_id);
+extern void free_luffa(int thr_id);
+extern void free_lyra2(int thr_id);
+extern void free_lyra2v2(int thr_id);
+extern void free_myriad(int thr_id);
+extern void free_neoscrypt(int thr_id);
+extern void free_nist5(int thr_id);
+extern void free_pentablake(int thr_id);
+extern void free_quark(int thr_id);
+extern void free_qubit(int thr_id);
+extern void free_skeincoin(int thr_id);
+extern void free_skein2(int thr_id);
+extern void free_s3(int thr_id);
+extern void free_whirlx(int thr_id);
+extern void free_x11(int thr_id);
+extern void free_x13(int thr_id);
+extern void free_x14(int thr_id);
+extern void free_x15(int thr_id);
+extern void free_x17(int thr_id);
+extern void free_zr5(int thr_id);
+//extern void free_sha256d(int thr_id);
+extern void free_scrypt(int thr_id);
+extern void free_scrypt_jane(int thr_id);
+
 /* api related */
 void *api_thread(void *userdata);
 void api_set_throughput(int thr_id, uint32_t throughput);
diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp
index 5bdbc5c..c93d872 100644
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -8,6 +8,7 @@
 #include "miner.h"
 
 void myriadgroestl_cpu_init(int thr_id, uint32_t threads);
+void myriadgroestl_cpu_free(int thr_id);
 void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
 void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
 
@@ -95,3 +96,16 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
 	return 0;
 }
 
+// cleanup
+void free_myriad(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	myriadgroestl_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/neoscrypt/cuda_neoscrypt.cu b/neoscrypt/cuda_neoscrypt.cu
index f58d3df..79d14d6 100644
--- a/neoscrypt/cuda_neoscrypt.cu
+++ b/neoscrypt/cuda_neoscrypt.cu
@@ -735,6 +735,13 @@ void neoscrypt_cpu_init(int thr_id, uint32_t threads)
 	cudaMemcpyToSymbol(BLAKE2S_SIGMA, BLAKE2S_SIGMA_host, sizeof(BLAKE2S_SIGMA_host), 0, cudaMemcpyHostToDevice);
 }
 
+__host__
+void neoscrypt_cpu_free(int thr_id)
+{
+	cudaFree(d_NNonce[thr_id]);
+	cudaFree(d_buffer[thr_id]);
+}
+
 __host__
 uint32_t neoscrypt_cpu_hash_k4(int thr_id, uint32_t threads, uint32_t startNounce, int have_stratum, int order)
 {
diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp
index cb1bed4..0330248 100644
--- a/neoscrypt/neoscrypt.cpp
+++ b/neoscrypt/neoscrypt.cpp
@@ -4,6 +4,7 @@
 
 extern void neoscrypt_setBlockTarget(uint32_t * data, const void *ptarget);
 extern void neoscrypt_cpu_init(int thr_id, uint32_t threads);
+extern void neoscrypt_cpu_free(int thr_id);
 extern uint32_t neoscrypt_cpu_hash_k4(int thr_id, uint32_t threads, uint32_t startNounce, int have_stratum, int order);
 
 static bool init[MAX_GPUS] = { 0 };
@@ -81,3 +82,17 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+void free_neoscrypt(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	neoscrypt_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/pentablake.cu b/pentablake.cu
index e777bb8..1cd7585 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -442,3 +442,20 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return rc;
 }
+
+// cleanup
+void free_pentablake(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+	cudaFree(h_resNounce[thr_id]);
+	cudaFree(d_resNounce[thr_id]);
+
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu
index dc78c85..47af463 100644
--- a/quark/cuda_quark_compactionTest.cu
+++ b/quark/cuda_quark_compactionTest.cu
@@ -27,7 +27,7 @@ __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_Qua
 
 cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS];
 
-// Setup-Funktionen
+// Setup/Alloc Function
 __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaMemcpyFromSymbol(&h_QuarkTrueFunction[thr_id], d_QuarkTrueFunction, sizeof(cuda_compactTestFunction_t));
@@ -45,6 +45,18 @@ __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads)
 	cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
 }
 
+// Because all alloc should have a free...
+__host__ void quark_compactTest_cpu_free(int thr_id)
+{
+	cudaFree(d_tempBranch1Nonces[thr_id]);
+	cudaFree(d_numValid[thr_id]);
+
+	cudaFree(d_partSum[0][thr_id]);
+	cudaFree(d_partSum[1][thr_id]);
+
+	cudaFreeHost(h_numValid[thr_id]);
+}
+
 #if __CUDA_ARCH__ < 300
 /**
  * __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1
diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu
index dc266dd..0f06f02 100644
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@@ -41,6 +41,7 @@ extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
 extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
+extern void quark_compactTest_cpu_free(int thr_id);
 extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 											uint32_t *d_nonces1, uint32_t *nrm1,
 											uint32_t *d_nonces2, uint32_t *nrm2,
@@ -51,6 +52,7 @@ extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t thre
 
 extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 
+
 // Original Quarkhash Funktion aus einem miner Quelltext
 extern "C" void quarkhash(void *state, const void *input)
 {
@@ -252,3 +254,26 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_quark(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+	cudaDeviceSynchronize();
+
+	cudaFree(d_hash[thr_id]);
+
+	cudaFree(d_branch1Nonces[thr_id]);
+	cudaFree(d_branch2Nonces[thr_id]);
+	cudaFree(d_branch3Nonces[thr_id]);
+
+	quark_compactTest_cpu_free(thr_id);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/qubit/deep.cu b/qubit/deep.cu
index b0b1fbb..26b85d6 100644
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@@ -128,3 +128,19 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_deep(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/qubit/luffa.cu b/qubit/luffa.cu
index 6b2fe75..84ab868 100644
--- a/qubit/luffa.cu
+++ b/qubit/luffa.cu
@@ -96,3 +96,19 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_luffa(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
index 6c69d72..a5e3a7e 100644
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@@ -156,3 +156,19 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_qubit(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/scrypt-jane.cpp b/scrypt-jane.cpp
index eb20ea5..f65388e 100644
--- a/scrypt-jane.cpp
+++ b/scrypt-jane.cpp
@@ -426,6 +426,12 @@ unsigned char GetNfactor(unsigned int nTimestamp)
 	return Nfactor;
 }
 
+// cleanup
+void free_scrypt_jane(int thr_id)
+{
+	// todo ?
+}
+
 #define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
 					 | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
 static int s_Nfactor = 0;
diff --git a/scrypt.cpp b/scrypt.cpp
index e63139d..d13b0bd 100644
--- a/scrypt.cpp
+++ b/scrypt.cpp
@@ -685,6 +685,12 @@ static int lastFactor = 0;
 
 static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
 
+// cleanup
+void free_scrypt(int thr_id)
+{
+	// todo ?
+}
+
 // Scrypt proof of work algorithm
 // using SSE2 vectorized HMAC SHA256 on CPU and
 // a salsa core implementation on GPU with CUDA
diff --git a/skein.cu b/skein.cu
index 3b52178..ed327ce 100644
--- a/skein.cu
+++ b/skein.cu
@@ -464,3 +464,19 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
 
 	return 0;
 }
+
+// cleanup
+extern "C" void free_skeincoin(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/skein2.cpp b/skein2.cpp
index 8a6fcef..08fb5ac 100644
--- a/skein2.cpp
+++ b/skein2.cpp
@@ -120,3 +120,19 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
 
 	return 0;
 }
+
+// cleanup
+void free_skein2(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x11/c11.cu b/x11/c11.cu
index c0cffdc..b711d97 100644
--- a/x11/c11.cu
+++ b/x11/c11.cu
@@ -54,9 +54,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start
 extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // Flax/C11 CPU Hash
 extern "C" void c11hash(void *output, const void *input)
@@ -248,3 +245,19 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_c11(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x11/fresh.cu b/x11/fresh.cu
index e667baf..af45366 100644
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@@ -25,10 +25,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start
 extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
-											int order);
 
 // CPU Hash
 extern "C" void fresh_hash(void *state, const void *input)
@@ -157,3 +153,19 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_fresh(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x11/s3.cu b/x11/s3.cu
index 0f517f0..e17e8ec 100644
--- a/x11/s3.cu
+++ b/x11/s3.cu
@@ -136,3 +136,19 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_s3(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x11/x11.cu b/x11/x11.cu
index b13aff5..d65a48d 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -54,10 +54,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start
 extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
-
 // X11 CPU Hash
 extern "C" void x11hash(void *output, const void *input)
 {
@@ -247,3 +243,19 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_x11(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x13/x13.cu b/x13/x13.cu
index dea8709..4e980a4 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -62,9 +62,6 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
 extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
 extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, 
-                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // X13 CPU Hash
 extern "C" void x13hash(void *output, const void *input)
@@ -248,3 +245,19 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_x13(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x15/cuda_whirlpoolx.cu b/x15/cuda_whirlpoolx.cu
index 570b863..d5f94eb 100644
--- a/x15/cuda_whirlpoolx.cu
+++ b/x15/cuda_whirlpoolx.cu
@@ -544,6 +544,15 @@ extern void whirlpoolx_cpu_init(int thr_id, uint32_t threads)
 	CUDA_SAFE_CALL(cudaMalloc(&d_tmp[thr_id], 8 * 9 * sizeof(uint64_t))); // d_tmp[threadIdx.x+64] (7+64)
 }
 
+__host__
+extern void whirlpoolx_cpu_free(int thr_id)
+{
+	cudaFree(d_WXNonce[thr_id]);
+	cudaFreeHost(h_wxnounce[thr_id]);
+	cudaFree(d_xtra[thr_id]);
+	cudaFree(d_tmp[thr_id]);
+}
+
 __host__
 void whirlpoolx_setBlock_80(void *pdata, const void *ptarget)
 {
diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu
index 4aa4346..541d2de 100644
--- a/x15/whirlpoolx.cu
+++ b/x15/whirlpoolx.cu
@@ -12,6 +12,7 @@ extern "C" {
 static uint32_t *d_hash[MAX_GPUS];
 
 extern void whirlpoolx_cpu_init(int thr_id, uint32_t threads);
+extern void whirlpoolx_cpu_free(int thr_id);
 extern void whirlpoolx_setBlock_80(void *pdata, const void *ptarget);
 extern uint32_t whirlpoolx_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
 extern void whirlpoolx_precompute(int thr_id);
@@ -99,3 +100,19 @@ extern "C" int scanhash_whirlx(int thr_id,  struct work* work, uint32_t max_nonc
 
 	return 0;
 }
+
+// cleanup
+extern "C" void free_whirlx(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	whirlpoolx_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x15/x14.cu b/x15/x14.cu
index 337b746..fe09d04 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -69,9 +69,6 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
 extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
 extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // X14 CPU Hash function
 extern "C" void x14hash(void *output, const void *input)
@@ -252,3 +249,19 @@ extern "C" int scanhash_x14(int thr_id,  struct work* work, uint32_t max_nonce,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_x14(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x15/x15.cu b/x15/x15.cu
index e569875..01cf00b 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -74,9 +74,6 @@ extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode);
 extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x15_whirlpool_cpu_free(int thr_id);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // X15 CPU Hash function
 extern "C" void x15hash(void *output, const void *input)
@@ -267,3 +264,19 @@ extern "C" int scanhash_x15(int thr_id,  struct work* work, uint32_t max_nonce,
 	x15_whirlpool_cpu_free(thr_id);
 	return 0;
 }
+
+// cleanup
+extern "C" void free_x15(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/x17/x17.cu b/x17/x17.cu
index 2193d32..0bcb028 100644
--- a/x17/x17.cu
+++ b/x17/x17.cu
@@ -82,10 +82,6 @@ extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startN
 extern void x17_haval256_cpu_init(int thr_id, uint32_t threads);
 extern void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
-											int order);
 
 // X17 Hashfunktion
 extern "C" void x17hash(void *output, const void *input)
@@ -290,3 +286,19 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_x17(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/zr5.cu b/zr5.cu
index 63ed68b..50cf7d7 100644
--- a/zr5.cu
+++ b/zr5.cu
@@ -469,3 +469,30 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }
+
+// cleanup
+extern "C" void free_zr5(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaSetDevice(device_map[thr_id]);
+
+	cudaFree(d_hash[thr_id]);
+
+	cudaFree(d_poks[thr_id]);
+	cudaFree(d_permut[thr_id]);
+	cudaFree(d_buffers[thr_id]);
+
+	cudaFree(d_blake[thr_id]);
+	cudaFree(d_groes[thr_id]);
+	cudaFree(d_jh512[thr_id]);
+	cudaFree(d_skein[thr_id]);
+
+	cudaFree(d_txs[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}
\ No newline at end of file