From e1c4b3042cd356f9ccf30734b13772ab5393e399 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Fri, 25 Sep 2015 07:51:09 +0200 Subject: [PATCH] algos: add functions to free allocated resources Will be used later for algo switching not really tested yet... --- Algo256/blake256.cu | 16 ++++++++++++ Algo256/bmw.cu | 16 ++++++++++++ Algo256/cuda_bmw256.cu | 7 +++++ Algo256/cuda_fugue256.cu | 7 +++++ Algo256/cuda_groestl256.cu | 8 +++++- Algo256/cuda_keccak256.cu | 7 +++++ Algo256/keccak256.cu | 17 +++++++++++++ JHA/jackpotcoin.cu | 22 ++++++++++++++++ ccminer.cpp | 39 ++++++++++++++++++++++++++++ cuda_checkhash.cu | 7 +++++ cuda_fugue256.h | 1 + cuda_groestlcoin.cu | 6 +++++ cuda_groestlcoin.h | 1 + cuda_helper.h | 1 + cuda_myriadgroestl.cu | 7 +++++ cuda_nist5.cu | 41 +++++++++++++++++++++++++----- fuguecoin.cpp | 27 +++++++++++++++----- groestlcoin.cpp | 13 ++++++++++ heavy/heavy.cu | 16 ++++++++++++ lyra2/lyra2RE.cu | 17 +++++++++++++ lyra2/lyra2REv2.cu | 18 +++++++++++++ miner.h | 36 ++++++++++++++++++++++++++ myriadgroestl.cpp | 14 ++++++++++ neoscrypt/cuda_neoscrypt.cu | 7 +++++ neoscrypt/neoscrypt.cpp | 15 +++++++++++ pentablake.cu | 17 +++++++++++++ quark/cuda_quark_compactionTest.cu | 14 +++++++++- quark/quarkcoin.cu | 25 ++++++++++++++++++ qubit/deep.cu | 16 ++++++++++++ qubit/luffa.cu | 16 ++++++++++++ qubit/qubit.cu | 16 ++++++++++++ scrypt-jane.cpp | 6 +++++ scrypt.cpp | 6 +++++ skein.cu | 16 ++++++++++++ skein2.cpp | 16 ++++++++++++ x11/c11.cu | 19 +++++++++++--- x11/fresh.cu | 20 ++++++++++++--- x11/s3.cu | 16 ++++++++++++ x11/x11.cu | 20 ++++++++++++--- x13/x13.cu | 19 +++++++++++--- x15/cuda_whirlpoolx.cu | 9 +++++++ x15/whirlpoolx.cu | 17 +++++++++++++ x15/x14.cu | 19 +++++++++++--- x15/x15.cu | 19 +++++++++++--- x17/x17.cu | 20 ++++++++++++--- zr5.cu | 27 ++++++++++++++++++++ 46 files changed, 681 insertions(+), 38 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index edcaa8e..5378848 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -489,3 +489,19 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non return rc; } + +// cleanup +extern "C" void free_blake256(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFreeHost(h_resNonce[thr_id]); + cudaFree(d_resNonce[thr_id]); + + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu index a71b092..100cece 100644 --- a/Algo256/bmw.cu +++ b/Algo256/bmw.cu @@ -101,3 +101,19 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce; return 0; } + +// cleanup +extern "C" void free_bmw(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/Algo256/cuda_bmw256.cu b/Algo256/cuda_bmw256.cu index 6189606..0fde12e 100644 --- a/Algo256/cuda_bmw256.cu +++ b/Algo256/cuda_bmw256.cu @@ -269,6 +269,13 @@ void bmw256_cpu_init(int thr_id, uint32_t threads) cudaMallocHost(&d_gnounce[thr_id], 2 * sizeof(uint32_t)); } +__host__ +void bmw256_cpu_free(int thr_id) +{ + cudaFree(d_GNonce[thr_id]); + cudaFreeHost(d_gnounce[thr_id]); +} + __host__ void bmw256_setTarget(const void *pTargetIn) { diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu index 7bcbfcf..66d1c35 100644 --- a/Algo256/cuda_fugue256.cu +++ b/Algo256/cuda_fugue256.cu @@ -732,6 +732,13 @@ void fugue256_cpu_init(int thr_id, uint32_t threads) cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); } +__host__ +void fugue256_cpu_free(int thr_id) +{ + cudaFree(d_fugue256_hashoutput[thr_id]); + cudaFree(d_resultNonce[thr_id]); +} + __host__ void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn) { diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu index 1e8bf61..b1ffa4f 100644 --- a/Algo256/cuda_groestl256.cu +++ b/Algo256/cuda_groestl256.cu @@ -262,7 +262,6 @@ void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *out __host__ void groestl256_cpu_init(int thr_id, uint32_t threads) { - // Texturen mit obigem Makro initialisieren texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256); texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256); @@ -277,6 +276,13 @@ void groestl256_cpu_init(int thr_id, uint32_t threads) cudaMallocHost(&h_GNonces[thr_id], 2*sizeof(uint32_t)); } +__host__ +void groestl256_cpu_free(int thr_id) +{ + cudaFree(d_GNonces[thr_id]); + cudaFreeHost(h_GNonces[thr_id]); +} + __host__ uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order) { diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu index cef2688..5ae7013 100644 --- a/Algo256/cuda_keccak256.cu +++ b/Algo256/cuda_keccak256.cu @@ -300,3 +300,10 @@ void keccak256_cpu_init(int thr_id, uint32_t threads) CUDA_SAFE_CALL(cudaMalloc(&d_KNonce[thr_id], sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMallocHost(&d_nounce[thr_id], 1*sizeof(uint32_t))); } + +__host__ +void keccak256_cpu_free(int thr_id) +{ + cudaFree(d_KNonce[thr_id]); + cudaFreeHost(d_nounce[thr_id]); +} diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index 1b91ee1..bacacd1 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -17,6 +17,7 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; extern void keccak256_cpu_init(int thr_id, uint32_t threads); +extern void keccak256_cpu_free(int thr_id); extern void keccak256_setBlock_80(void *pdata,const void *ptarget); extern uint32_t keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); @@ -93,3 +94,19 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no return 0; } + +// cleanup +extern "C" void free_keccak256(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + keccak256_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 8edcd58..883706c 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -250,3 +250,25 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc return 0; } + +// cleanup +extern "C" void free_jackpot(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cudaFree(d_branch1Nonces[thr_id]); + cudaFree(d_branch2Nonces[thr_id]); + cudaFree(d_branch3Nonces[thr_id]); + + cudaFree(d_jackpotNonces[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/ccminer.cpp b/ccminer.cpp index de79fbb..407c50d 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -1503,6 +1503,45 @@ static bool wanna_mine(int thr_id) return state; } +// required to switch algos +void miner_free_device(int thr_id) +{ + // todo: some kind of algo "registration" + // to call automatically if needed + free_blake256(thr_id); + free_bmw(thr_id); + free_c11(thr_id); + free_deep(thr_id); + free_keccak256(thr_id); + free_fresh(thr_id); + free_fugue256(thr_id); + free_groestlcoin(thr_id); + free_heavy(thr_id); + free_jackpot(thr_id); + free_luffa(thr_id); + free_lyra2(thr_id); + free_lyra2v2(thr_id); + free_myriad(thr_id); + free_neoscrypt(thr_id); + free_nist5(thr_id); + free_pentablake(thr_id); + free_quark(thr_id); + free_qubit(thr_id); + free_skeincoin(thr_id); + free_skein2(thr_id); + free_s3(thr_id); + free_whirlx(thr_id); + free_x11(thr_id); + free_x13(thr_id); + free_x14(thr_id); + free_x15(thr_id); + free_x17(thr_id); + free_zr5(thr_id); + //free_sha256d(thr_id); + free_scrypt(thr_id); + free_scrypt_jane(thr_id); +} + static void *miner_thread(void *userdata) { struct thr_info *mythr = (struct thr_info *)userdata; diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu index 52ba432..20d9e45 100644 --- a/cuda_checkhash.cu +++ b/cuda_checkhash.cu @@ -23,6 +23,13 @@ void cuda_check_cpu_init(int thr_id, uint32_t threads) init_done = true; } +__host__ +void cuda_check_cpu_free(int thr_id) +{ + cudaFree(d_resNonces[thr_id]); + cudaFreeHost(h_resNonces[thr_id]); +} + // Target Difficulty __host__ void cuda_check_cpu_setTarget(const void *ptarget) diff --git a/cuda_fugue256.h b/cuda_fugue256.h index ec9b771..a4852b4 100644 --- a/cuda_fugue256.h +++ b/cuda_fugue256.h @@ -4,5 +4,6 @@ void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce); void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn); void fugue256_cpu_init(int thr_id, uint32_t threads); +void fugue256_cpu_free(int thr_id); #endif diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu index 27da418..fcef970 100644 --- a/cuda_groestlcoin.cu +++ b/cuda_groestlcoin.cu @@ -102,6 +102,12 @@ void groestlcoin_cpu_init(int thr_id, uint32_t threads) cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); } +__host__ +void groestlcoin_cpu_free(int thr_id) +{ + cudaFree(d_resultNonce[thr_id]); +} + __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn) { diff --git a/cuda_groestlcoin.h b/cuda_groestlcoin.h index 7b95b59..d4fa42e 100644 --- a/cuda_groestlcoin.h +++ b/cuda_groestlcoin.h @@ -2,6 +2,7 @@ #define _CUDA_GROESTLCOIN_H void groestlcoin_cpu_init(int thr_id, uint32_t threads); +void groestlcoin_cpu_free(int thr_id); void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn); void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce); diff --git a/cuda_helper.h b/cuda_helper.h index 9c6b290..fd98bff 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -31,6 +31,7 @@ extern int cuda_arch[MAX_GPUS]; extern int cuda_get_arch(int thr_id); extern void cuda_reset_device(int thr_id, bool *init); extern void cuda_check_cpu_init(int thr_id, uint32_t threads); +extern void cuda_check_cpu_free(int thr_id); extern void cuda_check_cpu_setTarget(const void *ptarget); extern uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash); extern uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce); diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu index 28ea94c..681b6a0 100644 --- a/cuda_myriadgroestl.cu +++ b/cuda_myriadgroestl.cu @@ -327,6 +327,13 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads) cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads); } +__host__ +void myriadgroestl_cpu_free(int thr_id) +{ + cudaFree(d_resultNonce[thr_id]); + cudaFree(d_outputHashes[thr_id]); +} + __host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn) { diff --git a/cuda_nist5.cu b/cuda_nist5.cu index 75582ea..1ed36b0 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -72,6 +72,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; + int res = 0; uint32_t throughput = device_intensity(thr_id, __func__, 1 << 20); // 256*256*16 throughput = min(throughput, (max_nonce - first_nonce)); @@ -83,19 +84,26 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, { cudaSetDevice(device_map[thr_id]); - // Konstanten kopieren, Speicher belegen + // Constants copy/init (no device alloc in these algos) quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); quark_jh512_cpu_init(thr_id, throughput); quark_keccak512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); + // char[64] work space for hashes results + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput)); cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; } +#ifdef USE_STREAMS + cudaStream_t stream[5]; + for (int i = 0; i < 5; i++) + cudaStreamCreate(&stream[i]); +#endif + for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); @@ -123,7 +131,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, nist5hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { - int res = 1; + res = 1; uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); bn_store_hash_target_ratio(vhash64, ptarget, work); if (secNonce != 0) { @@ -135,7 +143,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, res++; } pdata[19] = foundNonce; - return res; + goto out; } else { applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); @@ -146,6 +154,27 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); - *hashes_done = pdata[19] - first_nonce + 1; - return 0; +out: +#ifdef USE_STREAMS + for (int i = 0; i < 5; i++) + cudaStreamDestroy(stream[i]); +#endif + + return res; } + +// ressources cleanup +extern "C" void free_nist5(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/fuguecoin.cpp b/fuguecoin.cpp index 056f92d..1c51a50 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -20,6 +20,15 @@ extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) +void fugue256_hash(unsigned char* output, const unsigned char* input, int len) +{ + sph_fugue256_context ctx; + + sph_fugue256_init(&ctx); + sph_fugue256(&ctx, input, len); + sph_fugue256_close(&ctx, (void *)output); +} + static bool init[MAX_GPUS] = { 0 }; int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) @@ -91,11 +100,17 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne return 0; } -void fugue256_hash(unsigned char* output, const unsigned char* input, int len) +// cleanup +void free_fugue256(int thr_id) { - sph_fugue256_context ctx; + if (!init[thr_id]) + return; - sph_fugue256_init(&ctx); - sph_fugue256(&ctx, input, len); - sph_fugue256_close(&ctx, (void *)output); -} + cudaSetDevice(device_map[thr_id]); + + fugue256_cpu_free(thr_id); + + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 9c81aa5..5a3957b 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -91,3 +91,16 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi return 0; } +// cleanup +void free_groestlcoin(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + groestlcoin_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 023f780..eeb1aba 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -300,6 +300,22 @@ exit: return rc; } +// cleanup +extern "C" void free_heavy(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(heavy_nonceVector[thr_id]); + + // todo: free sub algos vectors + + init[thr_id] = false; + + cudaDeviceSynchronize(); +} __host__ void heavycoin_hash(uchar* output, const uchar* input, int len) { diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index ce95449..3f74484 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -24,6 +24,7 @@ extern void skein256_cpu_init(int thr_id, uint32_t threads); extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order); extern void groestl256_cpu_init(int thr_id, uint32_t threads); +extern void groestl256_cpu_free(int thr_id); extern void groestl256_setTarget(const void *ptarget); extern uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order); extern uint32_t groestl256_getSecNonce(int thr_id, int num); @@ -162,3 +163,19 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, return 0; } + +// cleanup +extern "C" void free_lyra2(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + groestl256_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index 99fc95d..2a6cf50 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -28,6 +28,7 @@ extern void lyra2v2_cpu_init(int thr_id, uint32_t threads, uint64_t* matrix); extern void bmw256_setTarget(const void *ptarget); extern void bmw256_cpu_init(int thr_id, uint32_t threads); +extern void bmw256_cpu_free(int thr_id); extern void bmw256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *resultnonces); void lyra2v2_hash(void *state, const void *input) @@ -169,3 +170,20 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc MyStreamSynchronize(NULL, 0, device_map[thr_id]); return 0; } + +// cleanup +extern "C" void free_lyra2v2(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + cudaFree(d_hash2[thr_id]); + + bmw256_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/miner.h b/miner.h index fdb8828..db868fa 100644 --- a/miner.h +++ b/miner.h @@ -300,6 +300,42 @@ extern int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, un extern int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done, unsigned char *scratchbuf, struct timeval *tv_start, struct timeval *tv_end); +/* free device allocated memory per algo */ +void miner_free_device(int thr_id); + +extern void free_blake256(int thr_id); +extern void free_bmw(int thr_id); +extern void free_c11(int thr_id); +extern void free_deep(int thr_id); +extern void free_keccak256(int thr_id); +extern void free_fresh(int thr_id); +extern void free_fugue256(int thr_id); +extern void free_groestlcoin(int thr_id); +extern void free_heavy(int thr_id); +extern void free_jackpot(int thr_id); +extern void free_luffa(int thr_id); +extern void free_lyra2(int thr_id); +extern void free_lyra2v2(int thr_id); +extern void free_myriad(int thr_id); +extern void free_neoscrypt(int thr_id); +extern void free_nist5(int thr_id); +extern void free_pentablake(int thr_id); +extern void free_quark(int thr_id); +extern void free_qubit(int thr_id); +extern void free_skeincoin(int thr_id); +extern void free_skein2(int thr_id); +extern void free_s3(int thr_id); +extern void free_whirlx(int thr_id); +extern void free_x11(int thr_id); +extern void free_x13(int thr_id); +extern void free_x14(int thr_id); +extern void free_x15(int thr_id); +extern void free_x17(int thr_id); +extern void free_zr5(int thr_id); +//extern void free_sha256d(int thr_id); +extern void free_scrypt(int thr_id); +extern void free_scrypt_jane(int thr_id); + /* api related */ void *api_thread(void *userdata); void api_set_throughput(int thr_id, uint32_t throughput); diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 5bdbc5c..c93d872 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -8,6 +8,7 @@ #include "miner.h" void myriadgroestl_cpu_init(int thr_id, uint32_t threads); +void myriadgroestl_cpu_free(int thr_id); void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn); void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce); @@ -95,3 +96,16 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned return 0; } +// cleanup +void free_myriad(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + myriadgroestl_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/neoscrypt/cuda_neoscrypt.cu b/neoscrypt/cuda_neoscrypt.cu index f58d3df..79d14d6 100644 --- a/neoscrypt/cuda_neoscrypt.cu +++ b/neoscrypt/cuda_neoscrypt.cu @@ -735,6 +735,13 @@ void neoscrypt_cpu_init(int thr_id, uint32_t threads) cudaMemcpyToSymbol(BLAKE2S_SIGMA, BLAKE2S_SIGMA_host, sizeof(BLAKE2S_SIGMA_host), 0, cudaMemcpyHostToDevice); } +__host__ +void neoscrypt_cpu_free(int thr_id) +{ + cudaFree(d_NNonce[thr_id]); + cudaFree(d_buffer[thr_id]); +} + __host__ uint32_t neoscrypt_cpu_hash_k4(int thr_id, uint32_t threads, uint32_t startNounce, int have_stratum, int order) { diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp index cb1bed4..0330248 100644 --- a/neoscrypt/neoscrypt.cpp +++ b/neoscrypt/neoscrypt.cpp @@ -4,6 +4,7 @@ extern void neoscrypt_setBlockTarget(uint32_t * data, const void *ptarget); extern void neoscrypt_cpu_init(int thr_id, uint32_t threads); +extern void neoscrypt_cpu_free(int thr_id); extern uint32_t neoscrypt_cpu_hash_k4(int thr_id, uint32_t threads, uint32_t startNounce, int have_stratum, int order); static bool init[MAX_GPUS] = { 0 }; @@ -81,3 +82,17 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +void free_neoscrypt(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + neoscrypt_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/pentablake.cu b/pentablake.cu index e777bb8..1cd7585 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -442,3 +442,20 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n *hashes_done = pdata[19] - first_nonce + 1; return rc; } + +// cleanup +void free_pentablake(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + cudaFree(h_resNounce[thr_id]); + cudaFree(d_resNounce[thr_id]); + + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu index dc78c85..47af463 100644 --- a/quark/cuda_quark_compactionTest.cu +++ b/quark/cuda_quark_compactionTest.cu @@ -27,7 +27,7 @@ __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_Qua cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS]; -// Setup-Funktionen +// Setup/Alloc Function __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyFromSymbol(&h_QuarkTrueFunction[thr_id], d_QuarkTrueFunction, sizeof(cuda_compactTestFunction_t)); @@ -45,6 +45,18 @@ __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads) cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block) } +// Because all alloc should have a free... +__host__ void quark_compactTest_cpu_free(int thr_id) +{ + cudaFree(d_tempBranch1Nonces[thr_id]); + cudaFree(d_numValid[thr_id]); + + cudaFree(d_partSum[0][thr_id]); + cudaFree(d_partSum[1][thr_id]); + + cudaFreeHost(h_numValid[thr_id]); +} + #if __CUDA_ARCH__ < 300 /** * __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1 diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index dc266dd..0f06f02 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -41,6 +41,7 @@ extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_free(int thr_id); extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, uint32_t *d_nonces1, uint32_t *nrm1, uint32_t *d_nonces2, uint32_t *nrm2, @@ -51,6 +52,7 @@ extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t thre extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); + // Original Quarkhash Funktion aus einem miner Quelltext extern "C" void quarkhash(void *state, const void *input) { @@ -252,3 +254,26 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_quark(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + cudaDeviceSynchronize(); + + cudaFree(d_hash[thr_id]); + + cudaFree(d_branch1Nonces[thr_id]); + cudaFree(d_branch2Nonces[thr_id]); + cudaFree(d_branch3Nonces[thr_id]); + + quark_compactTest_cpu_free(thr_id); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/qubit/deep.cu b/qubit/deep.cu index b0b1fbb..26b85d6 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -128,3 +128,19 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_deep(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/qubit/luffa.cu b/qubit/luffa.cu index 6b2fe75..84ab868 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -96,3 +96,19 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_luffa(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 6c69d72..a5e3a7e 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -156,3 +156,19 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_qubit(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/scrypt-jane.cpp b/scrypt-jane.cpp index eb20ea5..f65388e 100644 --- a/scrypt-jane.cpp +++ b/scrypt-jane.cpp @@ -426,6 +426,12 @@ unsigned char GetNfactor(unsigned int nTimestamp) return Nfactor; } +// cleanup +void free_scrypt_jane(int thr_id) +{ + // todo ? +} + #define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \ | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) static int s_Nfactor = 0; diff --git a/scrypt.cpp b/scrypt.cpp index e63139d..d13b0bd 100644 --- a/scrypt.cpp +++ b/scrypt.cpp @@ -685,6 +685,12 @@ static int lastFactor = 0; static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad); +// cleanup +void free_scrypt(int thr_id) +{ + // todo ? +} + // Scrypt proof of work algorithm // using SSE2 vectorized HMAC SHA256 on CPU and // a salsa core implementation on GPU with CUDA diff --git a/skein.cu b/skein.cu index 3b52178..ed327ce 100644 --- a/skein.cu +++ b/skein.cu @@ -464,3 +464,19 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no return 0; } + +// cleanup +extern "C" void free_skeincoin(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/skein2.cpp b/skein2.cpp index 8a6fcef..08fb5ac 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -120,3 +120,19 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned return 0; } + +// cleanup +void free_skein2(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x11/c11.cu b/x11/c11.cu index c0cffdc..b711d97 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -54,9 +54,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // Flax/C11 CPU Hash extern "C" void c11hash(void *output, const void *input) @@ -248,3 +245,19 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_c11(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x11/fresh.cu b/x11/fresh.cu index e667baf..af45366 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -25,10 +25,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, - int order); // CPU Hash extern "C" void fresh_hash(void *state, const void *input) @@ -157,3 +153,19 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_fresh(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x11/s3.cu b/x11/s3.cu index 0f517f0..e17e8ec 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -136,3 +136,19 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_s3(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x11/x11.cu b/x11/x11.cu index b13aff5..d65a48d 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -54,10 +54,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t start extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); - // X11 CPU Hash extern "C" void x11hash(void *output, const void *input) { @@ -247,3 +243,19 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_x11(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x13/x13.cu b/x13/x13.cu index dea8709..4e980a4 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -62,9 +62,6 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X13 CPU Hash extern "C" void x13hash(void *output, const void *input) @@ -248,3 +245,19 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_x13(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x15/cuda_whirlpoolx.cu b/x15/cuda_whirlpoolx.cu index 570b863..d5f94eb 100644 --- a/x15/cuda_whirlpoolx.cu +++ b/x15/cuda_whirlpoolx.cu @@ -544,6 +544,15 @@ extern void whirlpoolx_cpu_init(int thr_id, uint32_t threads) CUDA_SAFE_CALL(cudaMalloc(&d_tmp[thr_id], 8 * 9 * sizeof(uint64_t))); // d_tmp[threadIdx.x+64] (7+64) } +__host__ +extern void whirlpoolx_cpu_free(int thr_id) +{ + cudaFree(d_WXNonce[thr_id]); + cudaFreeHost(h_wxnounce[thr_id]); + cudaFree(d_xtra[thr_id]); + cudaFree(d_tmp[thr_id]); +} + __host__ void whirlpoolx_setBlock_80(void *pdata, const void *ptarget) { diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu index 4aa4346..541d2de 100644 --- a/x15/whirlpoolx.cu +++ b/x15/whirlpoolx.cu @@ -12,6 +12,7 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; extern void whirlpoolx_cpu_init(int thr_id, uint32_t threads); +extern void whirlpoolx_cpu_free(int thr_id); extern void whirlpoolx_setBlock_80(void *pdata, const void *ptarget); extern uint32_t whirlpoolx_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce); extern void whirlpoolx_precompute(int thr_id); @@ -99,3 +100,19 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc return 0; } + +// cleanup +extern "C" void free_whirlx(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + whirlpoolx_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x15/x14.cu b/x15/x14.cu index 337b746..fe09d04 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -69,9 +69,6 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X14 CPU Hash function extern "C" void x14hash(void *output, const void *input) @@ -252,3 +249,19 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_x14(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x15/x15.cu b/x15/x15.cu index e569875..01cf00b 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -74,9 +74,6 @@ extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode); extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x15_whirlpool_cpu_free(int thr_id); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X15 CPU Hash function extern "C" void x15hash(void *output, const void *input) @@ -267,3 +264,19 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, x15_whirlpool_cpu_free(thr_id); return 0; } + +// cleanup +extern "C" void free_x15(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/x17/x17.cu b/x17/x17.cu index 2193d32..0bcb028 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -82,10 +82,6 @@ extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startN extern void x17_haval256_cpu_init(int thr_id, uint32_t threads); extern void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, - uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, - int order); // X17 Hashfunktion extern "C" void x17hash(void *output, const void *input) @@ -290,3 +286,19 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_x17(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file diff --git a/zr5.cu b/zr5.cu index 63ed68b..50cf7d7 100644 --- a/zr5.cu +++ b/zr5.cu @@ -469,3 +469,30 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, *hashes_done = pdata[19] - first_nonce + 1; return 0; } + +// cleanup +extern "C" void free_zr5(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaSetDevice(device_map[thr_id]); + + cudaFree(d_hash[thr_id]); + + cudaFree(d_poks[thr_id]); + cudaFree(d_permut[thr_id]); + cudaFree(d_buffers[thr_id]); + + cudaFree(d_blake[thr_id]); + cudaFree(d_groes[thr_id]); + cudaFree(d_jh512[thr_id]); + cudaFree(d_skein[thr_id]); + + cudaFree(d_txs[thr_id]); + + cuda_check_cpu_free(thr_id); + init[thr_id] = false; + + cudaDeviceSynchronize(); +} \ No newline at end of file