From 355b835ae0daa34b6f0df4040b573a9b538ff6f2 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Fri, 16 Oct 2015 22:01:29 +0200 Subject: [PATCH] benchmark: enhance the mem leak detection reduce "false" warnings, and ignore unrelated/small ones <= 1 MB On windows the gpu memory can be allocated by other processes + some cleanup in algos... (free/gpulog) --- Algo256/blake256.cu | 2 +- Algo256/bmw.cu | 2 +- Algo256/keccak256.cu | 2 +- JHA/jackpotcoin.cu | 2 +- bench.cpp | 21 ++++++++++++++++----- cuda.cpp | 4 ---- cuda_groestlcoin.cu | 2 +- cuda_nist5.cu | 4 ++-- fuguecoin.cpp | 5 ++--- groestlcoin.cpp | 10 +++++----- heavy/heavy.cu | 6 +++--- myriadgroestl.cpp | 5 ++--- neoscrypt/neoscrypt.cpp | 4 ++-- pentablake.cu | 2 +- qubit/deep.cu | 7 ++++--- qubit/luffa.cu | 6 +++--- qubit/qubit.cu | 4 ++-- skein.cu | 4 ++-- skein2.cpp | 4 ++-- x11/c11.cu | 4 ++-- x11/fresh.cu | 2 +- x11/s3.cu | 6 +++--- x11/x11.cu | 6 +++--- x13/x13.cu | 4 ++-- x15/whirlpool.cu | 2 +- x15/whirlpoolx.cu | 2 +- x15/x14.cu | 2 +- x15/x15.cu | 2 +- x17/x17.cu | 2 +- zr5.cu | 2 +- 30 files changed, 68 insertions(+), 62 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index c3a3ea2..4a08410 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -474,7 +474,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non else if (vhashcpu[7] > ptarget[7] && opt_debug) { applog_hash((uchar*)ptarget); applog_compare_hash((uchar*)vhashcpu, (uchar*)ptarget); - gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu index 1cf06ea..c578d98 100644 --- a/Algo256/bmw.cu +++ b/Algo256/bmw.cu @@ -87,7 +87,7 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u return 1; } else { - gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index 7922912..d2e3ec1 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -80,7 +80,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no return 1; } else { - gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index b441362..642b749 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -247,7 +247,7 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc pdata[19] = foundNonce; return res; } else { - gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/bench.cpp b/bench.cpp index 31944ae..1100d4b 100644 --- a/bench.cpp +++ b/bench.cpp @@ -4,11 +4,11 @@ * 2015 - tpruvot@github */ +#include + #include "miner.h" #include "algos.h" -#include - int bench_algo = -1; static double algo_hashrates[MAX_GPUS][ALGO_COUNT] = { 0 }; @@ -120,7 +120,13 @@ bool bench_algo_switch_next(int thr_id) // free current algo memory and track mem usage mused = cuda_available_memory(thr_id); algo_free_all(thr_id); + + // device can take some time to free mfree = cuda_available_memory(thr_id); + if (device_mem_free[thr_id] > mfree) { + sleep(1); + mfree = cuda_available_memory(thr_id); + } // we need to wait completion on all cards before the switch if (opt_n_threads > 1) { @@ -132,10 +138,15 @@ bool bench_algo_switch_next(int thr_id) format_hashrate(hashrate, rate); gpulog(LOG_NOTICE, thr_id, "%s hashrate = %s", algo_names[prev_algo], rate); - // check if there is memory leak + // ensure memory leak is still real after the barrier if (device_mem_free[thr_id] > mfree) { - gpulog(LOG_WARNING, thr_id, "memory leak detected in %s ! %d MB free", - algo_names[prev_algo], mfree); + mfree = cuda_available_memory(thr_id); + } + + // check if there is memory leak + if (device_mem_free[thr_id] - mfree > 1) { + gpulog(LOG_WARNING, thr_id, "possible %d MB memory leak in %s! %d MB free", + (device_mem_free[thr_id] - mfree), algo_names[prev_algo], mfree); cuda_reset_device(thr_id, NULL); // force to free the leak mfree = cuda_available_memory(thr_id); } diff --git a/cuda.cpp b/cuda.cpp index b912cad..635d9df 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -4,10 +4,6 @@ #include #include -#ifndef _WIN32 -#include -#endif - // include thrust #ifndef __cplusplus #include diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu index 4e4b728..eb4f833 100644 --- a/cuda_groestlcoin.cu +++ b/cuda_groestlcoin.cu @@ -99,7 +99,7 @@ void groestlcoin_cpu_init(int thr_id, uint32_t threads) // to check if the binary supports SM3+ cuda_get_arch(thr_id); - cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); + CUDA_SAFE_CALL(cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t))); } __host__ diff --git a/cuda_nist5.cu b/cuda_nist5.cu index a48171c..ef875d6 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -148,7 +148,7 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, goto out; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -171,7 +171,7 @@ extern "C" void free_nist5(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); diff --git a/fuguecoin.cpp b/fuguecoin.cpp index aff0828..d7681c8 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -82,8 +82,7 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne *hashes_done = foundNounce - start_nonce + 1; return 1; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", - device_map[thr_id], foundNounce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce); } } @@ -106,7 +105,7 @@ void free_fugue256(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); fugue256_cpu_free(thr_id); diff --git a/groestlcoin.cpp b/groestlcoin.cpp index a4cb26d..c3f803a 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -36,14 +36,15 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8 if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce); - uint32_t *outputHash = (uint32_t*)malloc(throughput * 64); + uint32_t *outputHash = (uint32_t*)malloc((size_t) 64* throughput); if (opt_benchmark) - ((uint32_t*)ptarget)[7] = 0x000000ff; + ptarget[7] = 0x000ff; if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + CUDA_LOG_ERROR(); groestlcoin_cpu_init(thr_id, throughput); init[thr_id] = true; } @@ -73,8 +74,7 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi free(outputHash); return true; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", - device_map[thr_id], foundNounce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce); } } @@ -97,7 +97,7 @@ void free_groestlcoin(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); groestlcoin_cpu_free(thr_id); init[thr_id] = false; diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 4454ec6..07f706f 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -275,8 +275,8 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l uint32_t vhash[8]; pdata[19] += nonce - pdata[19]; heavycoin_hash((uchar*)vhash, (uchar*)pdata, blocklen); - if (memcmp(vhash, foundhash, 8*sizeof(uint32_t))) { - applog(LOG_ERR, "hash for nonce %08x does not validate on CPU!\n", nonce); + if (memcmp(vhash, foundhash, 32)) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", nonce); } else { *hashes_done = pdata[19] - first_nonce; work_set_target_ratio(work, vhash); @@ -306,7 +306,7 @@ extern "C" void free_heavy(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(heavy_nonceVector[thr_id]); diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 0eea601..f27d060 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -78,8 +78,7 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned free(outputHash); return 1; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", - device_map[thr_id], foundNounce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce); } } @@ -102,7 +101,7 @@ void free_myriad(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); myriadgroestl_cpu_free(thr_id); init[thr_id] = false; diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp index c111cfc..0e07845 100644 --- a/neoscrypt/neoscrypt.cpp +++ b/neoscrypt/neoscrypt.cpp @@ -75,7 +75,7 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign pdata[19] = foundNonce; return 1; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", dev_id, foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -93,7 +93,7 @@ void free_neoscrypt(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); neoscrypt_cpu_free(thr_id); init[thr_id] = false; diff --git a/pentablake.cu b/pentablake.cu index b0d8132..062b270 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -430,7 +430,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n pdata[19] = foundNonce; return rc; } else { - gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/qubit/deep.cu b/qubit/deep.cu index 32df85c..e30bd2e 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -67,8 +67,9 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + CUDA_LOG_ERROR(); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64)); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); qubit_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); @@ -117,7 +118,7 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, return res; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -135,7 +136,7 @@ extern "C" void free_deep(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); diff --git a/qubit/luffa.cu b/qubit/luffa.cu index f03efaf..afbf545 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -48,7 +48,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, CUDA_LOG_ERROR(); //if (opt_cudaschedule == -1) // to reduce cpu usage... // cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); - CUDA_LOG_ERROR(); + //CUDA_LOG_ERROR(); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); @@ -82,7 +82,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = foundNonce; return 1; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -111,6 +111,6 @@ extern "C" void free_luffa(int thr_id) cuda_check_cpu_free(thr_id); - cudaDeviceSynchronize(); init[thr_id] = false; + cudaDeviceSynchronize(); } diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 1a94a54..629adad 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -160,7 +160,7 @@ extern "C" void free_qubit(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); @@ -170,4 +170,4 @@ extern "C" void free_qubit(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/skein.cu b/skein.cu index 9efcd6d..f178a78 100644 --- a/skein.cu +++ b/skein.cu @@ -449,7 +449,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no return res; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -473,7 +473,7 @@ extern "C" void free_skeincoin(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); if (sm5) skeincoin_free(thr_id); diff --git a/skein2.cpp b/skein2.cpp index 86f5fc3..b05ccb3 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -106,7 +106,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned pdata[19] = swab32(foundNonce); return res; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", dev_id, foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -129,7 +129,7 @@ void free_skein2(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); diff --git a/x11/c11.cu b/x11/c11.cu index f514cdb..5db4cb6 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -235,7 +235,7 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = foundNonce + 1; } } @@ -254,7 +254,7 @@ extern "C" void free_c11(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); quark_groestl512_cpu_free(thr_id); diff --git a/x11/fresh.cu b/x11/fresh.cu index 8ef4fe7..8e2b42a 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -143,7 +143,7 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/x11/s3.cu b/x11/s3.cu index ddc6382..d8d11e4 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -126,7 +126,7 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un return res; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -144,7 +144,7 @@ extern "C" void free_s3(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); x11_simd512_cpu_free(thr_id); @@ -153,4 +153,4 @@ extern "C" void free_s3(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/x11/x11.cu b/x11/x11.cu index bffed09..ee33cc1 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -233,7 +233,7 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = foundNonce + 1; } } @@ -252,7 +252,7 @@ extern "C" void free_x11(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); @@ -263,4 +263,4 @@ extern "C" void free_x11(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/x13/x13.cu b/x13/x13.cu index cfdc3fb..5859029 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -234,7 +234,7 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u } return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } @@ -272,4 +272,4 @@ extern "C" void free_x13(int thr_id) cudaDeviceSynchronize(); init[thr_id] = false; -} \ No newline at end of file +} diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 99d0d33..31f6466 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -126,7 +126,7 @@ extern "C" void free_whirl(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu index b7b356f..4416512 100644 --- a/x15/whirlpoolx.cu +++ b/x15/whirlpoolx.cu @@ -107,7 +107,7 @@ extern "C" void free_whirlx(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]); diff --git a/x15/x14.cu b/x15/x14.cu index 559d03f..2696c4d 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -248,7 +248,7 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } pdata[19] += throughput; diff --git a/x15/x15.cu b/x15/x15.cu index ba72208..5f9eab5 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -254,7 +254,7 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/x17/x17.cu b/x17/x17.cu index 9bb40bd..a5198df 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -279,7 +279,7 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u pdata[19] = foundNonce; return res; } else { - applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); } } diff --git a/zr5.cu b/zr5.cu index d591f1d..8f6d821 100644 --- a/zr5.cu +++ b/zr5.cu @@ -481,7 +481,7 @@ extern "C" void free_zr5(int thr_id) if (!init[thr_id]) return; - cudaSetDevice(device_map[thr_id]); + cudaThreadSynchronize(); cudaFree(d_hash[thr_id]);