From d195f2e8a25951392dae3bfef9685ae3ae3d4e0d Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 11 Oct 2015 04:56:58 +0200 Subject: [PATCH] intensity: do not reduce throughput before init Else the memory allocated could be less than required later btw, use the new "cuda" function to apply intensity/throughput --- Algo256/blake256.cu | 2 +- Algo256/bmw.cu | 4 ++-- Algo256/keccak256.cu | 4 ++-- JHA/jackpotcoin.cu | 4 ++-- bench.cpp | 4 ++++ cuda_nist5.cu | 4 ++-- fuguecoin.cpp | 6 +++--- groestlcoin.cpp | 6 +++--- heavy/heavy.cu | 4 ++-- lyra2/lyra2RE.cu | 4 ++-- lyra2/lyra2REv2.cu | 4 ++-- myriadgroestl.cpp | 6 +++--- neoscrypt/neoscrypt.cpp | 4 ++-- pentablake.cu | 4 ++-- quark/quarkcoin.cu | 4 ++-- qubit/deep.cu | 6 +++--- qubit/luffa.cu | 4 ++-- qubit/qubit.cu | 4 ++-- skein.cu | 4 ++-- skein2.cpp | 4 ++-- x11/c11.cu | 4 ++-- x11/fresh.cu | 6 +++--- x11/s3.cu | 4 ++-- x11/x11.cu | 4 ++-- x13/x13.cu | 4 ++-- x15/whirlpool.cu | 4 ++-- x15/whirlpoolx.cu | 4 ++-- x15/x14.cu | 4 ++-- x15/x15.cu | 4 ++-- x17/x17.cu | 4 ++-- zr5.cu | 4 ++-- 31 files changed, 68 insertions(+), 64 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index 065f1f6..0b5dd87 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -389,7 +389,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non uint64_t targetHigh = ((uint64_t*)ptarget)[3]; int intensity = (device_sm[device_map[thr_id]] > 500) ? 
22 : 20; uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); - throughput = min(throughput, max_nonce - first_nonce); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); int rc = 0; diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu index d9f9e72..f4bbb4c 100644 --- a/Algo256/bmw.cu +++ b/Algo256/bmw.cu @@ -45,8 +45,8 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; bool swapnonce = true; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 21); - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x0005; diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index ed074fd..e2234d3 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -42,8 +42,8 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 21); // 256*256*8*4 - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); // 256*256*8*4 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x00ff; diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 558233e..4e811df 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -96,8 +96,8 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20); - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = 
cuda_default_throughput(thr_id, 1U << 20); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; diff --git a/bench.cpp b/bench.cpp index 016e120..3ef7264 100644 --- a/bench.cpp +++ b/bench.cpp @@ -29,6 +29,10 @@ void bench_init(int threads) applog(LOG_BLUE, "Starting benchmark mode with %s", algo_names[opt_algo]); pthread_barrier_init(&miner_barr, NULL, threads); pthread_barrier_init(&algo_barr, NULL, threads); + // required for usage of first algo. + for (int n=0; n < opt_n_threads; n++) { + device_mem_free[n] = cuda_available_memory(n); + } } void bench_free() diff --git a/cuda_nist5.cu b/cuda_nist5.cu index 9f92987..a48171c 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -75,8 +75,8 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; int res = 0; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 20); // 256*256*16 - throughput = min(throughput, (max_nonce - first_nonce)); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*256*16 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; diff --git a/fuguecoin.cpp b/fuguecoin.cpp index 0b40671..aff0828 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -38,8 +38,8 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne uint32_t *ptarget = work->target; uint32_t start_nonce = pdata[19]++; int intensity = (device_sm[device_map[thr_id]] > 500) ? 
22 : 19; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 256*256*8 - throughput = min(throughput, max_nonce - start_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 256*256*8 + if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0xf; @@ -113,4 +113,4 @@ void free_fugue256(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 1ea54cb..a4cb26d 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -33,8 +33,8 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t start_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8 - throughput = min(throughput, max_nonce - start_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8 + if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce); uint32_t *outputHash = (uint32_t*)malloc(throughput * 64); @@ -103,4 +103,4 @@ void free_groestlcoin(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 37eb8b8..4454ec6 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -136,8 +136,8 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; // CUDA will process thousands of threads. 
- uint32_t throughput = device_intensity(thr_id, __func__, (1U << 19) - 256); - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, (1U << 19) - 256); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); int rc = 0; uint32_t *hash = NULL; diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index 41753cc..3786964 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -85,8 +85,8 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 18 : 17; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 18=256*256*4; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x00ff; diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index a8ff8bb..98a0291 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -82,8 +82,8 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc const uint32_t first_nonce = pdata[19]; int dev_id = device_map[thr_id]; int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 
20 : 18; - unsigned int defthr = 1U << intensity; - uint32_t throughput = device_intensity(dev_id, __func__, defthr); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); /* thr_id, not dev_id: same index every other algo passes */ + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x00ff; diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index ab2da64..0eea601 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -37,8 +37,8 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned uint32_t *pdata = work->data; uint32_t *ptarget = work->target; uint32_t start_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17); - throughput = min(throughput, max_nonce - start_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 17); + if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce); uint32_t *outputHash = (uint32_t*)malloc(throughput * 64); @@ -108,4 +108,4 @@ void free_myriad(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp index 92d7d1f..43681d9 100644 --- a/neoscrypt/neoscrypt.cpp +++ b/neoscrypt/neoscrypt.cpp @@ -18,9 +18,9 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign int dev_id = device_map[thr_id]; int intensity = is_windows() ? 
18 : 19; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); throughput = throughput / 32; /* set for max intensity ~= 20 */ - throughput = min(throughput, max_nonce - first_nonce + 1); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce + 1); if (opt_benchmark) ptarget[7] = 0x00ff; diff --git a/pentablake.cu b/pentablake.cu index 5d19a9c..c54eac3 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -371,8 +371,8 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int rc = 0; - uint32_t throughput = device_intensity(thr_id, __func__, 128U * 2560); // 18.5 - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 128U * 2560); // 18.5 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000F; diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 772fd65..c1a5d3f 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -141,8 +141,8 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 20); // 256*4096 - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*4096 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00F; diff --git a/qubit/deep.cu b/qubit/deep.cu index dbcd4b3..32df85c 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -58,8 +58,8 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t 
first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 - throughput = min(throughput, (max_nonce - first_nonce)); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8 + if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce)); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000f; @@ -143,4 +143,4 @@ extern "C" void free_deep(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/qubit/luffa.cu b/qubit/luffa.cu index e04be70..5d278d6 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -36,8 +36,8 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 22); // 256*256*8*8 - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 22); // 256*256*8*8 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000f; diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 7837892..1a94a54 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -77,8 +77,8 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/skein.cu b/skein.cu index da88227..9efcd6d 100644 --- a/skein.cu +++ b/skein.cu @@ -360,8 +360,8 @@ extern "C" int 
scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no sm5 = (device_sm[device_map[thr_id]] >= 500); bool checkSecnonce = (have_stratum || have_longpoll) && !sm5; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20); - throughput = min(throughput, (max_nonce - first_nonce)); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 20); + if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce)); uint32_t foundNonce, secNonce = 0; uint64_t target64 = 0; diff --git a/skein2.cpp b/skein2.cpp index ef0c930..86f5fc3 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -44,8 +44,8 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8 - throughput = min(throughput, (max_nonce - first_nonce)); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8 + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0; diff --git a/x11/c11.cu b/x11/c11.cu index c3a0848..f514cdb 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -147,8 +147,8 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 
20 : 19; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x5; diff --git a/x11/fresh.cu b/x11/fresh.cu index 49af70b..8ef4fe7 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -74,8 +74,8 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); - throughput = min(throughput, (max_nonce - first_nonce)); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff; @@ -170,4 +170,4 @@ extern "C" void free_fresh(int thr_id) init[thr_id] = false; cudaDeviceSynchronize(); -} \ No newline at end of file +} diff --git a/x11/s3.cu b/x11/s3.cu index e8e522c..ddc6382 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -63,8 +63,8 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un // reduce by one the intensity on windows intensity--; #endif - uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0xF; diff --git a/x11/x11.cu b/x11/x11.cu index ba061e0..bffed09 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -146,8 +146,8 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int intensity = 
(device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x5; diff --git a/x13/x13.cu b/x13/x13.cu index 5833e71..e7fc923 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -150,8 +150,8 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index ad49f7b..99d0d33 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -58,8 +58,8 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, uint32_t* ptarget = work->target; const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu index 4c54440..ec8db42 100644 --- a/x15/whirlpoolx.cu +++ b/x15/whirlpoolx.cu @@ -45,8 +45,8 @@ extern "C" int 
scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; int intensity = is_windows() ? 20 : 22; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x15/x14.cu b/x15/x14.cu index 236bff5..817ec43 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -164,8 +164,8 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x000f; diff --git a/x15/x15.cu b/x15/x15.cu index 3b21345..a7c224a 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -174,8 +174,8 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x00FF; diff --git a/x17/x17.cu b/x17/x17.cu index ee42522..d72f319 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -191,8 +191,8 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; 
- uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, max_nonce - first_nonce); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff; diff --git a/zr5.cu b/zr5.cu index 93fe4c7..5d05b01 100644 --- a/zr5.cu +++ b/zr5.cu @@ -338,9 +338,9 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, const uint32_t oldp0 = pdata[0]; const uint32_t version = (oldp0 & (~POK_DATA_MASK)) | (use_pok ? POK_BOOL_MASK : 0); const uint32_t first_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1U << 18); + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 18); throughput = min(throughput, (1U << 20)-1024); - throughput = min(throughput, max_nonce - first_nonce); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0x0000ff;