From 11c5ec810d5dcafb0fcf2f3a6b6e0eaca7b152cf Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 9 Nov 2014 21:55:19 +0100 Subject: [PATCH] Handle intensity param in all algos and add a check related to start/max nonce params --- JHA/jackpotcoin.cu | 6 +++--- blake32.cu | 7 +++---- cuda_nist5.cu | 6 +++--- heavy/heavy.cu | 10 +++++----- keccak/keccak256.cu | 8 +++----- quark/animecoin.cu | 3 ++- quark/quarkcoin.cu | 4 +++- qubit/deep.cu | 3 ++- qubit/doom.cu | 3 ++- qubit/qubit.cu | 5 +++-- x11/fresh.cu | 4 +++- x11/s3.cu | 9 +++++---- x11/x11.cu | 3 ++- x13/x13.cu | 3 ++- x15/whirlpool.cu | 8 ++++---- x15/x14.cu | 8 +++++--- x15/x15.cu | 8 +++++--- x17/x17.cu | 7 ++++--- 18 files changed, 59 insertions(+), 46 deletions(-) diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 804e609..9050edb 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -97,9 +97,8 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; - const uint32_t Htarg = ptarget[7]; - - const int throughput = 256*4096*4; // 100; + int throughput = opt_work_size ? 
opt_work_size : (1 << 22); // 256*4096*4 + throughput = min(throughput, max_nonce - first_nonce); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) @@ -212,6 +211,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, { unsigned int rounds; uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); // diese jackpothash Funktion gibt die Zahl der Runden zurück diff --git a/blake32.cu b/blake32.cu index 1fb763a..c5a2197 100644 --- a/blake32.cu +++ b/blake32.cu @@ -16,7 +16,6 @@ extern "C" { /* threads per block and throughput (intensity) */ #define TPB 128 -#define INTENSITY (1 << 20) // = 1048576 nonces per call /* added in sph_blake.c */ extern "C" int blake256_rounds = 14; @@ -393,15 +392,15 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt { const uint32_t first_nonce = pdata[19]; static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - uint64_t targetHigh = ((uint64_t*)ptarget)[3]; // 0x00000000.0fffffff + uint64_t targetHigh = ((uint64_t*)ptarget)[3]; uint32_t _ALIGN(64) endiandata[20]; #if PRECALC64 uint32_t _ALIGN(64) midstate[8]; #else uint32_t crcsum; #endif - /* todo: -i param */ - uint32_t throughput = min(INTENSITY, max_nonce - first_nonce); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 20); // 1048576 nonces per call + throughput = min(throughput, max_nonce - first_nonce); int rc = 0; diff --git a/cuda_nist5.cu b/cuda_nist5.cu index 1189f18..5a41b0d 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -77,9 +77,8 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; - const uint32_t Htarg = ptarget[7]; - - const int throughput = 256*4096; // 100; + int throughput = opt_work_size ? 
opt_work_size : (1 << 20); // 256*4096 + throughput = min(throughput, max_nonce - first_nonce); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) @@ -119,6 +118,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); nist5hash(vhash64, endiandata); diff --git a/heavy/heavy.cu b/heavy/heavy.cu index f5a71f2..3221e77 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -281,8 +281,10 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done, uint32_t maxvote, int blocklen) { + const uint32_t first_nonce = pdata[19]; /* to check */ // CUDA will process thousands of threads. - const int throughput = 4096 * 128; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 128*4096 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000000ff; @@ -296,8 +298,6 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, int nrmCalls[6]; memset(nrmCalls, 0, sizeof(int) * 6); - uint32_t start_nonce = pdata[19]; - // für jeden Hash ein individuelles Target erstellen basierend // auf dem höchsten Bit, das in ptarget gesetzt ist. 
int highbit = findhighbit(ptarget, 8); @@ -418,7 +418,7 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata, } else { - *hashes_done = pdata[19] - start_nonce; + *hashes_done = pdata[19] - first_nonce; rc = 1; goto exit; } @@ -432,7 +432,7 @@ emptyNonceVector: pdata[19] += throughput; } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); - *hashes_done = pdata[19] - start_nonce; + *hashes_done = pdata[19] - first_nonce; exit: cudaFreeHost(cpu_nonceVector); diff --git a/keccak/keccak256.cu b/keccak/keccak256.cu index 52108d0..3d4c8cc 100644 --- a/keccak/keccak256.cu +++ b/keccak/keccak256.cu @@ -46,9 +46,8 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; - const uint32_t Htarg = ptarget[7]; - - const int throughput = 256*256*8*8; + int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8 + throughput = min(throughput, max_nonce - first_nonce); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) { @@ -72,10 +71,9 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); if (foundNonce != 0xffffffff) { - uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); - keccak256_hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { diff --git a/quark/animecoin.cu b/quark/animecoin.cu index bb4f7da..b461e92 100644 --- a/quark/animecoin.cu +++ b/quark/animecoin.cu @@ -170,7 +170,8 @@ extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, ((uint32_t*)ptarget)[7] = 0x00000f; const uint32_t Htarg = ptarget[7]; - const int throughput = 256*2048; // 100; + int throughput = opt_work_size ? 
opt_work_size : (1 << 20); // 256*2048*2 + throughput = min(throughput, max_nonce - first_nonce); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 1adac57..2604461 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -135,9 +135,11 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*4096; // 100; static bool init[8] = {0,0,0,0,0,0,0,0}; + int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 + throughput = min(throughput, max_nonce - first_nonce); + if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; diff --git a/qubit/deep.cu b/qubit/deep.cu index 8e9a8be..538a851 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -60,9 +60,10 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; + int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/qubit/doom.cu b/qubit/doom.cu index 8f93ac4..f8254b3 100644 --- a/qubit/doom.cu +++ b/qubit/doom.cu @@ -40,9 +40,10 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; + int throughput = opt_work_size ? 
opt_work_size : (1 << 22); // 256*256*8*8 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 11eb0d1..45b0d14 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -78,10 +78,11 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) { - const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; + const uint32_t first_nonce = pdata[19]; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x11/fresh.cu b/x11/fresh.cu index 0c5179a..8aa1214 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -75,10 +75,12 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); + if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff; diff --git a/x11/s3.cu b/x11/s3.cu index 08f4018..24e73bb 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -57,14 +57,15 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; + static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + #ifdef WIN32 // reduce a bit the intensity on windows - const int throughput = 256 * 256 * 8; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; #else - const int throughput = 256 * 256 * 8 * 2; + int throughput = opt_work_size ? 
opt_work_size : (1 << 20); // 256*256*8*2; #endif - - static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0xF; diff --git a/x11/x11.cu b/x11/x11.cu index 4834130..6dad14c 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -138,8 +138,9 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000f; diff --git a/x13/x13.cu b/x13/x13.cu index c751211..747ec92 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -162,7 +162,8 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, const uint32_t Htarg = ptarget[7]; - const int throughput = 256*256*8; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 192a71c..c9575ed 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -56,13 +56,13 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; - uint32_t Htarg = ptarget[7]; + int throughput = opt_work_size ? 
opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) - ((uint32_t*)ptarget)[7] = Htarg = 0x0000ff; + ((uint32_t*)ptarget)[7] = 0x0000ff; if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); @@ -91,8 +91,8 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); - wcoinhash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) diff --git a/x15/x14.cu b/x15/x14.cu index 7335a0d..b24a61d 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -168,13 +168,14 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; - uint32_t Htarg = ptarget[7]; + + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) - ((uint32_t*)ptarget)[7] = Htarg = 0xff; + ((uint32_t*)ptarget)[7] = 0xff; if (!init[thr_id]) { @@ -230,6 +231,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, uint32_t vhash64[8]; be32enc(&endiandata[19], foundNonce); x14hash(vhash64, endiandata); + uint32_t Htarg = ptarget[7]; if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { pdata[19] = foundNonce; *hashes_done = foundNonce - first_nonce + 1; diff --git a/x15/x15.cu b/x15/x15.cu index 012cafd..ec73805 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -177,13 +177,14 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; uint32_t endiandata[20]; - uint32_t Htarg = ptarget[7]; + + int throughput = opt_work_size ? 
opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) - ((uint32_t*)ptarget)[7] = Htarg = 0x00FF; + ((uint32_t*)ptarget)[7] = 0x00FF; if (!init[thr_id]) { @@ -241,6 +242,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, { /* check now with the CPU to confirm */ uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); x15hash(vhash64, endiandata); if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { diff --git a/x17/x17.cu b/x17/x17.cu index 608ce60..e4573a5 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -197,12 +197,12 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - const int throughput = 256*256*8; static bool init[8] = {0,0,0,0,0,0,0,0}; - uint32_t Htarg = ptarget[7]; + int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) - ((uint32_t*)ptarget)[7] = Htarg = 0x00FF; + ((uint32_t*)ptarget)[7] = 0x00FF; if (!init[thr_id]) { @@ -265,6 +265,7 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; + uint32_t Htarg = ptarget[7]; be32enc(&endiandata[19], foundNonce); x17hash(vhash64, endiandata);