From c6baddc85a5388f9149d35a38518f3ca75ffa086 Mon Sep 17 00:00:00 2001 From: KlausT Date: Sat, 13 Dec 2014 08:35:22 +0100 Subject: [PATCH] make throughput unsigned Note: The next thing to do is to change all "int threads" in cuda --- JHA/jackpotcoin.cu | 4 ++-- cuda_nist5.cu | 2 +- heavy/heavy.cu | 6 +++--- lyra2/lyra2RE.cu | 4 ++-- pentablake.cu | 5 +++-- quark/quarkcoin.cu | 5 ++--- qubit/deep.cu | 2 +- qubit/qubit.cu | 5 +++-- x11/s3.cu | 11 ++++------- x11/x11.cu | 5 +++-- x15/whirlpool.cu | 4 ++-- x15/x14.cu | 4 ++-- x15/x15.cu | 4 ++-- x17/x17.cu | 4 ++-- 14 files changed, 32 insertions(+), 33 deletions(-) diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 29059a3..965f7c4 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -96,8 +96,8 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; - int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 + throughput = min(throughput, max_nonce - first_nonce); if (!init[thr_id]) { diff --git a/cuda_nist5.cu b/cuda_nist5.cu index e88017f..7879457 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -75,7 +75,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; - int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 + uint32_t throughput = opt_work_size ? 
opt_work_size : (1 << 20); // 256*4096 throughput = min(throughput, (int) (max_nonce - first_nonce)); if (!init[thr_id]) diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 860d1c7..cbb7ebc 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -127,7 +127,7 @@ struct check_nonce_for_remove } }; -static bool init[8] = {0,0,0,0,0,0,0,0}; +static bool init[8] = { 0 }; __host__ int scanhash_heavy(int thr_id, uint32_t *pdata, @@ -135,8 +135,8 @@ int scanhash_heavy(int thr_id, uint32_t *pdata, unsigned long *hashes_done, uint32_t maxvote, int blocklen) { const uint32_t first_nonce = pdata[19]; - // CUDA will process thousands of threads. - int throughput = opt_work_size ? opt_work_size : (1 << 19) - 256; // 256*2048 + // Remove 256 to allow -i 20 + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 19) - 256; // 256*2048 throughput = min(throughput, (int)(max_nonce - first_nonce)); int rc = 0; diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index 9838614..b1865f1 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -64,8 +64,8 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 18 : 17; - int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 18=256*256*4; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << intensity); // 18=256*256*4; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/pentablake.cu b/pentablake.cu index 49f49e3..c205c6f 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -492,8 +492,9 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; int rc = 0; - int throughput = opt_work_size ? 
opt_work_size : (128 * 2560); // 18.5 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (128 * 2560); // 18.5 + + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000F; diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index f52337f..c44063a 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -137,9 +137,8 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - - int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00F; diff --git a/qubit/deep.cu b/qubit/deep.cu index f2c5957..a5e7b4f 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -69,7 +69,7 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); qubit_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); diff --git a/qubit/qubit.cu b/qubit/qubit.cu index f9c3461..a155d61 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -80,8 +80,9 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, { uint32_t endiandata[20]; const uint32_t first_nonce = pdata[19]; - int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? 
opt_work_size : (1 << 19); // 256*256*8 + + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x11/s3.cu b/x11/s3.cu index 0b495ac..12b200c 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -57,13 +57,10 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - int intensity = 20; // 256*256*8*2; -#ifdef WIN32 - // reduce by one the intensity on windows - intensity--; -#endif - int throughput = opt_work_size ? opt_work_size : (1 << intensity); - throughput = min(throughput, (int)(max_nonce - first_nonce)); + int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19; + uint32_t throughput = opt_work_size ? opt_work_size : (1 << intensity); + + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0xF; diff --git a/x11/x11.cu b/x11/x11.cu index 5a561ee..99dc131 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -133,8 +133,9 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19; - int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16; + + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x5; diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index a18ce69..192cbd3 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -57,8 +57,8 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = opt_work_size ? 
opt_work_size : (1 << 19); // 256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x15/x14.cu b/x15/x14.cu index d14151c..9b71380 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -163,8 +163,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; diff --git a/x15/x15.cu b/x15/x15.cu index c714bd3..56d1a2b 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -174,8 +174,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; diff --git a/x17/x17.cu b/x17/x17.cu index 32b6dc7..ddc01a5 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -191,8 +191,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; - int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = opt_work_size ? 
opt_work_size : (1 << 19); // 256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff;