From 34e97bf3e6b3bab935243d18997d6538f6a397bc Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 27 Sep 2016 00:05:17 +0200 Subject: [PATCH] Show intensity on init for all algos --- Algo256/blake256.cu | 1 + Algo256/bmw.cu | 6 ++++++ Algo256/decred.cu | 1 + Algo256/keccak256.cu | 10 +++++++++- Algo256/vanilla.cu | 12 +++++++----- JHA/jackpotcoin.cu | 1 + ccminer.cpp | 2 -- configure.ac | 2 +- fuguecoin.cpp | 7 +++++++ groestlcoin.cpp | 2 ++ heavy/heavy.cu | 1 + lyra2/lyra2RE.cu | 43 +++++++++++++++++++---------------------- lyra2/lyra2REv2.cu | 1 + myriadgroestl.cpp | 2 ++ neoscrypt/neoscrypt.cpp | 1 + pentablake.cu | 1 + quark/quarkcoin.cu | 1 + qubit/deep.cu | 3 ++- qubit/luffa.cu | 1 + qubit/qubit.cu | 1 + scrypt-jane.cpp | 3 ++- scrypt.cpp | 3 ++- sia.cu | 1 + skein.cu | 1 + skein2.cpp | 1 + x11/c11.cu | 1 + x11/fresh.cu | 8 +++++++- x11/s3.cu | 1 + x11/sib.cu | 1 + x11/x11.cu | 1 + x11/x11evo.cu | 1 + x13/x13.cu | 1 + x15/whirlpool.cu | 2 ++ x15/whirlpoolx.cu | 1 + x15/x14.cu | 1 + x15/x15.cu | 1 + x17/x17.cu | 1 + zr5.cu | 1 + 38 files changed, 93 insertions(+), 36 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index 8cd9035..6366bbb 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -504,6 +504,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); diff --git a/Algo256/bmw.cu b/Algo256/bmw.cu index 2bef9ca..085fa49 100644 --- a/Algo256/bmw.cu +++ b/Algo256/bmw.cu @@ -52,6 +52,12 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); cuda_check_cpu_init(thr_id, throughput); bmw256_midstate_init(thr_id, throughput); diff --git a/Algo256/decred.cu b/Algo256/decred.cu index e0322ca..996af17 100644 --- a/Algo256/decred.cu +++ b/Algo256/decred.cu @@ -376,6 +376,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], maxResults*sizeof(uint32_t)), -1); diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index 4d5f315..c8c47ea 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -48,8 +48,16 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no if (opt_benchmark) ptarget[7] = 0x000f; - if (!init[thr_id]) { + if (!init[thr_id]) + { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64)); keccak256_cpu_init(thr_id, throughput); diff --git a/Algo256/vanilla.cu b/Algo256/vanilla.cu index ddf77ee..e3a4aff 100644 --- a/Algo256/vanilla.cu +++ b/Algo256/vanilla.cu @@ -378,6 +378,11 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc const uint32_t targetHigh = ptarget[6]; int dev_id = device_map[thr_id]; + int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 30 : 24; + if (device_sm[dev_id] < 350) intensity = 22; + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); + if (!init[thr_id]) { cudaSetDevice(dev_id); if (opt_cudaschedule == -1 && gpu_threads == 1) { @@ -387,6 +392,8 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); cudaStreamCreate(&streams[thr_id]); @@ -402,11 +409,6 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc vanilla_cpu_setBlock_16(thr_id,endiandata,&pdata[16]); - int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 30 : 24; - if (device_sm[dev_id] < 350) intensity = 22; - uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); - if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); - const dim3 grid((throughput + (NPT*TPB)-1)/(NPT*TPB)); const dim3 block(TPB); int rc = 0; diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index d2872c5..10370ac 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -105,6 +105,7 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)"); proper_exit(EXIT_CODE_CUDA_ERROR); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/ccminer.cpp b/ccminer.cpp index 5a2742b..0de1cf7 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -2981,8 +2981,6 @@ void parse_arg(int key, char *arg) } else if (gpus_intensity[n] != (1 << v)) { gpus_intensity[n] = (1 << v); - applog(LOG_INFO, "Intensity set to %u, %u cuda threads", - v, gpus_intensity[n]); } } last = gpus_intensity[n]; diff --git a/configure.ac b/configure.ac index 20c549b..fec7bef 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [1.8.2], [], [ccminer], [http://github.com/tpruvot/ccminer]) +AC_INIT([ccminer], [1.8.3], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/fuguecoin.cpp b/fuguecoin.cpp index 9166774..1e36e67 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -40,6 +40,13 @@ int scanhash_fugue256(int thr_id, struct work* work, uint32_t max_nonce, unsigne if(!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); fugue256_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 96a6d38..a1b5136 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -48,6 +48,8 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + CUDA_LOG_ERROR(); groestlcoin_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 8782616..b22401f 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -178,6 +178,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); hefty_cpu_init(thr_id, throughput); sha256_cpu_init(thr_id, throughput); diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index 9cd9ccb..cd3ca6a 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -79,7 +79,7 @@ extern "C" void lyra2re_hash(void *state, const void *input) } static bool init[MAX_GPUS] = { 0 }; -static uint32_t throughput[MAX_GPUS] = { 0 }; +static __thread uint32_t throughput = 0; extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) { @@ -99,9 +99,8 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 17 : 16; if (device_sm[device_map[thr_id]] == 500) intensity = 15; - int temp = intensity; - throughput[thr_id] = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4; - if (init[thr_id]) throughput[thr_id] = min(throughput[thr_id], max_nonce - first_nonce); + throughput = cuda_default_throughput(thr_id, 1U << intensity); // 18=256*256*4; + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); cudaDeviceProp props; cudaGetDeviceProperties(&props, dev_id); @@ -109,25 +108,23 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, if (strstr(props.name, "750 Ti")) gtx750ti = true; else gtx750ti = false; - blake256_cpu_init(thr_id, throughput[thr_id]); - keccak256_cpu_init(thr_id, throughput[thr_id]); - skein256_cpu_init(thr_id, throughput[thr_id]); - groestl256_cpu_init(thr_id, throughput[thr_id]); + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + + blake256_cpu_init(thr_id, throughput); + keccak256_cpu_init(thr_id, throughput); + skein256_cpu_init(thr_id, throughput); + groestl256_cpu_init(thr_id, throughput); if (device_sm[dev_id] >= 500) { size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4; - CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput[thr_id])); - lyra2_cpu_init(thr_id, throughput[thr_id], d_matrix[thr_id]); + CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput)); + lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]); } - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput[thr_id])); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput)); init[thr_id] = true; - if (temp != intensity){ - gpulog(LOG_INFO, thr_id, "Intensity set to %u, %u cuda threads", - intensity, throughput[thr_id]); - } } uint32_t _ALIGN(128) endiandata[20]; @@ -141,15 +138,15 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, int order = 0; uint32_t foundNonce; - blake256_cpu_hash_80(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); - keccak256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); - lyra2_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], gtx750ti); - skein256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); + blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti); + skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); TRACE("S") - *hashes_done = pdata[19] - first_nonce + throughput[thr_id]; + *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = groestl256_cpu_hash_32(thr_id, throughput[thr_id], pdata[19], d_hash[thr_id], order++); + foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); if (foundNonce != UINT32_MAX) { uint32_t _ALIGN(64) vhash64[8]; @@ -181,11 +178,11 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, } } - if ((uint64_t)throughput[thr_id] + pdata[19] >= max_nonce) { + if ((uint64_t)throughput + pdata[19] >= max_nonce) { pdata[19] = max_nonce; break; } - pdata[19] += throughput[thr_id]; + pdata[19] += throughput; } while (!work_restart[thr_id].restart); diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index f7342f2..f6699ac 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -113,6 +113,7 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); blake256_cpu_init(thr_id, throughput); keccak256_cpu_init(thr_id,throughput); diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 988c586..fdc6a2f 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -55,6 +55,8 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + myriadgroestl_cpu_init(thr_id, throughput); init[thr_id] = true; } diff --git a/neoscrypt/neoscrypt.cpp b/neoscrypt/neoscrypt.cpp index fd38f23..41fdfa7 100644 --- a/neoscrypt/neoscrypt.cpp +++ b/neoscrypt/neoscrypt.cpp @@ -40,6 +40,7 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); cudaGetLastError(); // reset errors if device is not "reset" } + gpulog(LOG_INFO, thr_id, "Intensity set to %g (+5), %u cuda threads", throughput2intensity(throughput), throughput); if (device_sm[dev_id] <= 300) { gpulog(LOG_ERR, thr_id, "Sorry neoscrypt is not supported on SM 3.0 devices"); diff --git a/pentablake.cu b/pentablake.cu index 97dde16..5a34bc4 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -69,6 +69,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 91a0e11..beea1d5 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -145,6 +145,7 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); cudaGetLastError(); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/qubit/deep.cu b/qubit/deep.cu index f884602..b681a47 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -66,8 +66,9 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, cudaDeviceReset(); // reduce cpu usage cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); } - CUDA_LOG_ERROR(); + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/qubit/luffa.cu b/qubit/luffa.cu index db283cb..f92011d 100644 --- a/qubit/luffa.cu +++ b/qubit/luffa.cu @@ -51,6 +51,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/qubit/qubit.cu b/qubit/qubit.cu index e29f8c5..927c93b 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -79,6 +79,7 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); qubit_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); diff --git a/scrypt-jane.cpp b/scrypt-jane.cpp index 070092d..89e7472 100644 --- a/scrypt-jane.cpp +++ b/scrypt-jane.cpp @@ -489,8 +489,9 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi cudaDeviceSynchronize(); cudaDeviceReset(); cudaSetDevice(dev_id); + throughput = cuda_throughput(thr_id); - applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput); + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); init[thr_id] = true; } diff --git a/scrypt.cpp b/scrypt.cpp index 5dd3f27..1d53d17 100644 --- a/scrypt.cpp +++ b/scrypt.cpp @@ -721,8 +721,9 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned cudaDeviceSynchronize(); cudaDeviceReset(); cudaSetDevice(dev_id); + throughput = cuda_throughput(thr_id); - applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput); + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); init[thr_id] = true; } diff --git a/sia.cu b/sia.cu index 1a5b681..ee08e40 100644 --- a/sia.cu +++ b/sia.cu @@ -214,6 +214,7 @@ int scanhash_sia(int thr_id, struct work *work, uint32_t max_nonce, unsigned lon //cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonces[thr_id], NBN * sizeof(uint32_t)), -1); init[thr_id] = true; diff --git a/skein.cu b/skein.cu index f6b1619..12e08d7 100644 --- a/skein.cu +++ b/skein.cu @@ -378,6 +378,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); if (sm5) { skeincoin_init(thr_id); diff --git a/skein2.cpp b/skein2.cpp index 8875042..a27e5dc 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -62,6 +62,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput); diff --git a/x11/c11.cu b/x11/c11.cu index 5536cc8..048cd69 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -125,6 +125,7 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); diff --git a/x11/fresh.cu b/x11/fresh.cu index 5e3bf38..9c426c6 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -83,7 +83,13 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - CUDA_LOG_ERROR(); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput + 4), -1); diff --git a/x11/s3.cu b/x11/s3.cu index 84f0998..e2887bf 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -86,6 +86,7 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/x11/sib.cu b/x11/sib.cu index 28794fb..ca7bb77 100644 --- a/x11/sib.cu +++ b/x11/sib.cu @@ -120,6 +120,7 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); diff --git a/x11/x11.cu b/x11/x11.cu index 2c56892..df7dd84 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -114,6 +114,7 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); diff --git a/x11/x11evo.cu b/x11/x11evo.cu index 289fe70..da208ac 100644 --- a/x11/x11evo.cu +++ b/x11/x11evo.cu @@ -257,6 +257,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); diff --git a/x13/x13.cu b/x13/x13.cu index cf04d32..ad3d02a 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -133,6 +133,7 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 44a0f2b..2a490de 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -86,6 +86,8 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); x15_whirlpool_cpu_init(thr_id, throughput, 1 /* old whirlpool */); diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu index 8622643..e9ec79b 100644 --- a/x15/whirlpoolx.cu +++ b/x15/whirlpoolx.cu @@ -59,6 +59,7 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1); diff --git a/x15/x14.cu b/x15/x14.cu index c2f6912..926f1e8 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -147,6 +147,7 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x15/x15.cu b/x15/x15.cu index 2ad2900..45d94fc 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -157,6 +157,7 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x17/x17.cu b/x17/x17.cu index fa2ab31..8b804e5 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -172,6 +172,7 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u // reduce cpu usage cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/zr5.cu b/zr5.cu index cf86819..e5281c3 100644 --- a/zr5.cu +++ b/zr5.cu @@ -357,6 +357,7 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); // constants cudaMemcpyToSymbol(c_permut, permut, 24*4, 0, cudaMemcpyHostToDevice);