From a23760174770acb36edfac805670aa46773a6c3a Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 26 Jan 2016 20:38:17 +0100 Subject: [PATCH] 1.7.1 release set schedule flags to reduce linux cpu usage without MyStreamSynchronize() --- Algo256/blake256.cu | 11 ++++++++--- JHA/jackpotcoin.cu | 6 ++++++ groestlcoin.cpp | 6 ++++++ heavy/heavy.cu | 6 ++++++ myriadgroestl.cpp | 6 ++++++ pentablake.cu | 7 ++++++- quark/quarkcoin.cu | 6 ++++++ qubit/deep.cu | 5 +++++ qubit/qubit.cu | 6 ++++++ res/ccminer.rc | 8 ++++---- skein.cu | 8 +++++++- skein2.cpp | 6 ++++++ x11/c11.cu | 6 ++++++ x13/x13.cu | 6 ++++++ x15/whirlpoolx.cu | 6 ++++++ x15/x14.cu | 6 ++++++ x17/x17.cu | 5 +++++ zr5.cu | 6 ++++++ 18 files changed, 107 insertions(+), 9 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index 5fc48f2..1fb5538 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -256,7 +256,7 @@ uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const ui return result; blake256_gpu_hash_80<<>>(threads, startNonce, d_resNonce[thr_id], highTarget, crcsum, (int) rounds); - MyStreamSynchronize(NULL, 0, thr_id); + //MyStreamSynchronize(NULL, 0, thr_id); if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) { result = h_resNonce[thr_id][0]; for (int n=0; n < (NBN-1); n++) @@ -343,7 +343,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c return result; blake256_gpu_hash_16 <<>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int) rounds, opt_tracegpu); - MyStreamSynchronize(NULL, 0, thr_id); + //MyStreamSynchronize(NULL, 0, thr_id); if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) { result = h_resNonce[thr_id][0]; for (int n=0; n < (NBN-1); n++) @@ -413,7 +413,12 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - CUDA_LOG_ERROR(); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage (linux) + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)); cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)); diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 495057f..d2872c5 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -94,6 +94,12 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc if (!init[thr_id]) { cudaSetDevice(dev_id); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } cuda_get_arch(thr_id); if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) { gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)"); diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 89f178a..1b0ec4e 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -44,6 +44,12 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } CUDA_LOG_ERROR(); groestlcoin_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 40fb0c9..8782616 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -172,6 +172,12 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } hefty_cpu_init(thr_id, throughput); sha256_cpu_init(thr_id, throughput); diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 4311120..b4ce83e 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -49,6 +49,12 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned if(!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } myriadgroestl_cpu_init(thr_id, throughput); init[thr_id] = true; } diff --git a/pentablake.cu b/pentablake.cu index 2abdeb7..97dde16 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -63,7 +63,12 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - CUDA_LOG_ERROR(); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 4241d0b..98e4599 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -139,6 +139,12 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(dev_id); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } cudaGetLastError(); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/qubit/deep.cu b/qubit/deep.cu index a5d4c63..f884602 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -62,6 +62,11 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + } CUDA_LOG_ERROR(); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput)); diff --git a/qubit/qubit.cu b/qubit/qubit.cu index de72f51..e29f8c5 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -73,6 +73,12 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } qubit_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); diff --git a/res/ccminer.rc b/res/ccminer.rc index 79501e6..ad65b6f 100644 --- a/res/ccminer.rc +++ b/res/ccminer.rc @@ -60,8 +60,8 @@ IDI_ICON1 ICON "ccminer.ico" // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,7,0,0 - PRODUCTVERSION 1,7,0,0 + FILEVERSION 1,7,1,0 + PRODUCTVERSION 1,7,1,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x21L @@ -76,10 +76,10 @@ BEGIN BEGIN BLOCK "040904e4" BEGIN - VALUE "FileVersion", "1.7" + VALUE "FileVersion", "1.7.1" VALUE "LegalCopyright", "Copyright (C) 2015" VALUE "ProductName", "ccminer" - VALUE "ProductVersion", "1.7" + VALUE "ProductVersion", "1.7.1" END END BLOCK "VarFileInfo" diff --git a/skein.cu b/skein.cu index 97a7c0b..f6b1619 100644 --- a/skein.cu +++ b/skein.cu @@ -22,7 +22,7 @@ extern void skeincoin_free(int thr_id); extern void skeincoin_setBlock_80(int thr_id, void *pdata); extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce); -static __device__ __constant__ uint32_t sha256_hashTable[] = { +static __device__ uint32_t sha256_hashTable[] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; @@ -372,6 +372,12 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } if (sm5) { skeincoin_init(thr_id); diff --git a/skein2.cpp b/skein2.cpp index 38c0a5b..6e3e5a7 100644 --- a/skein2.cpp +++ b/skein2.cpp @@ -53,6 +53,12 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned if (!init[thr_id]) { cudaSetDevice(dev_id); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput); diff --git a/x11/c11.cu b/x11/c11.cu index b48e85e..ae5068a 100644 --- a/x11/c11.cu +++ b/x11/c11.cu @@ -119,6 +119,12 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } quark_blake512_cpu_init(thr_id, throughput); quark_bmw512_cpu_init(thr_id, throughput); diff --git a/x13/x13.cu b/x13/x13.cu index 39e0fe2..1093042 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -127,6 +127,12 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu index 517d5ca..8622643 100644 --- a/x15/whirlpoolx.cu +++ b/x15/whirlpoolx.cu @@ -53,6 +53,12 @@ extern "C" int scanhash_whirlx(int thr_id, struct work* work, uint32_t max_nonc if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1); diff --git a/x15/x14.cu b/x15/x14.cu index d020f5c..06f40e2 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -141,6 +141,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x17/x17.cu b/x17/x17.cu index 3be51d1..fcfdf42 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -169,6 +169,11 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + } quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/zr5.cu b/zr5.cu index 3d32e21..cf86819 100644 --- a/zr5.cu +++ b/zr5.cu @@ -351,6 +351,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + CUDA_LOG_ERROR(); + } // constants cudaMemcpyToSymbol(c_permut, permut, 24*4, 0, cudaMemcpyHostToDevice);