From 47f309ffb4fd3a8b11e98a1c74a28be9ac2a258a Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 28 Oct 2015 07:25:11 +0100 Subject: [PATCH] ifdef some unused kernels on SM5+ no need to build both (mine and sm variants) and put global hashrate to 0 while waiting... --- ccminer.cpp | 1 + quark/cuda_quark_blake512.cu | 27 ++++++++++++++++----------- quark/cuda_skein512.cu | 2 ++ quark/cuda_skein512_sp.cuh | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/ccminer.cpp b/ccminer.cpp index 3ae49bb..1ba68c1 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -1646,6 +1646,7 @@ static void *miner_thread(void *userdata) } pool_on_hold = true; + global_hashrate = 0; sleep(5); if (!thr_id) pools[cur_pooln].wait_time += 5; continue; diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu index bee2a93..68a411a 100644 --- a/quark/cuda_quark_blake512.cu +++ b/quark/cuda_quark_blake512.cu @@ -7,6 +7,9 @@ #define ROTR(x,n) ROTR64(x,n) +// use sp kernel on SM 5+ +#define SP_KERNEL + #define USE_SHUFFLE 0 __constant__ @@ -111,16 +114,10 @@ void quark_blake512_compress(uint64_t *h, const uint64_t *block, const uint8_t ( h[7] ^= v[7] ^ v[15]; } -// Hash-Padding -__device__ __constant__ -static const uint64_t d_constHashPadding[8] = { - 0x0000000000000080ull, 0, 0, 0, - 0, 0x0100000000000000ull, 0, 0x0002000000000000ull -}; - __global__ __launch_bounds__(256, 4) void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash) { +#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500 uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); #if USE_SHUFFLE @@ -156,9 +153,15 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t for (int i=0; i < 8; ++i) buf[i] = inpHash[i]; - #pragma unroll 8 - for (int i=0; i < 8; i++) - buf[i+8] = d_constHashPadding[i]; + // Hash Pad + buf[8] = 0x0000000000000080ull; + buf[9] = 0; + buf[10] = 0; + buf[11] = 0; + buf[12] = 0; + buf[13] = 0x0100000000000000ull; + buf[14] = 0; + buf[15] = 0x0002000000000000ull; // Ending round quark_blake512_compress(h, buf, c_sigma_big, c_u512, 512); @@ -177,11 +180,13 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t } #endif } +#endif /* SP */ } __global__ __launch_bounds__(256,4) void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { +//#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500 uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { @@ -221,9 +226,9 @@ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou } #endif } +//#endif } -#define SP_KERNEL #ifdef SP_KERNEL #include "cuda_quark_blake512_sp.cuh" #endif diff --git a/quark/cuda_skein512.cu b/quark/cuda_skein512.cu index ab3d951..563a5ee 100644 --- a/quark/cuda_skein512.cu +++ b/quark/cuda_skein512.cu @@ -406,6 +406,7 @@ uint64_t skein_rotl64(const uint64_t x, const int offset) __global__ void quark_skein512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector) { +#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500 uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { @@ -508,6 +509,7 @@ void quark_skein512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t for(int i=0; i<8; i++) outpHash[i] = devectorize(p[i]); } +#endif /* SM < 5.0 */ } __global__ diff --git a/quark/cuda_skein512_sp.cuh b/quark/cuda_skein512_sp.cuh index f45c8d7..9e606ee 100644 --- a/quark/cuda_skein512_sp.cuh +++ b/quark/cuda_skein512_sp.cuh @@ -1943,7 +1943,7 @@ __host__ void quark_skein512_setTarget(const void *ptarget) } __host__ void quark_skein512_cpu_free(int32_t thr_id) { - cudaFreeHost(&d_nonce[thr_id]); + cudaFree(d_nonce[thr_id]); }