
ifdef some unused kernels on SM5+

no need to build both (mine and the sp variants)

and put global hashrate to 0 while waiting...
Tanguy Pruvot · 9 years ago · commit 47f309ffb4
Changed files:
  1. ccminer.cpp (1 line changed)
  2. quark/cuda_quark_blake512.cu (27 lines changed)
  3. quark/cuda_skein512.cu (2 lines changed)
  4. quark/cuda_skein512_sp.cuh (2 lines changed)

ccminer.cpp (1 line changed)

@@ -1646,6 +1646,7 @@ static void *miner_thread(void *userdata)
 		}
 		pool_on_hold = true;
+		global_hashrate = 0;
 		sleep(5);
 		if (!thr_id) pools[cur_pooln].wait_time += 5;
 		continue;
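The one-line ccminer.cpp change matches the last line of the commit message: while the pool is on hold, the total hashrate is reset so the status line reports 0 instead of the last measured rate during the 5-second back-off. A standalone sketch of that behaviour (variable types and the helper are assumptions, not the actual miner_thread code):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static double global_hashrate = 1234.5;  /* last measured total rate, in H/s (assumed type) */
static bool   pool_on_hold    = false;

static void wait_for_pool(void)
{
	pool_on_hold    = true;
	global_hashrate = 0;    /* the added line: report 0 while the miner is idle */
	printf("pool on hold, hashrate shown as %.1f H/s\n", global_hashrate);
	sleep(5);               /* back off before the next getwork attempt */
}

int main(void)
{
	wait_for_pool();
	return 0;
}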

quark/cuda_quark_blake512.cu (27 lines changed)

@@ -7,6 +7,9 @@
 #define ROTR(x,n) ROTR64(x,n)
 
+// use sp kernel on SM 5+
+#define SP_KERNEL
+
 #define USE_SHUFFLE 0
 
 __constant__
@@ -111,16 +114,10 @@ void quark_blake512_compress(uint64_t *h, const uint64_t *block, const uint8_t (
 	h[7] ^= v[7] ^ v[15];
 }
 
-// Hash-Padding
-__device__ __constant__
-static const uint64_t d_constHashPadding[8] = {
-	0x0000000000000080ull, 0, 0, 0,
-	0, 0x0100000000000000ull, 0, 0x0002000000000000ull
-};
-
 __global__ __launch_bounds__(256, 4)
 void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
 {
+#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
 #if USE_SHUFFLE
@@ -156,9 +153,15 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t
 		for (int i=0; i < 8; ++i)
 			buf[i] = inpHash[i];
 
-		#pragma unroll 8
-		for (int i=0; i < 8; i++)
-			buf[i+8] = d_constHashPadding[i];
+		// Hash Pad
+		buf[8]  = 0x0000000000000080ull;
+		buf[9]  = 0;
+		buf[10] = 0;
+		buf[11] = 0;
+		buf[12] = 0;
+		buf[13] = 0x0100000000000000ull;
+		buf[14] = 0;
+		buf[15] = 0x0002000000000000ull;
 
 		// Ending round
 		quark_blake512_compress(h, buf, c_sigma_big, c_u512, 512);
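The removed d_constHashPadding table and the new inline assignments are the same eight words; writing them out lets the compiler materialize them as immediates instead of constant-memory loads. As I read the kernel (this interpretation is mine, not stated in the diff), they are the fixed BLAKE-512 padding of a 64-byte message, stored pre-byte-swapped for the compress routine: the 0x80 marker right after the message, the closing '1' bit before the counter, and the 512-bit message length. A small host-side check of that reading, standalone and not from the repository:

// Hedged sanity check of the inlined padding words: after a 64-bit byte
// swap they match the standard BLAKE-512 padding of a 512-bit message.
#include <cstdint>
#include <cstdio>

static uint64_t swab64(uint64_t x)
{
	return __builtin_bswap64(x);   // GCC/Clang builtin
}

int main()
{
	// word 8:  '1' padding bit immediately after the 64-byte message
	std::printf("%016llx\n", (unsigned long long) swab64(0x0000000000000080ull)); // 8000000000000000
	// word 13: closing '1' bit just before the 128-bit length field
	std::printf("%016llx\n", (unsigned long long) swab64(0x0100000000000000ull)); // 0000000000000001
	// word 15: message length in bits (0x200 = 512)
	std::printf("%016llx\n", (unsigned long long) swab64(0x0002000000000000ull)); // 0000000000000200
	return 0;
}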
@@ -177,11 +180,13 @@ void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t
 		}
 #endif
 	}
+#endif /* SP */
 }
 
 __global__ __launch_bounds__(256,4)
 void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash)
 {
+//#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
@@ -221,9 +226,9 @@ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou
 		}
 #endif
 	}
+//#endif
 }
 
-#define SP_KERNEL
 #ifdef SP_KERNEL
 #include "cuda_quark_blake512_sp.cuh"
 #endif
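This is the pattern the commit applies to the remaining kernels: with SP_KERNEL now defined near the top of the file, the body of the generic quark_blake512_gpu_hash_64 is compiled only for pre-SM-5.0 device targets, so nvcc no longer builds both the generic and the sp implementation for Maxwell and newer. A minimal sketch of the idea; the kernel names, the launcher, and the runtime check are illustrative, not ccminer's actual dispatch code:

// sketch.cu - illustrative only; kernel and helper names are made up.
#include <cstdint>
#include <cstdio>

#define SP_KERNEL   // opt in to the optimized variant for SM 5.0+

__global__ void hash_generic(uint32_t *out)
{
#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500
	// real work would go here; in the sm_50+ device passes this body is compiled out
	out[threadIdx.x] = threadIdx.x;
#endif
}

#ifdef SP_KERNEL
__global__ void hash_sp(uint32_t *out)
{
	out[threadIdx.x] = threadIdx.x * 2u;   // stands in for the SM5+ kernel
}
#endif

int main()
{
	uint32_t *d_out;
	cudaMalloc(&d_out, 32 * sizeof(uint32_t));

	cudaDeviceProp prop;
	cudaGetDeviceProperties(&prop, 0);

	// pick the variant at run time, mirroring how a host launcher could dispatch
	if (prop.major >= 5)
		hash_sp<<<1, 32>>>(d_out);
	else
		hash_generic<<<1, 32>>>(d_out);

	cudaDeviceSynchronize();
	cudaFree(d_out);
	return 0;
}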

quark/cuda_skein512.cu (2 lines changed)

@@ -406,6 +406,7 @@ uint64_t skein_rotl64(const uint64_t x, const int offset)
 __global__
 void quark_skein512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector)
 {
+#if !defined(SP_KERNEL) || __CUDA_ARCH__ < 500
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
@@ -508,6 +509,7 @@ void quark_skein512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t
 		for(int i=0; i<8; i++)
 			outpHash[i] = devectorize(p[i]);
 	}
+#endif /* SM < 5.0 */
 }
 
 __global__
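Why the guard can live inside the kernel body: __CUDA_ARCH__ is defined only during the device compilation passes, where it carries the target architecture (500 for sm_50, and so on), and is never defined while host code is compiled. The check therefore strips the body from the sm_50+ device code only, while the kernel symbol and its host-side launches keep compiling for every target. A tiny standalone illustration (assumed file, not part of the repository):

// arch_demo.cu - show the per-pass value of __CUDA_ARCH__.
#include <cstdio>

__global__ void report_arch(void)
{
#ifdef __CUDA_ARCH__
	printf("device pass: __CUDA_ARCH__ = %d\n", __CUDA_ARCH__);
#endif
}

int main()
{
#ifdef __CUDA_ARCH__
	/* never reached: __CUDA_ARCH__ is undefined when host code is compiled */
#endif
	report_arch<<<1, 1>>>();
	cudaDeviceSynchronize();   // flush device-side printf
	return 0;
}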

quark/cuda_skein512_sp.cuh (2 lines changed)

@@ -1943,7 +1943,7 @@ __host__ void quark_skein512_setTarget(const void *ptarget)
 }
 
 __host__ void quark_skein512_cpu_free(int32_t thr_id)
 {
-	cudaFreeHost(&d_nonce[thr_id]);
+	cudaFree(d_nonce[thr_id]);
 }
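The last hunk is a real bug fix, not just cleanup: d_nonce[thr_id] is presumably a device pointer obtained from cudaMalloc (that is what the fix implies), so it must be released with cudaFree, and the pointer value is passed rather than its address. cudaFreeHost is only for page-locked host memory from cudaMallocHost/cudaHostAlloc. A short sketch of the correct pairings:

#include <cstdint>
#include <cuda_runtime.h>

int main()
{
	// Device memory: cudaMalloc pairs with cudaFree, passing the pointer value.
	uint32_t *d_nonce = nullptr;
	cudaMalloc(&d_nonce, 2 * sizeof(uint32_t));
	cudaFree(d_nonce);            // correct
	// cudaFreeHost(&d_nonce);    // the old code: wrong API *and* wrong argument

	// Page-locked host memory: cudaMallocHost pairs with cudaFreeHost.
	uint32_t *h_nonce = nullptr;
	cudaMallocHost(&h_nonce, 2 * sizeof(uint32_t));
	cudaFreeHost(h_nonce);        // again the pointer itself, not its address

	return 0;
}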
