From 6500e7401ab275b616cfb2b4d43cc90cdb432fbc Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 18 Aug 2015 13:43:21 +0200 Subject: [PATCH] lyra2v2: properly exclude SM 3.0 devices --- lyra2/cuda_lyra2v2.cu | 24 +++++++++++++++--------- lyra2/lyra2REv2.cu | 6 ++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/lyra2/cuda_lyra2v2.cu b/lyra2/cuda_lyra2v2.cu index 70869d6..8a6ee24 100644 --- a/lyra2/cuda_lyra2v2.cu +++ b/lyra2/cuda_lyra2v2.cu @@ -1,12 +1,17 @@ #include #include +#ifdef __INTELLISENSE__ +#define __CUDA_ARCH__ 500 +#endif + #include "cuda_lyra2_vectors.h" #define TPB 16 #define Nrow 4 #define Ncol 4 + #if __CUDA_ARCH__ < 500 #define vectype ulonglong4 #define u64type uint64_t @@ -23,7 +28,9 @@ __device__ vectype *DMatrix; -#ifdef __CUDA_ARCH__ +#if __CUDA_ARCH__ >= 320 + +#if __CUDA_ARCH__ >= 500 static __device__ __forceinline__ void Gfunc_v35(uint2 &a, uint2 &b, uint2 &c, uint2 &d) { @@ -32,8 +39,7 @@ void Gfunc_v35(uint2 &a, uint2 &b, uint2 &c, uint2 &d) a += b; d ^= a; d = ROR16(d); c += d; b ^= c; b = ROR2(b, 63); } - -#if __CUDA_ARCH__ < 500 +#else static __device__ __forceinline__ void Gfunc_v35(unsigned long long &a, unsigned long long &b, unsigned long long &c, unsigned long long &d) { @@ -57,10 +63,6 @@ void round_lyra_v35(vectype* s) Gfunc_v35(s[0].z, s[1].w, s[2].x, s[3].y); Gfunc_v35(s[0].w, s[1].x, s[2].y, s[3].z); } -#else -#define round_lyra_v35(s) {} -#endif - static __device__ __forceinline__ void reduceDuplex(vectype state[4], uint32_t thread) @@ -489,6 +491,10 @@ void lyra2v2_gpu_hash_32(uint32_t threads, uint32_t startNounce, uint2 *outputHa } //thread } +#else /*__CUDA_ARCH__ >= 320 */ +__global__ void lyra2v2_gpu_hash_32(uint32_t threads, uint32_t startNounce, uint2 *outputHash) {} +__global__ void lyra2v2_gpu_hash_32_v3(uint32_t threads, uint32_t startNounce, uint2 *outputHash) {} +#endif __host__ void lyra2v2_cpu_init(int thr_id, uint32_t threads,uint64_t *hash) @@ -511,9 +517,9 @@ void lyra2v2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uin dim3 block(tpb); if (device_sm[device_map[thr_id]] >= 500) - lyra2v2_gpu_hash_32 << > > (threads, startNounce, (uint2*)d_outputHash); + lyra2v2_gpu_hash_32 <<>> (threads, startNounce, (uint2*)d_outputHash); else - lyra2v2_gpu_hash_32_v3 <<>> (threads, startNounce,(uint2*) d_outputHash); + lyra2v2_gpu_hash_32_v3 <<>> (threads, startNounce, (uint2*)d_outputHash); MyStreamSynchronize(NULL, order, thr_id); } diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index c29612c..ee4ddab 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -94,6 +94,12 @@ extern "C" int scanhash_lyra2v2(int thr_id, uint32_t *pdata, skein256_cpu_init(thr_id, throughput); bmw256_cpu_init(thr_id, throughput); + if (device_sm[device_map[thr_id]] < 320) { + applog(LOG_ERR, "Device SM 3.2 required!"); + proper_exit(1); + return -1; + } + // DMatrix CUDA_SAFE_CALL(cudaMalloc(&d_hash2[thr_id], 16 * 4 * 4 * sizeof(uint64_t) * throughput)); lyra2v2_cpu_init(thr_id, throughput, d_hash2[thr_id]);