diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu index 76ad2e9..5f77b0b 100644 --- a/x11/cuda_x11_cubehash512.cu +++ b/x11/cuda_x11_cubehash512.cu @@ -8,8 +8,15 @@ typedef unsigned char BitSequence; #define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */ #define CUBEHASH_BLOCKBYTES 32 /* this is b for CubeHashr/b */ -#define ROTATEUPWARDS7(a) (((a) << 7) | ((a) >> 25)) -#define ROTATEUPWARDS11(a) (((a) << 11) | ((a) >> 21)) +#if __CUDA_ARCH__ < 350 +#define LROT(x,bits) ((x << bits) | (x >> (32 - bits))) +#else +#define LROT(x, bits) __funnelshift_l(x, x, bits) +#endif + +#define ROTATEUPWARDS7(a) LROT(a,7) +#define ROTATEUPWARDS11(a) LROT(a,11) + #define SWAP(a,b) { uint32_t u = a; a = b; b = u; } __device__ __constant__ @@ -278,11 +285,13 @@ __global__ void x11_cubehash512_gpu_hash_64(int threads, uint32_t startNounce, u // Setup-Funktionen -__host__ void x11_cubehash512_cpu_init(int thr_id, int threads) +__host__ +void x11_cubehash512_cpu_init(int thr_id, int threads) { } -__host__ void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ +void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { const int threadsperblock = 256; diff --git a/x11/cuda_x11_luffa512.cu b/x11/cuda_x11_luffa512.cu index 72deb5f..c6dd1dc 100644 --- a/x11/cuda_x11_luffa512.cu +++ b/x11/cuda_x11_luffa512.cu @@ -20,9 +20,6 @@ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - typedef unsigned char BitSequence; typedef struct { @@ -41,11 +38,17 @@ typedef struct { a[1+(8*j)] = a[0+(8*j)] ^ tmp;\ a[0+(8*j)] = tmp; +#if __CUDA_ARCH__ < 350 +#define LROT(x,bits) ((x << bits) | (x >> (32 - bits))) +#else +#define LROT(x, bits) __funnelshift_l(x, x, bits) +#endif + #define TWEAK(a0,a1,a2,a3,j)\ - a0 = (a0<<(j))|(a0>>(32-j));\ - a1 = (a1<<(j))|(a1>>(32-j));\ - a2 = (a2<<(j))|(a2>>(32-j));\ - a3 = (a3<<(j))|(a3>>(32-j)); + a0 = LROT(a0,j);\ + a1 = LROT(a1,j);\ + a2 = LROT(a2,j);\ + a3 = LROT(a3,j); #define STEP(c0,c1)\ SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\ @@ -77,13 +80,13 @@ typedef struct { #define MIXWORD(a0,a4)\ a4 ^= a0;\ - a0 = (a0<<2) | (a0>>(30));\ + a0 = LROT(a0,2);\ a0 ^= a4;\ - a4 = (a4<<14) | (a4>>(18));\ + a4 = LROT(a4,14);\ a4 ^= a0;\ - a0 = (a0<<10) | (a0>>(22));\ + a0 = LROT(a0,10);\ a0 ^= a4;\ - a4 = (a4<<1) | (a4>>(31)); + a4 = LROT(a4,1); #define ADD_CONSTANT(a0,b0,c0,c1)\ a0 ^= c0;\