From e5d1cf8416b12dcd0fe0d2f850f072255edcb1c7 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 15 Oct 2015 06:48:42 +0200 Subject: [PATCH] lyra2v2: typo in type, its a struct of 4x uint2 :p --- lyra2/cuda_lyra2v2.cu | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lyra2/cuda_lyra2v2.cu b/lyra2/cuda_lyra2v2.cu index bd2ad25..d096f2f 100644 --- a/lyra2/cuda_lyra2v2.cu +++ b/lyra2/cuda_lyra2v2.cu @@ -18,10 +18,10 @@ #define Nrow 4 #define Ncol 4 -#define uint4x2 uint28 +#define uint2x4 uint28 #define memshift 3 -__device__ uint4x2 *DMatrix; +__device__ uint2x4 *DMatrix; __device__ __forceinline__ void Gfunc_v5(uint2 &a, uint2 &b, uint2 &c, uint2 &d) @@ -33,7 +33,7 @@ void Gfunc_v5(uint2 &a, uint2 &b, uint2 &c, uint2 &d) } __device__ __forceinline__ -void round_lyra_v5(uint4x2* s) +void round_lyra_v5(uint2x4* s) { Gfunc_v5(s[0].x, s[1].x, s[2].x, s[3].x); Gfunc_v5(s[0].y, s[1].y, s[2].y, s[3].y); @@ -47,9 +47,9 @@ void round_lyra_v5(uint4x2* s) } __device__ __forceinline__ -void reduceDuplex(uint4x2 state[4], const uint32_t thread) +void reduceDuplex(uint2x4 state[4], const uint32_t thread) { - uint4x2 state1[3]; + uint2x4 state1[3]; const uint32_t ps1 = (Nrow * Ncol * memshift * thread); const uint32_t ps2 = (memshift * (Ncol-1) + memshift * Ncol + Nrow * Ncol * memshift * thread); @@ -80,7 +80,7 @@ void reduceDuplex(uint4x2 state[4], const uint32_t thread) } __device__ __forceinline__ -void reduceDuplex50(uint4x2 state[4], const uint32_t thread) +void reduceDuplex50(uint2x4 state[4], const uint32_t thread) { const uint32_t ps1 = (Nrow * Ncol * memshift * thread); const uint32_t ps2 = (memshift * (Ncol - 1) + memshift * Ncol + Nrow * Ncol * memshift * thread); @@ -104,9 +104,9 @@ void reduceDuplex50(uint4x2 state[4], const uint32_t thread) } __device__ __forceinline__ -void reduceDuplexRowSetupV2(const int rowIn, const int rowInOut, const int rowOut, uint4x2 state[4], const uint32_t thread) +void reduceDuplexRowSetupV2(const int rowIn, const int rowInOut, const int rowOut, uint2x4 state[4], const uint32_t thread) { - uint4x2 state2[3], state1[3]; + uint2x4 state2[3], state1[3]; const uint32_t ps1 = (memshift * Ncol * rowIn + Nrow * Ncol * memshift * thread); const uint32_t ps2 = (memshift * Ncol * rowInOut + Nrow * Ncol * memshift * thread); @@ -151,7 +151,7 @@ void reduceDuplexRowSetupV2(const int rowIn, const int rowInOut, const int rowOu #pragma unroll for (int j = 0; j < 3; j++) { - uint4x2 tmp = state1[j] + state2[j]; + uint2x4 tmp = state1[j] + state2[j]; state[j] ^= tmp; } @@ -179,9 +179,9 @@ void reduceDuplexRowSetupV2(const int rowIn, const int rowInOut, const int rowOu __device__ __forceinline__ -void reduceDuplexRowtV2(const int rowIn, const int rowInOut, const int rowOut, uint4x2* state, const uint32_t thread) +void reduceDuplexRowtV2(const int rowIn, const int rowInOut, const int rowOut, uint2x4* state, const uint32_t thread) { - uint4x2 state1[3], state2[3]; + uint2x4 state1[3], state2[3]; const uint32_t ps1 = (memshift * Ncol * rowIn + Nrow * Ncol * memshift * thread); const uint32_t ps2 = (memshift * Ncol * rowInOut + Nrow * Ncol * memshift * thread); const uint32_t ps3 = (memshift * Ncol * rowOut + Nrow * Ncol * memshift * thread); @@ -259,7 +259,7 @@ void lyra2v2_gpu_hash_32(const uint32_t threads, uint32_t startNounce, uint2 *g_ { const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); - uint4x2 blake2b_IV[2]; + uint2x4 blake2b_IV[2]; if (threadIdx.x == 0) { @@ -273,7 +273,7 @@ void lyra2v2_gpu_hash_32(const uint32_t threads, uint32_t startNounce, uint2 *g_ if (thread < threads) { - uint4x2 state[4]; + uint2x4 state[4]; ((uint2*)state)[0] = __ldg(&g_hash[thread]); ((uint2*)state)[1] = __ldg(&g_hash[thread + threads]);