Browse Source

skein: merge the two implementations into one

based on alexis skein kernels, tested ok on SM 2.1 and 3.0

code is a bit hard to read but... well... users don't care :p
2upstream
Tanguy Pruvot 8 years ago
parent
commit
feb99d020f
  1. 34
      cuda_vectors.h
  2. 1369
      quark/cuda_skein512.cu
  3. 2634
      quark/cuda_skein512_sp.cuh

34
cuda_vectors.h

@ -0,0 +1,34 @@
#include "cuda_helper.h"
/* Inline device helpers for uint2 operations (used by skein) */
__device__ __forceinline__
uint2 ROR8(const uint2 a) {
	// Viewing `a` as the 64-bit word (a.y:a.x), rotate it right by one byte.
	// Selector 0x4321 takes bytes 1..4 of the 8-byte pair {arg0, arg1},
	// which is (arg0 >> 8) | (arg1 << 24).
	const uint32_t lo = __byte_perm(a.x, a.y, 0x4321); // a.x's upper 3 bytes + a.y's lowest byte
	const uint32_t hi = __byte_perm(a.y, a.x, 0x4321); // a.y's upper 3 bytes + a.x's lowest byte
	return make_uint2(lo, hi);
}
__device__ __forceinline__
uint2 ROL24(const uint2 a) {
	// Viewing `a` as the 64-bit word (a.y:a.x), rotate it left by three bytes.
	// Selector 0x0765 takes bytes 5,6,7 of the 8-byte pair {arg0, arg1}
	// followed by byte 0, which is (arg1 >> 8) | (arg0 << 24).
	const uint32_t lo = __byte_perm(a.x, a.y, 0x0765); // a.y's upper 3 bytes, then a.x's lowest byte on top
	const uint32_t hi = __byte_perm(a.y, a.x, 0x0765); // a.x's upper 3 bytes, then a.y's lowest byte on top
	return make_uint2(lo, hi);
}
/*
 * Adds a 32-bit unsigned value `b` to a uint2 `a` and returns the uint2 result.
 *
 * Two implementations are present, selected by the preprocessor:
 *  - an inline-PTX add-with-carry over the two 32-bit halves (currently disabled),
 *  - a path built on the devectorize()/vectorize() helpers (the one actually compiled).
 *
 * NOTE(review): devectorize()/vectorize() are not defined in this file; presumably
 * they come from cuda_helper.h and convert between uint2 and a 64-bit integer with
 * .x as the low word -- confirm against that header.
 */
static __device__ __forceinline__ uint2 operator+ (const uint2 a, const uint32_t b)
{
/* The leading `0 &&` makes this condition always false, so the asm branch is never compiled. */
#if 0 && defined(__CUDA_ARCH__) && CUDA_VERSION < 7000
uint2 result;
/* add.cc.u32: result.x = a.x + b, recording the carry-out.
   addc.u32:   result.y = a.y + 0 + carry-in, propagating the carry into the second half. */
asm(
"add.cc.u32 %0,%2,%4; \n\t"
"addc.u32 %1,%3,%5; \n\t"
: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b), "r"(0));
return result;
#else
/* Active path: convert `a` with devectorize(), add `b`, convert the sum back with vectorize(). */
return vectorize(devectorize(a) + b);
#endif
}

1369
quark/cuda_skein512.cu

File diff suppressed because it is too large Load Diff

2634
quark/cuda_skein512_sp.cuh

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save