mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-24 05:24:16 +00:00
35 lines
794 B
C
35 lines
794 B
C
|
#include "cuda_helper.h"
|
||
|
|
||
|
/* Inline device helpers for uint2 operations (used by skein) */
|
||
|
|
||
|
/**
 * Rotate a 64-bit value, carried as a uint2 pair, right by 8 bits.
 *
 * The byte permutation used here is a rotate-right-by-one-byte when a.x is
 * the low 32-bit word and a.y is the high 32-bit word.
 *
 * Selector 0x4321 makes __byte_perm(p, q, ...) return bytes 1..3 of p
 * followed by byte 0 of q, i.e. (p >> 8) | (q << 24).
 *
 * @param a  input value as {x = low word, y = high word}
 * @return   the input rotated right by 8 bits, in the same layout
 */
__device__ __forceinline__
uint2 ROR8(const uint2 a) {
	/* Same selector for both halves; only the operand order is swapped. */
	const unsigned int kSelector = 0x4321;

	uint2 rotated;
	/* High word: upper three bytes of a.y, topped with the low byte of a.x. */
	rotated.y = __byte_perm(a.y, a.x, kSelector);
	/* Low word: upper three bytes of a.x, topped with the low byte of a.y. */
	rotated.x = __byte_perm(a.x, a.y, kSelector);
	return rotated;
}
|
||
|
|
||
|
/**
 * Rotate a 64-bit value, carried as a uint2 pair, left by 24 bits.
 *
 * The byte permutation used here is a rotate-left-by-three-bytes when a.x
 * is the low 32-bit word and a.y is the high 32-bit word.
 *
 * Selector 0x0765 makes __byte_perm(p, q, ...) return bytes 1..3 of q
 * followed by byte 0 of p, i.e. (q >> 8) | (p << 24).
 *
 * @param a  input value as {x = low word, y = high word}
 * @return   the input rotated left by 24 bits, in the same layout
 */
__device__ __forceinline__
uint2 ROL24(const uint2 a) {
	/* Same selector for both halves; only the operand order is swapped. */
	const unsigned int kSelector = 0x0765;

	uint2 rotated;
	/* High word: upper three bytes of a.x, topped with the low byte of a.y. */
	rotated.y = __byte_perm(a.y, a.x, kSelector);
	/* Low word: upper three bytes of a.y, topped with the low byte of a.x. */
	rotated.x = __byte_perm(a.x, a.y, kSelector);
	return rotated;
}
|
||
|
|
||
|
/**
 * Add a 32-bit unsigned scalar to a value carried as a uint2 pair.
 *
 * The sum is formed by converting the pair with devectorize(), adding b,
 * and converting back with vectorize().
 *
 * NOTE(review): devectorize()/vectorize() are defined outside this file
 * (presumably in cuda_helper.h) and are assumed to pack/unpack
 * {x = low word, y = high word} to and from a 64-bit integer, so that a
 * carry out of the low word propagates into the high word -- confirm
 * against their definitions.
 *
 * A hand-written PTX add.cc/addc variant used to sit here behind an
 * `#if 0` guard; it could never be compiled, so it has been dropped.
 *
 * @param a  left operand as a uint2 pair
 * @param b  32-bit unsigned value to add
 * @return   the sum, as a uint2 pair
 */
static __device__ __forceinline__ uint2 operator+ (const uint2 a, const uint32_t b)
{
	return vectorize(devectorize(a) + b);
}
|
||
|
|