Browse Source

some cleanup

2upstream
orignal 8 years ago
parent
commit
3f543538a1
  1. 119
      gost/cuda_gosthash.cu

119
gost/cuda_gosthash.cu

@ -25,8 +25,6 @@ __constant__ static uint64_t __align__(8) c_header1[10] =
__device__ uint64_t d_target[1]; __device__ uint64_t d_target[1];
//#define FULL_UNROLL
// Tables for function F // Tables for function F
__device__ static uint64_t T0[256] = { __device__ static uint64_t T0[256] = {
0xE6F87E5C5B711FD0, 0x258377800924FA16, 0xC849E07E852EA4A8, 0x5B4686A18F06C16A, 0xE6F87E5C5B711FD0, 0x258377800924FA16, 0xC849E07E852EA4A8, 0x5B4686A18F06C16A,
@ -596,21 +594,10 @@ __constant__ static uint64_t CC[12][8] = {{
0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48
}}; }};
__device__ __forceinline__
void ADD_ASM_512_32(uint32_t* x, uint32_t* const a, uint32_t* const b)
{
asm("add.cc.u32 %0, %1, %2;\n\t" : "=r"(x[0]) : "r"(a[0]),"r"(b[0]));
#pragma unroll 14
for(int i=1; i < 15; i++) {
asm("addc.cc.u32 %0, %1, %2;\n\t" : "=r"(x[i]) : "r"(a[i]),"r"(b[i]));
}
asm("addc.u32 %0, %1, %2;\n\t" : "=r"(x[15]) : "r"(a[15]),"r"(b[15]));
}
__device__ __forceinline__ __device__ __forceinline__
void GOST_Add512(void *x, void * const a, void * const b) void GOST_Add512(void *x, void * const a, void * const b)
{ {
//ADD_ASM_512_32((uint32_t*)x, (uint32_t*)a, (uint32_t*)b);
uint16_t t = 0; uint16_t t = 0;
#pragma unroll #pragma unroll
for(int i = 63; i >= 0; i--) for(int i = 63; i >= 0; i--)
@ -639,7 +626,6 @@ void GOST_Copy256(uint64_t* dst, uint64_t* const __restrict__ src)
__device__ __forceinline__ __device__ __forceinline__
void GOST_Xor512(uint64_t* C, uint64_t* const A, const uint64_t* B) void GOST_Xor512(uint64_t* C, uint64_t* const A, const uint64_t* B)
{ {
#if 1
C[0] = A[0] ^ B[0]; C[0] = A[0] ^ B[0];
C[1] = A[1] ^ B[1]; C[1] = A[1] ^ B[1];
C[2] = A[2] ^ B[2]; C[2] = A[2] ^ B[2];
@ -648,37 +634,15 @@ void GOST_Xor512(uint64_t* C, uint64_t* const A, const uint64_t* B)
C[5] = A[5] ^ B[5]; C[5] = A[5] ^ B[5];
C[6] = A[6] ^ B[6]; C[6] = A[6] ^ B[6];
C[7] = A[7] ^ B[7]; C[7] = A[7] ^ B[7];
#elif 0
/* x = y ^ z */
#pragma unroll 8
for(int i=0; i < 8; i++) {
asm("xor.b64 %0, %1, %2;\n\t" : "=l"(C[i]) : "l"(A[i]), "l"(B[i]));
}
#else
for(int i=0; i<8; i++) {
C[i] = A[i] ^ B[i];
}
#endif
} }
__device__ __forceinline__ __device__ __forceinline__
void GOST_Xor512_3(uint64_t* C, uint64_t* const A, uint64_t* const B) void GOST_Xor512_3(uint64_t* C, uint64_t* const A, uint64_t* const B)
{ {
#if 0
#pragma unroll 8
for(int i=0; i < 8; i++) {
asm(
"xor.b64 %0, %0, %1;\n\t"
"xor.b64 %0, %0, %2;\n\t"
: "+l"(C[i]) : "l"(A[i]), "l"(B[i])
);
}
#else
#pragma unroll 8 #pragma unroll 8
for(int i=0; i<8; i++) { for(int i=0; i<8; i++) {
C[i] ^= A[i] ^ B[i]; C[i] ^= A[i] ^ B[i];
} }
#endif
} }
__device__ __forceinline__ __device__ __forceinline__
@ -695,87 +659,7 @@ void GOST_FS(uint64_t* const state64, uint64_t* return_state)
{ {
uchar* state = (uchar*) state64; uchar* state = (uchar*) state64;
uint64_t r; uint64_t r;
#ifdef FULL_UNROLL
r = T0[state[56]];
r ^= T1[state[48]];
r ^= T2[state[40]];
r ^= T3[state[32]];
r ^= T4[state[24]];
r ^= T5[state[16]];
r ^= T6[state[8]];
r ^= T7[state[0]];
return_state[0] = r;
r = T0[state[57]];
r ^= T1[state[49]];
r ^= T2[state[41]];
r ^= T3[state[33]];
r ^= T4[state[25]];
r ^= T5[state[17]];
r ^= T6[state[9]];
r ^= T7[state[1]];
return_state[1] = r;
r = T0[state[58]];
r ^= T1[state[50]];
r ^= T2[state[42]];
r ^= T3[state[34]];
r ^= T4[state[26]];
r ^= T5[state[18]];
r ^= T6[state[10]];
r ^= T7[state[2]];
return_state[2] = r;
r = T0[state[59]];
r ^= T1[state[51]];
r ^= T2[state[43]];
r ^= T3[state[35]];
r ^= T4[state[27]];
r ^= T5[state[19]];
r ^= T6[state[11]];
r ^= T7[state[3]];
return_state[3] = r;
r = T0[state[60]];
r ^= T1[state[52]];
r ^= T2[state[44]];
r ^= T3[state[36]];
r ^= T4[state[28]];
r ^= T5[state[20]];
r ^= T6[state[12]];
r ^= T7[state[4]];
return_state[4] = r;
r = T0[state[61]];
r ^= T1[state[53]];
r ^= T2[state[45]];
r ^= T3[state[37]];
r ^= T4[state[29]];
r ^= T5[state[21]];
r ^= T6[state[13]];
r ^= T7[state[5]];
return_state[5] = r;
r = T0[state[62]];
r ^= T1[state[54]];
r ^= T2[state[46]];
r ^= T3[state[38]];
r ^= T4[state[30]];
r ^= T5[state[22]];
r ^= T6[state[14]];
r ^= T7[state[6]];
return_state[6] = r;
r = T0[state[63]];
r ^= T1[state[55]];
r ^= T2[state[47]];
r ^= T3[state[39]];
r ^= T4[state[31]];
r ^= T5[state[23]];
r ^= T6[state[15]];
r ^= T7[state[7]];
return_state[7] = r;
#else
for (int b=0; b<8; b++) { for (int b=0; b<8; b++) {
r = T0[state[b+56]]; r = T0[state[b+56]];
r ^= T1[state[b+48]]; r ^= T1[state[b+48]];
@ -787,7 +671,6 @@ void GOST_FS(uint64_t* const state64, uint64_t* return_state)
r ^= T7[state[b]]; r ^= T7[state[b]];
return_state[b] = r; return_state[b] = r;
} }
#endif
} }
__device__ __forceinline__ __device__ __forceinline__

Loading…
Cancel
Save