Browse Source

use pre-calculated F0 and F1

2upstream
orignal 8 years ago
parent
commit
601880ade8
  1. 38
      gost/cuda_gosthash.cu

38
gost/cuda_gosthash.cu

@ -595,6 +595,18 @@ __constant__ static uint64_t CC[12][8] = {{
}}; }};
__constant__ static uint64_t F0[8] = // GOST_F(0)
{
0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3,
0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3, 0x74a5d4ce2efc83b3
};
__constant__ static uint64_t F1[8] = // GOST_F(1)
{
0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523,
0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523, 0x155f7bb040eec523
};
__device__ __forceinline__ __device__ __forceinline__
void GOST_Add512(void *x, void * const a, void * const b) void GOST_Add512(void *x, void * const a, void * const b)
{ {
@ -684,22 +696,22 @@ static void GOST_F(uint64_t* state)
__device__ __device__
static void GOST_E12(uint64_t* K, uint64_t *state) static void GOST_E12(uint64_t* const K, uint64_t *state)
{ {
uint64_t state1[8], K1[8]; uint64_t state1[8], K1[8];
GOST_Copy512(K1, K); GOST_Copy512(K1, K);
//#pragma unroll 1 //#pragma unroll 1
for(int i=0; i<12; i++) for(int i=0; i<12; i++)
{ {
GOST_Xor512(state1, K1, CC[i]);
GOST_FS(state1, K1);
GOST_FS(state, state1); GOST_FS(state, state1);
GOST_Xor512(K, K1, CC[i]);
GOST_FS(K, K1);
GOST_Xor512(state, state1, K1); GOST_Xor512(state, state1, K1);
} }
} }
__device__ __device__
void GOST_E(uint64_t* K, uint64_t* const m, uint64_t *state /* out only */) void GOST_E(uint64_t* const K, uint64_t* const m, uint64_t *state /* out only */)
{ {
GOST_Xor512(state, m, K); // state = m ^ K GOST_Xor512(state, m, K); // state = m ^ K
GOST_E12(K, state); GOST_E12(K, state);
@ -737,25 +749,15 @@ void GOST_g_0(uint64_t* h, uint64_t* const M)
__device__ __device__
void GOST_g_0_0(uint64_t* h, uint64_t* const M) // input h assumed zero, for iv 512 void GOST_g_0_0(uint64_t* h, uint64_t* const M) // input h assumed zero, for iv 512
{ {
#pragma unroll GOST_E(F0, M, h);
for(int i = 0; i < 8; i++) h[i] = 0x74a5d4ce2efc83b3; // GOST_F(0) GOST_Xor512 (h, h, M); // h = h ^ M
uint64_t t[8];
GOST_E(h, M, t);
GOST_Xor512 (h, t, M); // h = t ^ M
} }
__device__ __device__
void GOST_g_0_1(uint64_t* h, uint64_t* const M) // input h assumed all bytes one, for iv 256 void GOST_g_0_1(uint64_t* h, uint64_t* const M) // input h assumed all bytes one, for iv 256
{ {
#pragma unroll GOST_E(F1, M, h);
for(int i = 0; i < 8; i++) h[i] = 0x155f7bb040eec523; // GOST_F(1) GOST_Xor512_c(h, h, M, 0x0101010101010101); // h = h ^ M ^ 1
uint64_t t[8];
GOST_E(h, M, t);
GOST_Xor512_c(h, t, M, 0x0101010101010101); // h = t ^ M ^ 1
} }
__global__ __global__

Loading…
Cancel
Save