@ -1070,8 +1074,12 @@ __device__ __forceinline__ void Round8_0_final(uint32_t *A, int r, int s, int t,
@@ -1070,8 +1074,12 @@ __device__ __forceinline__ void Round8_0_final(uint32_t *A, int r, int s, int t,
STEP8_MAJ_7(d_cw0[7], u, r, &A[8], &A[16], &A[24], A);
@ -1094,8 +1102,12 @@ __device__ __forceinline__ void Round8_1_final(uint32_t *A, int r, int s, int t,
@@ -1094,8 +1102,12 @@ __device__ __forceinline__ void Round8_1_final(uint32_t *A, int r, int s, int t,
STEP8_MAJ_15(d_cw1[7], u, r, &A[8], &A[16], &A[24], A);
@ -1118,8 +1130,12 @@ __device__ __forceinline__ void Round8_2_final(uint32_t *A, int r, int s, int t,
@@ -1118,8 +1130,12 @@ __device__ __forceinline__ void Round8_2_final(uint32_t *A, int r, int s, int t,
STEP8_MAJ_23(d_cw2[7], u, r, &A[8], &A[16], &A[24], A);
void STEP8_IF(const uint32_t *w, const int i, const int r, const int s, uint32_t *A, const uint32_t *B, const uint32_t *C, uint32_t *D)
@ -193,7 +195,6 @@ void Round8(uint32_t A[32], const int y[256], int i, int r, int s, int t, int u)
@@ -193,7 +195,6 @@ void Round8(uint32_t A[32], const int y[256], int i, int r, int s, int t, int u)
{
uint32_t w[8][8];
int code = i<2? 185: 233;
int a, b;
/*
* The FFT output y is in revbin permuted order,
@ -201,9 +202,9 @@ void Round8(uint32_t A[32], const int y[256], int i, int r, int s, int t, int u)
@@ -201,9 +202,9 @@ void Round8(uint32_t A[32], const int y[256], int i, int r, int s, int t, int u)