@ -898,9 +898,7 @@ __device__ __forceinline__ void STEP8_MAJ_29(const uint32_t *w, const int r, con
@@ -898,9 +898,7 @@ __device__ __forceinline__ void STEP8_MAJ_29(const uint32_t *w, const int r, con
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_MAJ_30(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_MAJ_30(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -930,9 +928,7 @@ void STEP8_MAJ_30(const uint32_t *w, const int r, const int s, uint32_t * A, con
@@ -930,9 +928,7 @@ void STEP8_MAJ_30(const uint32_t *w, const int r, const int s, uint32_t * A, con
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_MAJ_31(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_MAJ_31(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -962,9 +958,7 @@ void STEP8_MAJ_31(const uint32_t *w, const int r, const int s, uint32_t * A, con
@@ -962,9 +958,7 @@ void STEP8_MAJ_31(const uint32_t *w, const int r, const int s, uint32_t * A, con
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_IF_32(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_IF_32(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -994,9 +988,7 @@ void STEP8_IF_32(const uint32_t *w, const int r, const int s, uint32_t * A, cons
@@ -994,9 +988,7 @@ void STEP8_IF_32(const uint32_t *w, const int r, const int s, uint32_t * A, cons
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_IF_33(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_IF_33(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -1026,9 +1018,7 @@ void STEP8_IF_33(const uint32_t *w, const int r, const int s, uint32_t * A, cons
@@ -1026,9 +1018,7 @@ void STEP8_IF_33(const uint32_t *w, const int r, const int s, uint32_t * A, cons
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_IF_34(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_IF_34(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -1058,9 +1048,7 @@ void STEP8_IF_34(const uint32_t *w, const int r, const int s, uint32_t * A, cons
@@ -1058,9 +1048,7 @@ void STEP8_IF_34(const uint32_t *w, const int r, const int s, uint32_t * A, cons
A[j] = R[j];
}
}
__device__ __forceinline__
void STEP8_IF_35(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
__device__ __forceinline__ void STEP8_IF_35(const uint32_t *w, const int r, const int s, uint32_t * A, const uint32_t * B, const uint32_t * C, uint32_t * D)
{
int j;
uint32_t temp;
@ -1090,9 +1078,8 @@ void STEP8_IF_35(const uint32_t *w, const int r, const int s, uint32_t * A, cons
@@ -1090,9 +1078,8 @@ void STEP8_IF_35(const uint32_t *w, const int r, const int s, uint32_t * A, cons
STEP8_IF_0(d_cw0[0], r, s, A, &A[8], &A[16], &A[24]);
STEP8_IF_1(d_cw0[1], s, t, &A[24], A, &A[8], &A[16]);
STEP8_IF_2(d_cw0[2], t, u, &A[16], &A[24], A, &A[8]);
@ -1113,9 +1102,8 @@ void Round8_0_final(uint32_t *A, int r, int s, int t, int u) {
@@ -1113,9 +1102,8 @@ void Round8_0_final(uint32_t *A, int r, int s, int t, int u) {
STEP8_MAJ_6(d_cw0[6], t, u, &A[16], &A[24], A, &A[8]);
STEP8_MAJ_7(d_cw0[7], u, r, &A[8], &A[16], &A[24], A);
STEP8_IF_8(d_cw1[0], r, s, A, &A[8], &A[16], &A[24]);
STEP8_IF_9(d_cw1[1], s, t, &A[24], A, &A[8], &A[16]);
STEP8_IF_10(d_cw1[2], t, u, &A[16], &A[24], A, &A[8]);
@ -1136,9 +1126,8 @@ void Round8_1_final(uint32_t *A, int r, int s, int t, int u) {
@@ -1136,9 +1126,8 @@ void Round8_1_final(uint32_t *A, int r, int s, int t, int u) {
STEP8_MAJ_14(d_cw1[6], t, u, &A[16], &A[24], A, &A[8]);
STEP8_MAJ_15(d_cw1[7], u, r, &A[8], &A[16], &A[24], A);
STEP8_IF_16(d_cw2[0], r, s, A, &A[8], &A[16], &A[24]);
STEP8_IF_17(d_cw2[1], s, t, &A[24], A, &A[8], &A[16]);
STEP8_IF_18(d_cw2[2], t, u, &A[16], &A[24], A, &A[8]);
@ -1159,9 +1150,8 @@ void Round8_2_final(uint32_t *A, int r, int s, int t, int u) {
@@ -1159,9 +1150,8 @@ void Round8_2_final(uint32_t *A, int r, int s, int t, int u) {
STEP8_MAJ_22(d_cw2[6], t, u, &A[16], &A[24], A, &A[8]);
STEP8_MAJ_23(d_cw2[7], u, r, &A[8], &A[16], &A[24], A);
STEP8_IF_24(d_cw3[0], r, s, A, &A[8], &A[16], &A[24]);
STEP8_IF_25(d_cw3[1], s, t, &A[24], A, &A[8], &A[16]);
STEP8_IF_26(d_cw3[2], t, u, &A[16], &A[24], A, &A[8]);
@ -1190,8 +1182,8 @@ void Round8_3_final(uint32_t *A, int r, int s, int t, int u) {
@@ -1190,8 +1182,8 @@ void Round8_3_final(uint32_t *A, int r, int s, int t, int u) {
#define expanded_vector(x) __ldg(&g_fft4[x])
#endif
__device__ __forceinline__
void Round8_0(uint32_t *A, const int thr_offset, int r, int s, int t, int u, uint4 *g_fft4) {
__device__ __forceinline__ void Round8_0(uint32_t *A, const int thr_offset,