|
|
|
@ -92,7 +92,33 @@ __constant uint K[] = {
@@ -92,7 +92,33 @@ __constant uint K[] = {
|
|
|
|
|
0x90befffaU, |
|
|
|
|
0xa4506cebU, // 60 |
|
|
|
|
0xbef9a3f7U, |
|
|
|
|
0xc67178f2U |
|
|
|
|
0xc67178f2U, |
|
|
|
|
0x98c7e2a2U, |
|
|
|
|
0xfc08884dU, |
|
|
|
|
0xcd2a11aeU, |
|
|
|
|
0x510e527fU, |
|
|
|
|
0x9b05688cU, |
|
|
|
|
0xC3910C8EU, |
|
|
|
|
0xfb6feee7U, |
|
|
|
|
0x2a01a605U, // 70 |
|
|
|
|
0x0c2e12e0U, |
|
|
|
|
0x4498517BU, |
|
|
|
|
0x6a09e667U, |
|
|
|
|
0xa4ce148bU, |
|
|
|
|
0x95F61999U, |
|
|
|
|
0xc19bf174U, |
|
|
|
|
0xBB67AE85U, |
|
|
|
|
0x3C6EF372U, |
|
|
|
|
0xA54FF53AU, |
|
|
|
|
0x1F83D9ABU, // 80 |
|
|
|
|
0x5BE0CD19U, |
|
|
|
|
0x5C5C5C5CU, |
|
|
|
|
0x36363636U, |
|
|
|
|
0x80000000U, |
|
|
|
|
0x000003FFU, |
|
|
|
|
0x00000280U, |
|
|
|
|
0x000004a0U, |
|
|
|
|
0x00000300U |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
// rotl(x,y): shorthand alias that expands to rotate(x,y).
// NOTE(review): rotate is presumably the OpenCL C built-in left-rotate; confirm against the target OpenCL version.
#define rotl(x,y) rotate(x,y) |
|
|
|
@ -131,184 +157,184 @@ void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, con
@@ -131,184 +157,184 @@ void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, con
|
|
|
|
|
uint4 W[4]; |
|
|
|
|
|
|
|
|
|
W[ 0].x = block0.x; |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[0]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[0]); |
|
|
|
|
W[ 0].y = block0.y; |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[1]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[1]); |
|
|
|
|
W[ 0].z = block0.z; |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[2]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[2]); |
|
|
|
|
W[ 0].w = block0.w; |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[3]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[3]); |
|
|
|
|
|
|
|
|
|
W[ 1].x = block1.x; |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[4]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); |
|
|
|
|
W[ 1].y = block1.y; |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[5]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]); |
|
|
|
|
W[ 1].z = block1.z; |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[6]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]); |
|
|
|
|
W[ 1].w = block1.w; |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[7]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]); |
|
|
|
|
|
|
|
|
|
W[ 2].x = block2.x; |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[8]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]); |
|
|
|
|
W[ 2].y = block2.y; |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[9]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]); |
|
|
|
|
W[ 2].z = block2.z; |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[10]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]); |
|
|
|
|
W[ 2].w = block2.w; |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[11]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]); |
|
|
|
|
|
|
|
|
|
W[ 3].x = block3.x; |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[12]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]); |
|
|
|
|
W[ 3].y = block3.y; |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[13]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]); |
|
|
|
|
W[ 3].z = block3.z; |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[14]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
|
W[ 3].w = block3.w; |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); |
|
|
|
|
|
|
|
|
|
W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[15]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
|
|
|
|
|
|
|
W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[16]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]); |
|
|
|
|
|
|
|
|
|
W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[17]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]); |
|
|
|
|
|
|
|
|
|
W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[18]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]); |
|
|
|
|
|
|
|
|
|
W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[19]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]); |
|
|
|
|
|
|
|
|
|
W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[20]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]); |
|
|
|
|
|
|
|
|
|
W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[21]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]); |
|
|
|
|
|
|
|
|
|
W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[22]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]); |
|
|
|
|
|
|
|
|
|
W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[23]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]); |
|
|
|
|
|
|
|
|
|
W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[24]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]); |
|
|
|
|
|
|
|
|
|
W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[25]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]); |
|
|
|
|
|
|
|
|
|
W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[26]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]); |
|
|
|
|
|
|
|
|
|
W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[27]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]); |
|
|
|
|
|
|
|
|
|
W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[28]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]); |
|
|
|
|
|
|
|
|
|
W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[29]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]); |
|
|
|
|
|
|
|
|
|
W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[30]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]); |
|
|
|
|
|
|
|
|
|
W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[31]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]); |
|
|
|
|
|
|
|
|
|
W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[32]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]); |
|
|
|
|
|
|
|
|
|
W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[33]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]); |
|
|
|
|
|
|
|
|
|
W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[34]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]); |
|
|
|
|
|
|
|
|
|
W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[35]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]); |
|
|
|
|
|
|
|
|
|
W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[36]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]); |
|
|
|
|
|
|
|
|
|
W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[37]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]); |
|
|
|
|
|
|
|
|
|
W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[38]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]); |
|
|
|
|
|
|
|
|
|
W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[39]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]); |
|
|
|
|
|
|
|
|
|
W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[40]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]); |
|
|
|
|
|
|
|
|
|
W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[41]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]); |
|
|
|
|
|
|
|
|
|
W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[42]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]); |
|
|
|
|
|
|
|
|
|
W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[43]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]); |
|
|
|
|
|
|
|
|
|
W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[44]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]); |
|
|
|
|
|
|
|
|
|
W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[45]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]); |
|
|
|
|
|
|
|
|
|
W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[46]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]); |
|
|
|
|
|
|
|
|
|
W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[47]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]); |
|
|
|
|
|
|
|
|
|
W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[48]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]); |
|
|
|
|
|
|
|
|
|
W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[49]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]); |
|
|
|
|
|
|
|
|
|
W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[50]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]); |
|
|
|
|
|
|
|
|
|
W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[51]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]); |
|
|
|
|
|
|
|
|
|
W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[52]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]); |
|
|
|
|
|
|
|
|
|
W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[53]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]); |
|
|
|
|
|
|
|
|
|
W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[54]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]); |
|
|
|
|
|
|
|
|
|
W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[55]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]); |
|
|
|
|
|
|
|
|
|
W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[56]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]); |
|
|
|
|
|
|
|
|
|
W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[57]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]); |
|
|
|
|
|
|
|
|
|
W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[58]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]); |
|
|
|
|
|
|
|
|
|
W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[59]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]); |
|
|
|
|
|
|
|
|
|
W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[60]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]); |
|
|
|
|
|
|
|
|
|
W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[61]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]); |
|
|
|
|
|
|
|
|
|
W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[62]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]); |
|
|
|
|
|
|
|
|
|
#undef A |
|
|
|
|
#undef B |
|
|
|
@ -337,191 +363,191 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
@@ -337,191 +363,191 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
|
|
|
|
|
uint4 W[4]; |
|
|
|
|
|
|
|
|
|
W[0].x = block0.x; |
|
|
|
|
D=0x98c7e2a2U+W[0].x; |
|
|
|
|
H=0xfc08884dU+W[0].x; |
|
|
|
|
D= K[63] +W[0].x; |
|
|
|
|
H= K[64] +W[0].x; |
|
|
|
|
|
|
|
|
|
W[0].y = block0.y; |
|
|
|
|
C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y; |
|
|
|
|
G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U); |
|
|
|
|
C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y; |
|
|
|
|
G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]); |
|
|
|
|
|
|
|
|
|
W[0].z = block0.z; |
|
|
|
|
B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z; |
|
|
|
|
F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U); |
|
|
|
|
B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z; |
|
|
|
|
F= K[72] +B+Tr2(G)+Maj(G,H, K[73]); |
|
|
|
|
|
|
|
|
|
W[0].w = block0.w; |
|
|
|
|
A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; |
|
|
|
|
E=0x95F61999U+A+Tr2(F)+Maj(F,G,H); |
|
|
|
|
A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w; |
|
|
|
|
E= K[75] +A+Tr2(F)+Maj(F,G,H); |
|
|
|
|
|
|
|
|
|
W[1].x = block1.x; |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[4]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); |
|
|
|
|
W[1].y = block1.y; |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[5]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]); |
|
|
|
|
W[1].z = block1.z; |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[6]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]); |
|
|
|
|
W[1].w = block1.w; |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[7]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]); |
|
|
|
|
|
|
|
|
|
W[2].x = block2.x; |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[8]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]); |
|
|
|
|
W[2].y = block2.y; |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[9]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]); |
|
|
|
|
W[2].z = block2.z; |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[10]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]); |
|
|
|
|
W[2].w = block2.w; |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[11]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]); |
|
|
|
|
|
|
|
|
|
W[3].x = block3.x; |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[12]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]); |
|
|
|
|
W[3].y = block3.y; |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[13]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]); |
|
|
|
|
W[3].z = block3.z; |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[14]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
|
W[3].w = block3.w; |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); |
|
|
|
|
|
|
|
|
|
W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[15]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
|
|
|
|
|
|
|
W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[16]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]); |
|
|
|
|
|
|
|
|
|
W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[17]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]); |
|
|
|
|
|
|
|
|
|
W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[18]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]); |
|
|
|
|
|
|
|
|
|
W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[19]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]); |
|
|
|
|
|
|
|
|
|
W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[20]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]); |
|
|
|
|
|
|
|
|
|
W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[21]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]); |
|
|
|
|
|
|
|
|
|
W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[22]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]); |
|
|
|
|
|
|
|
|
|
W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[23]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]); |
|
|
|
|
|
|
|
|
|
W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[24]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]); |
|
|
|
|
|
|
|
|
|
W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[25]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]); |
|
|
|
|
|
|
|
|
|
W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[26]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]); |
|
|
|
|
|
|
|
|
|
W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[27]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]); |
|
|
|
|
|
|
|
|
|
W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[28]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]); |
|
|
|
|
|
|
|
|
|
W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[29]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]); |
|
|
|
|
|
|
|
|
|
W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[30]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]); |
|
|
|
|
|
|
|
|
|
W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[31]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]); |
|
|
|
|
|
|
|
|
|
W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[32]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]); |
|
|
|
|
|
|
|
|
|
W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[33]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]); |
|
|
|
|
|
|
|
|
|
W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[34]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]); |
|
|
|
|
|
|
|
|
|
W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[35]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]); |
|
|
|
|
|
|
|
|
|
W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[36]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]); |
|
|
|
|
|
|
|
|
|
W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[37]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]); |
|
|
|
|
|
|
|
|
|
W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[38]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]); |
|
|
|
|
|
|
|
|
|
W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[39]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]); |
|
|
|
|
|
|
|
|
|
W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[40]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]); |
|
|
|
|
|
|
|
|
|
W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[41]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]); |
|
|
|
|
|
|
|
|
|
W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[42]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]); |
|
|
|
|
|
|
|
|
|
W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[43]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]); |
|
|
|
|
|
|
|
|
|
W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[44]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]); |
|
|
|
|
|
|
|
|
|
W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[45]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]); |
|
|
|
|
|
|
|
|
|
W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[46]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]); |
|
|
|
|
|
|
|
|
|
W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+K[47]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]); |
|
|
|
|
|
|
|
|
|
W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+K[48]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]); |
|
|
|
|
|
|
|
|
|
W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+K[49]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]); |
|
|
|
|
|
|
|
|
|
W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+K[50]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]); |
|
|
|
|
|
|
|
|
|
W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+K[51]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]); |
|
|
|
|
|
|
|
|
|
W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+K[52]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]); |
|
|
|
|
|
|
|
|
|
W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+K[53]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]); |
|
|
|
|
|
|
|
|
|
W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+K[54]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]); |
|
|
|
|
|
|
|
|
|
W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+K[55]); |
|
|
|
|
RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]); |
|
|
|
|
|
|
|
|
|
W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+K[56]); |
|
|
|
|
RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]); |
|
|
|
|
|
|
|
|
|
W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+K[57]); |
|
|
|
|
RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]); |
|
|
|
|
|
|
|
|
|
W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+K[58]); |
|
|
|
|
RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]); |
|
|
|
|
|
|
|
|
|
W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+K[59]); |
|
|
|
|
RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]); |
|
|
|
|
|
|
|
|
|
W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+K[60]); |
|
|
|
|
RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]); |
|
|
|
|
|
|
|
|
|
W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+K[61]); |
|
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]); |
|
|
|
|
|
|
|
|
|
W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+K[62]); |
|
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]); |
|
|
|
|
|
|
|
|
|
#undef A |
|
|
|
|
#undef B |
|
|
|
@ -532,8 +558,8 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
@@ -532,8 +558,8 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
|
|
|
|
|
#undef G |
|
|
|
|
#undef H |
|
|
|
|
|
|
|
|
|
*state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU); |
|
|
|
|
*state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U); |
|
|
|
|
*state0 += (uint4)(K[73], K[77], K[78], K[79]); |
|
|
|
|
*state1 += (uint4)(K[66], K[67], K[80], K[81]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__constant uint fixedW[64] = |
|
|
|
@ -758,7 +784,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
@@ -758,7 +784,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
|
|
|
|
for (uint i=0; i<1024; ++i) |
|
|
|
|
{ |
|
|
|
|
uint4 V[8]; |
|
|
|
|
uint j = X[7].x & 0x3FF; |
|
|
|
|
uint j = X[7].x & K[85]; |
|
|
|
|
uint y = (j/LOOKUP_GAP); |
|
|
|
|
#pragma unroll |
|
|
|
|
for(uint z=0; z<zSIZE; ++z) |
|
|
|
@ -796,9 +822,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
@@ -796,9 +822,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
|
|
|
|
|
uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid); |
|
|
|
|
uint4 pad0 = midstate0, pad1 = midstate16; |
|
|
|
|
|
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280)); |
|
|
|
|
SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU); |
|
|
|
|
SHA256_fresh(&tstate0,&tstate1, pad0^0x36363636U, pad1^0x36363636U, 0x36363636U, 0x36363636U); |
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(K[84],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[86])); |
|
|
|
|
SHA256_fresh(&ostate0,&ostate1, pad0^ K[82], pad1^ K[82], K[82], K[82]); |
|
|
|
|
SHA256_fresh(&tstate0,&tstate1, pad0^ K[83], pad1^ K[83], K[83], K[83]); |
|
|
|
|
|
|
|
|
|
tmp0 = tstate0; |
|
|
|
|
tmp1 = tstate1; |
|
|
|
@ -812,14 +838,14 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
@@ -812,14 +838,14 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
|
|
|
|
|
X[i*2 ] = ostate0; |
|
|
|
|
X[i*2+1] = ostate1; |
|
|
|
|
|
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(i+1,0x80000000U,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x4a0U)); |
|
|
|
|
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87])); |
|
|
|
|
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88])); |
|
|
|
|
} |
|
|
|
|
scrypt_core(X,padcache); |
|
|
|
|
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]); |
|
|
|
|
SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]); |
|
|
|
|
SHA256_fixed(&tmp0,&tmp1); |
|
|
|
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
|
|
|
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88])); |
|
|
|
|
|
|
|
|
|
bool result = (EndianSwap(ostate1.w) <= target); |
|
|
|
|
if (result) |
|
|
|
|