From 7f595ca04bc77c1b8e56f7993a0c4124573b8011 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 21 Feb 2013 22:11:36 +1100 Subject: [PATCH] Use global constant arrays for all other constants used in scrypt kernel. --- scrypt121016.cl | 310 ++++++++++++++++++++++++++---------------------- 1 file changed, 168 insertions(+), 142 deletions(-) diff --git a/scrypt121016.cl b/scrypt121016.cl index d6503904..6979458d 100644 --- a/scrypt121016.cl +++ b/scrypt121016.cl @@ -92,7 +92,33 @@ __constant uint K[] = { 0x90befffaU, 0xa4506cebU, // 60 0xbef9a3f7U, - 0xc67178f2U + 0xc67178f2U, + 0x98c7e2a2U, + 0xfc08884dU, + 0xcd2a11aeU, + 0x510e527fU, + 0x9b05688cU, + 0xC3910C8EU, + 0xfb6feee7U, + 0x2a01a605U, // 70 + 0x0c2e12e0U, + 0x4498517BU, + 0x6a09e667U, + 0xa4ce148bU, + 0x95F61999U, + 0xc19bf174U, + 0xBB67AE85U, + 0x3C6EF372U, + 0xA54FF53AU, + 0x1F83D9ABU, // 80 + 0x5BE0CD19U, + 0x5C5C5C5CU, + 0x36363636U, + 0x80000000U, + 0x000003FFU, + 0x00000280U, + 0x000004a0U, + 0x00000300U }; #define rotl(x,y) rotate(x,y) @@ -131,184 +157,184 @@ void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, con uint4 W[4]; W[ 0].x = block0.x; - RND(A,B,C,D,E,F,G,H, W[0].x+K[0]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[0]); W[ 0].y = block0.y; - RND(H,A,B,C,D,E,F,G, W[0].y+K[1]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[1]); W[ 0].z = block0.z; - RND(G,H,A,B,C,D,E,F, W[0].z+K[2]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[2]); W[ 0].w = block0.w; - RND(F,G,H,A,B,C,D,E, W[0].w+K[3]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[3]); W[ 1].x = block1.x; - RND(E,F,G,H,A,B,C,D, W[1].x+K[4]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); W[ 1].y = block1.y; - RND(D,E,F,G,H,A,B,C, W[1].y+K[5]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]); W[ 1].z = block1.z; - RND(C,D,E,F,G,H,A,B, W[1].z+K[6]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]); W[ 1].w = block1.w; - RND(B,C,D,E,F,G,H,A, W[1].w+K[7]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]); W[ 2].x = block2.x; - RND(A,B,C,D,E,F,G,H, W[2].x+K[8]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]); W[ 2].y = block2.y; - RND(H,A,B,C,D,E,F,G, W[2].y+K[9]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]); W[ 2].z = block2.z; - RND(G,H,A,B,C,D,E,F, W[2].z+K[10]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]); W[ 2].w = block2.w; - RND(F,G,H,A,B,C,D,E, W[2].w+K[11]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]); W[ 3].x = block3.x; - RND(E,F,G,H,A,B,C,D, W[3].x+K[12]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]); W[ 3].y = block3.y; - RND(D,E,F,G,H,A,B,C, W[3].y+K[13]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]); W[ 3].z = block3.z; - RND(C,D,E,F,G,H,A,B, W[3].z+K[14]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); W[ 3].w = block3.w; - RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[15]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[16]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]); W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[17]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]); W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[18]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]); W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[19]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]); W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[20]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]); W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[21]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]); W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[22]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]); W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[23]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]); W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[24]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]); W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[25]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]); W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[26]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]); W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[27]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]); W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[28]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]); W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[29]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]); W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[30]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]); W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[31]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]); W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[32]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]); W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[33]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]); W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[34]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]); W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[35]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]); W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[36]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]); W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[37]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]); W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[38]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]); W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[39]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]); W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[40]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]); W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[41]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]); W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[42]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]); W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[43]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]); W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[44]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]); W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[45]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]); W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[46]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]); W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[47]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]); W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[48]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]); W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[49]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]); W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[50]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]); W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[51]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]); W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[52]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]); W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[53]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]); W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[54]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]); W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[55]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]); W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[56]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]); W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[57]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]); W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[58]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]); W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[59]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]); W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[60]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]); W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[61]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]); W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[62]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]); #undef A #undef B @@ -337,191 +363,191 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block uint4 W[4]; W[0].x = block0.x; - D=0x98c7e2a2U+W[0].x; - H=0xfc08884dU+W[0].x; + D= K[63] +W[0].x; + H= K[64] +W[0].x; W[0].y = block0.y; - C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y; - G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U); + C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y; + G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]); W[0].z = block0.z; - B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z; - F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U); + B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z; + F= K[72] +B+Tr2(G)+Maj(G,H, K[73]); W[0].w = block0.w; - A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; - E=0x95F61999U+A+Tr2(F)+Maj(F,G,H); + A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w; + E= K[75] +A+Tr2(F)+Maj(F,G,H); W[1].x = block1.x; - RND(E,F,G,H,A,B,C,D, W[1].x+K[4]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); W[1].y = block1.y; - RND(D,E,F,G,H,A,B,C, W[1].y+K[5]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]); W[1].z = block1.z; - RND(C,D,E,F,G,H,A,B, W[1].z+K[6]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]); W[1].w = block1.w; - RND(B,C,D,E,F,G,H,A, W[1].w+K[7]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]); W[2].x = block2.x; - RND(A,B,C,D,E,F,G,H, W[2].x+K[8]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]); W[2].y = block2.y; - RND(H,A,B,C,D,E,F,G, W[2].y+K[9]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]); W[2].z = block2.z; - RND(G,H,A,B,C,D,E,F, W[2].z+K[10]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]); W[2].w = block2.w; - RND(F,G,H,A,B,C,D,E, W[2].w+K[11]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]); W[3].x = block3.x; - RND(E,F,G,H,A,B,C,D, W[3].x+K[12]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]); W[3].y = block3.y; - RND(D,E,F,G,H,A,B,C, W[3].y+K[13]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]); W[3].z = block3.z; - RND(C,D,E,F,G,H,A,B, W[3].z+K[14]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); W[3].w = block3.w; - RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[15]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[16]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]); W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[17]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]); W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[18]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]); W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[19]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]); W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[20]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]); W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[21]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]); W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[22]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]); W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[23]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]); W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[24]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]); W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[25]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]); W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[26]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]); W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[27]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]); W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[28]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]); W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[29]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]); W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[30]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]); W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[31]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]); W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[32]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]); W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[33]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]); W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[34]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]); W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[35]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]); W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[36]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]); W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[37]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]); W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[38]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]); W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[39]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]); W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[40]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]); W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[41]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]); W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[42]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]); W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[43]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]); W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[44]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]); W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[45]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]); W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[46]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]); W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); - RND(A,B,C,D,E,F,G,H, W[0].x+K[47]); + RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]); W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z); - RND(H,A,B,C,D,E,F,G, W[0].y+K[48]); + RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]); W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w); - RND(G,H,A,B,C,D,E,F, W[0].z+K[49]); + RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]); W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x); - RND(F,G,H,A,B,C,D,E, W[0].w+K[50]); + RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]); W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y); - RND(E,F,G,H,A,B,C,D, W[1].x+K[51]); + RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]); W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z); - RND(D,E,F,G,H,A,B,C, W[1].y+K[52]); + RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]); W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w); - RND(C,D,E,F,G,H,A,B, W[1].z+K[53]); + RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]); W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x); - RND(B,C,D,E,F,G,H,A, W[1].w+K[54]); + RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]); W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y); - RND(A,B,C,D,E,F,G,H, W[2].x+K[55]); + RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]); W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z); - RND(H,A,B,C,D,E,F,G, W[2].y+K[56]); + RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]); W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w); - RND(G,H,A,B,C,D,E,F, W[2].z+K[57]); + RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]); W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x); - RND(F,G,H,A,B,C,D,E, W[2].w+K[58]); + RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]); W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y); - RND(E,F,G,H,A,B,C,D, W[3].x+K[59]); + RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]); W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z); - RND(D,E,F,G,H,A,B,C, W[3].y+K[60]); + RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]); W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w); - RND(C,D,E,F,G,H,A,B, W[3].z+K[61]); + RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]); W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x); - RND(B,C,D,E,F,G,H,A, W[3].w+K[62]); + RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]); #undef A #undef B @@ -532,8 +558,8 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block #undef G #undef H - *state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU); - *state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U); + *state0 += (uint4)(K[73], K[77], K[78], K[79]); + *state1 += (uint4)(K[66], K[67], K[80], K[81]); } __constant uint fixedW[64] = @@ -758,7 +784,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) for (uint i=0; i<1024; ++i) { uint4 V[8]; - uint j = X[7].x & 0x3FF; + uint j = X[7].x & K[85]; uint y = (j/LOOKUP_GAP); #pragma unroll for(uint z=0; z