|
|
@ -92,7 +92,33 @@ __constant uint K[] = { |
|
|
|
0x90befffaU, |
|
|
|
0x90befffaU, |
|
|
|
0xa4506cebU, // 60 |
|
|
|
0xa4506cebU, // 60 |
|
|
|
0xbef9a3f7U, |
|
|
|
0xbef9a3f7U, |
|
|
|
0xc67178f2U |
|
|
|
0xc67178f2U, |
|
|
|
|
|
|
|
0x98c7e2a2U, |
|
|
|
|
|
|
|
0xfc08884dU, |
|
|
|
|
|
|
|
0xcd2a11aeU, |
|
|
|
|
|
|
|
0x510e527fU, |
|
|
|
|
|
|
|
0x9b05688cU, |
|
|
|
|
|
|
|
0xC3910C8EU, |
|
|
|
|
|
|
|
0xfb6feee7U, |
|
|
|
|
|
|
|
0x2a01a605U, // 70 |
|
|
|
|
|
|
|
0x0c2e12e0U, |
|
|
|
|
|
|
|
0x4498517BU, |
|
|
|
|
|
|
|
0x6a09e667U, |
|
|
|
|
|
|
|
0xa4ce148bU, |
|
|
|
|
|
|
|
0x95F61999U, |
|
|
|
|
|
|
|
0xc19bf174U, |
|
|
|
|
|
|
|
0xBB67AE85U, |
|
|
|
|
|
|
|
0x3C6EF372U, |
|
|
|
|
|
|
|
0xA54FF53AU, |
|
|
|
|
|
|
|
0x1F83D9ABU, // 80 |
|
|
|
|
|
|
|
0x5BE0CD19U, |
|
|
|
|
|
|
|
0x5C5C5C5CU, |
|
|
|
|
|
|
|
0x36363636U, |
|
|
|
|
|
|
|
0x80000000U, |
|
|
|
|
|
|
|
0x000003FFU, |
|
|
|
|
|
|
|
0x00000280U, |
|
|
|
|
|
|
|
0x000004a0U, |
|
|
|
|
|
|
|
0x00000300U |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
#define rotl(x,y) rotate(x,y) |
|
|
|
#define rotl(x,y) rotate(x,y) |
|
|
@ -164,7 +190,7 @@ void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, con |
|
|
|
W[ 3].z = block3.z; |
|
|
|
W[ 3].z = block3.z; |
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
W[ 3].w = block3.w; |
|
|
|
W[ 3].w = block3.w; |
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); |
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); |
|
|
|
|
|
|
|
|
|
|
|
W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); |
|
|
|
W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y); |
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
@ -337,20 +363,20 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block |
|
|
|
uint4 W[4]; |
|
|
|
uint4 W[4]; |
|
|
|
|
|
|
|
|
|
|
|
W[0].x = block0.x; |
|
|
|
W[0].x = block0.x; |
|
|
|
D=0x98c7e2a2U+W[0].x; |
|
|
|
D= K[63] +W[0].x; |
|
|
|
H=0xfc08884dU+W[0].x; |
|
|
|
H= K[64] +W[0].x; |
|
|
|
|
|
|
|
|
|
|
|
W[0].y = block0.y; |
|
|
|
W[0].y = block0.y; |
|
|
|
C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y; |
|
|
|
C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y; |
|
|
|
G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U); |
|
|
|
G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]); |
|
|
|
|
|
|
|
|
|
|
|
W[0].z = block0.z; |
|
|
|
W[0].z = block0.z; |
|
|
|
B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z; |
|
|
|
B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z; |
|
|
|
F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U); |
|
|
|
F= K[72] +B+Tr2(G)+Maj(G,H, K[73]); |
|
|
|
|
|
|
|
|
|
|
|
W[0].w = block0.w; |
|
|
|
W[0].w = block0.w; |
|
|
|
A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; |
|
|
|
A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w; |
|
|
|
E=0x95F61999U+A+Tr2(F)+Maj(F,G,H); |
|
|
|
E= K[75] +A+Tr2(F)+Maj(F,G,H); |
|
|
|
|
|
|
|
|
|
|
|
W[1].x = block1.x; |
|
|
|
W[1].x = block1.x; |
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); |
|
|
|
RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]); |
|
|
@ -377,7 +403,7 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block |
|
|
|
W[3].z = block3.z; |
|
|
|
W[3].z = block3.z; |
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]); |
|
|
|
W[3].w = block3.w; |
|
|
|
W[3].w = block3.w; |
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U); |
|
|
|
RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); |
|
|
|
|
|
|
|
|
|
|
|
W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); |
|
|
|
W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y); |
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
|
RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]); |
|
|
@ -532,8 +558,8 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block |
|
|
|
#undef G |
|
|
|
#undef G |
|
|
|
#undef H |
|
|
|
#undef H |
|
|
|
|
|
|
|
|
|
|
|
*state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU); |
|
|
|
*state0 += (uint4)(K[73], K[77], K[78], K[79]); |
|
|
|
*state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U); |
|
|
|
*state1 += (uint4)(K[66], K[67], K[80], K[81]); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
__constant uint fixedW[64] = |
|
|
|
__constant uint fixedW[64] = |
|
|
@ -758,7 +784,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) |
|
|
|
for (uint i=0; i<1024; ++i) |
|
|
|
for (uint i=0; i<1024; ++i) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint4 V[8]; |
|
|
|
uint4 V[8]; |
|
|
|
uint j = X[7].x & 0x3FF; |
|
|
|
uint j = X[7].x & K[85]; |
|
|
|
uint y = (j/LOOKUP_GAP); |
|
|
|
uint y = (j/LOOKUP_GAP); |
|
|
|
#pragma unroll |
|
|
|
#pragma unroll |
|
|
|
for(uint z=0; z<zSIZE; ++z) |
|
|
|
for(uint z=0; z<zSIZE; ++z) |
|
|
@ -796,9 +822,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target) |
|
|
|
uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid); |
|
|
|
uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid); |
|
|
|
uint4 pad0 = midstate0, pad1 = midstate16; |
|
|
|
uint4 pad0 = midstate0, pad1 = midstate16; |
|
|
|
|
|
|
|
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280)); |
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(K[84],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[86])); |
|
|
|
SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU); |
|
|
|
SHA256_fresh(&ostate0,&ostate1, pad0^ K[82], pad1^ K[82], K[82], K[82]); |
|
|
|
SHA256_fresh(&tstate0,&tstate1, pad0^0x36363636U, pad1^0x36363636U, 0x36363636U, 0x36363636U); |
|
|
|
SHA256_fresh(&tstate0,&tstate1, pad0^ K[83], pad1^ K[83], K[83], K[83]); |
|
|
|
|
|
|
|
|
|
|
|
tmp0 = tstate0; |
|
|
|
tmp0 = tstate0; |
|
|
|
tmp1 = tstate1; |
|
|
|
tmp1 = tstate1; |
|
|
@ -812,14 +838,14 @@ const uint4 midstate0, const uint4 midstate16, const uint target) |
|
|
|
X[i*2 ] = ostate0; |
|
|
|
X[i*2 ] = ostate0; |
|
|
|
X[i*2+1] = ostate1; |
|
|
|
X[i*2+1] = ostate1; |
|
|
|
|
|
|
|
|
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(i+1,0x80000000U,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x4a0U)); |
|
|
|
SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87])); |
|
|
|
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
|
|
|
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88])); |
|
|
|
} |
|
|
|
} |
|
|
|
scrypt_core(X,padcache); |
|
|
|
scrypt_core(X,padcache); |
|
|
|
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]); |
|
|
|
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]); |
|
|
|
SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]); |
|
|
|
SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]); |
|
|
|
SHA256_fixed(&tmp0,&tmp1); |
|
|
|
SHA256_fixed(&tmp0,&tmp1); |
|
|
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); |
|
|
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88])); |
|
|
|
|
|
|
|
|
|
|
|
bool result = (EndianSwap(ostate1.w) <= target); |
|
|
|
bool result = (EndianSwap(ostate1.w) <= target); |
|
|
|
if (result) |
|
|
|
if (result) |
|
|
|