|
|
|
@ -1,7 +1,7 @@
@@ -1,7 +1,7 @@
|
|
|
|
|
// This file is taken and modified from the public-domain poclbm project, and |
|
|
|
|
// we have therefore decided to keep it public-domain in Phoenix. |
|
|
|
|
|
|
|
|
|
// 2011-07-11: further modified by Diapolo and still public-domain |
|
|
|
|
// 2011-07-12: further modified by Diapolo and still public-domain |
|
|
|
|
// -ck version to be compatible with cgminer |
|
|
|
|
// 2011-07-14: shorter code |
|
|
|
|
|
|
|
|
@ -38,6 +38,8 @@ __constant ulong L = 0x198c7e2a2;
@@ -38,6 +38,8 @@ __constant ulong L = 0x198c7e2a2;
|
|
|
|
|
#define BFI_INTX |
|
|
|
|
#define BITALIGNX |
|
|
|
|
|
|
|
|
|
#define O 15 |
|
|
|
|
|
|
|
|
|
#ifdef BITALIGN |
|
|
|
|
#pragma OPENCL EXTENSION cl_amd_media_ops : enable |
|
|
|
|
#define rot(x, y) amd_bitalign(x, x, (u)(32 - y)) |
|
|
|
@ -59,99 +61,105 @@ __constant ulong L = 0x198c7e2a2;
@@ -59,99 +61,105 @@ __constant ulong L = 0x198c7e2a2;
|
|
|
|
|
#define s1(n) (rot(Vals[(132 - n) % 8], 26) ^ rot(Vals[(132 - n) % 8], 21) ^ rot(Vals[(132 - n) % 8], 7)) |
|
|
|
|
#define ch(n) (Ch(Vals[(132 - n) % 8], Vals[(133 - n) % 8], Vals[(134 - n) % 8])) |
|
|
|
|
#define ma(n) (Ma(Vals[(129 - n) % 8], Vals[(130 - n) % 8], Vals[(128 - n) % 8])) |
|
|
|
|
#define t1(n) (K[n % 64] + Vals[(135 - n) % 8] + W[n] + s1(n) + ch(n)) |
|
|
|
|
#define t1(n) (K[n % 64] + Vals[(135 - n) % 8] + W[n - O] + s1(n) + ch(n)) |
|
|
|
|
#define t1_no_W(n) (K[n % 64] + Vals[(135 - n) % 8] + s1(n) + ch(n)) |
|
|
|
|
|
|
|
|
|
// intermediate W calculations |
|
|
|
|
#define P1(x) (rot(W[x - 2], 15) ^ rot(W[x - 2], 13) ^ (W[x - 2] >> 10U)) |
|
|
|
|
#define P2(x) (rot(W[x - 15], 25) ^ rot(W[x - 15], 14) ^ (W[x - 15] >> 3U)) |
|
|
|
|
#define P3(x) W[x - 7] |
|
|
|
|
#define P4(x) W[x - 16] |
|
|
|
|
#define P1(x) (rot(W[x - 2 - O], 15) ^ rot(W[x - 2 - O], 13) ^ (W[x - 2 - O] >> 10U)) |
|
|
|
|
#define P2(x) (rot(W[x - 15 - O], 25) ^ rot(W[x - 15 - O], 14) ^ (W[x - 15 - O] >> 3U)) |
|
|
|
|
#define P3(x) W[x - 7 - O] |
|
|
|
|
#define P4(x) W[x - 16 - O] |
|
|
|
|
|
|
|
|
|
// full W calculation |
|
|
|
|
#define W(x) (W[x] = P4(x) + P3(x) + P2(x) + P1(x)) |
|
|
|
|
#define W(x) (W[x - O] = P4(x) + P3(x) + P2(x) + P1(x)) |
|
|
|
|
|
|
|
|
|
// SHA round without W calc |
|
|
|
|
#define sharound(n) { Vals[(131 - n) % 8] += t1(n); Vals[(135 - n) % 8] = t1(n) + s0(n) + ma(n); } |
|
|
|
|
#define sharound_no_W(n) { Vals[(131 - n) % 8] += t1_no_W(n); Vals[(135 - n) % 8] = t1_no_W(n) + s0(n) + ma(n); } |
|
|
|
|
|
|
|
|
|
__kernel void search( const uint state0, const uint state1, const uint state2, const uint state3, |
|
|
|
|
const uint state4, const uint state5, const uint state6, const uint state7, |
|
|
|
|
const uint B1, const uint C1, const uint D1, |
|
|
|
|
const uint B1, const uint C1, const uint C1addK5, const uint D1, |
|
|
|
|
const uint F1, const uint G1, const uint H1, |
|
|
|
|
const uint base, |
|
|
|
|
const uint W2, |
|
|
|
|
const uint W16, const uint W17, |
|
|
|
|
const uint PreVal4, const uint T1, |
|
|
|
|
const uint W16, const uint W17, const uint W17_2, |
|
|
|
|
const uint PreVal4addT1, const uint T1substate0, |
|
|
|
|
__global uint * output) |
|
|
|
|
{ |
|
|
|
|
u W[124]; |
|
|
|
|
u W[124 - O]; |
|
|
|
|
u Vals[8]; |
|
|
|
|
#ifdef VECTORS4 |
|
|
|
|
u W_3 = base + (get_global_id(0) << 2) + (uint4)(0, 1, 2, 3); |
|
|
|
|
#elif defined VECTORS2 |
|
|
|
|
u W_3 = base + (get_global_id(0) << 1) + (uint2)(0, 1); |
|
|
|
|
#else |
|
|
|
|
u W_3 = base + get_global_id(0); |
|
|
|
|
#endif |
|
|
|
|
u Temp; |
|
|
|
|
|
|
|
|
|
Vals[0] = W_3 + PreVal4addT1 + T1substate0; |
|
|
|
|
Vals[1] = B1; |
|
|
|
|
Vals[2] = C1; |
|
|
|
|
|
|
|
|
|
Vals[4] = W_3 + PreVal4addT1; |
|
|
|
|
Vals[5] = F1; |
|
|
|
|
Vals[6] = G1; |
|
|
|
|
|
|
|
|
|
W[2] = W2; |
|
|
|
|
#ifdef VECTORS4 |
|
|
|
|
Vals[4] = (W[3] = base + (get_global_id(0) << 2) + (uint4)(0, 1, 2, 3)) + PreVal4; |
|
|
|
|
#elif defined VECTORS2 |
|
|
|
|
Vals[4] = (W[3] = base + (get_global_id(0) << 1) + (uint2)(0, 1)) + PreVal4; |
|
|
|
|
#else |
|
|
|
|
Vals[4] = (W[3] = base + get_global_id(0)) + PreVal4; |
|
|
|
|
#endif |
|
|
|
|
// used in: P2(19) == 285220864 (0x11002000), P4(20) |
|
|
|
|
W[4] = 0x80000000U; |
|
|
|
|
// W[4] = 0x80000000U; |
|
|
|
|
// P1(x) is 0 for x == 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 |
|
|
|
|
// P2(x) is 0 for x == 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 |
|
|
|
|
// P3(x) is 0 for x == 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
|
|
|
|
// P4(x) is 0 for x == 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 |
|
|
|
|
// W[x] in sharound(x) is 0 for x == 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 |
|
|
|
|
W[14] = W[13] = W[12] = W[11] = W[10] = W[9] = W[8] = W[7] = W[6] = W[5] = 0x00000000U; |
|
|
|
|
// W[14] = W[13] = W[12] = W[11] = W[10] = W[9] = W[8] = W[7] = W[6] = W[5] = 0x00000000U; |
|
|
|
|
// used in: P2(30) == 10485845 (0xA00055), P3(22), P4(31) |
|
|
|
|
// K[15] + W[15] == 0xc19bf174 + 0x00000280U = 0xc19bf3f4 |
|
|
|
|
W[15] = 0x00000280U; |
|
|
|
|
|
|
|
|
|
W[16] = W16; |
|
|
|
|
W[17] = W17; |
|
|
|
|
// removed P3(18) from add because it is == 0 |
|
|
|
|
W[18] = P1(18) + P4(18) + P2(18); |
|
|
|
|
// removed P3(19) from add because it is == 0 |
|
|
|
|
W[19] = (u)0x11002000 + P1(19) + P4(19); |
|
|
|
|
// removed P2(20), P3(20) from add because it is == 0 |
|
|
|
|
W[20] = P1(20) + P4(20); |
|
|
|
|
W[21] = P1(21); |
|
|
|
|
W[22] = P1(22) + P3(22); |
|
|
|
|
W[23] = P1(23) + P3(23); |
|
|
|
|
W[24] = P1(24) + P3(24); |
|
|
|
|
W[25] = P1(25) + P3(25); |
|
|
|
|
W[26] = P1(26) + P3(26); |
|
|
|
|
W[27] = P1(27) + P3(27); |
|
|
|
|
W[28] = P1(28) + P3(28); |
|
|
|
|
W[29] = P1(29) + P3(29); |
|
|
|
|
W[30] = (u)0xA00055 + P1(30) + P3(30); |
|
|
|
|
|
|
|
|
|
// Round 3 |
|
|
|
|
Vals[0] = state0 + Vals[4]; |
|
|
|
|
Vals[4] += T1; |
|
|
|
|
W[15 - O] = 0x00000280U; |
|
|
|
|
W[16 - O] = W16; |
|
|
|
|
W[17 - O] = W17; |
|
|
|
|
W[18 - O] = W2 + (rot(W_3, 25) ^ rot(W_3, 14) ^ (W_3 >> 3U)); |
|
|
|
|
W[19 - O] = W_3 + W17_2; |
|
|
|
|
W[20 - O] = (u)0x80000000U + P1(20); |
|
|
|
|
W[21 - O] = P1(21); |
|
|
|
|
W[22 - O] = P1(22) + P3(22); |
|
|
|
|
W[23 - O] = P1(23) + P3(23); |
|
|
|
|
W[24 - O] = P1(24) + P3(24); |
|
|
|
|
W[25 - O] = P1(25) + P3(25); |
|
|
|
|
W[26 - O] = P1(26) + P3(26); |
|
|
|
|
W[27 - O] = P1(27) + P3(27); |
|
|
|
|
W[28 - O] = P1(28) + P3(28); |
|
|
|
|
W[29 - O] = P1(29) + P3(29); |
|
|
|
|
W[30 - O] = (u)0xA00055 + P1(30) + P3(30); |
|
|
|
|
|
|
|
|
|
// Round 4 |
|
|
|
|
// K[4] + W[4] == 0x3956c25b + 0x80000000U = 0xb956c25b |
|
|
|
|
Vals[7] = (Vals[3] = (u)0xb956c25b + D1 + s1(4) + ch(4)) + H1; |
|
|
|
|
Vals[3] += s0(4) + ma(4); |
|
|
|
|
Temp = D1 + ch(4) + s1(4); |
|
|
|
|
Vals[7] = Temp + H1; |
|
|
|
|
Vals[3] = Temp + ma(4) + s0(4); |
|
|
|
|
|
|
|
|
|
// Round 5 |
|
|
|
|
Vals[2] = K[5] + C1 + s1(5) + ch(5) + s0(5) + ma(5); |
|
|
|
|
Vals[6] = K[5] + C1 + G1 + s1(5) + ch(5); |
|
|
|
|
|
|
|
|
|
sharound(6); |
|
|
|
|
sharound(7); |
|
|
|
|
sharound(8); |
|
|
|
|
sharound(9); |
|
|
|
|
sharound(10); |
|
|
|
|
sharound(11); |
|
|
|
|
sharound(12); |
|
|
|
|
sharound(13); |
|
|
|
|
sharound(14); |
|
|
|
|
Temp = C1addK5 + ch(5) + s1(5); |
|
|
|
|
Vals[6] = Temp + G1; |
|
|
|
|
Vals[2] = Temp + ma(5) + s0(5); |
|
|
|
|
|
|
|
|
|
// W[6] to W[14] are 0, so no need to add them! |
|
|
|
|
sharound_no_W(6); |
|
|
|
|
sharound_no_W(7); |
|
|
|
|
sharound_no_W(8); |
|
|
|
|
sharound_no_W(9); |
|
|
|
|
sharound_no_W(10); |
|
|
|
|
sharound_no_W(11); |
|
|
|
|
sharound_no_W(12); |
|
|
|
|
sharound_no_W(13); |
|
|
|
|
sharound_no_W(14); |
|
|
|
|
|
|
|
|
|
// #define sharound(n) { Vals[(131 - n) % 8] += t1(n); Vals[(135 - n) % 8] = t1(n) + s0(n) + ma(n); } |
|
|
|
|
// #define t1(n) (K[n % 64] + Vals[(135 - n) % 8] + W[n] + s1(n) + ch(n)) |
|
|
|
|
|
|
|
|
|
// Vals[(131 - 15) % 8] += (Vals[(135 - 15) % 8] = (u)0xc19bf3f4 + Vals[(135 - 15) % 8] + s1(15) + ch(15)); |
|
|
|
|
// Vals[(135 - 15) % 8] += s0(15) + ma(15); |
|
|
|
|
|
|
|
|
|
sharound(15); |
|
|
|
|
sharound(16); |
|
|
|
|
sharound(17); |
|
|
|
@ -236,35 +244,35 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
@@ -236,35 +244,35 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|
|
|
|
W(63); |
|
|
|
|
sharound(63); |
|
|
|
|
|
|
|
|
|
W[64] = state0 + Vals[0]; |
|
|
|
|
W[65] = state1 + Vals[1]; |
|
|
|
|
W[66] = state2 + Vals[2]; |
|
|
|
|
W[67] = state3 + Vals[3]; |
|
|
|
|
W[68] = state4 + Vals[4]; |
|
|
|
|
W[69] = state5 + Vals[5]; |
|
|
|
|
W[70] = state6 + Vals[6]; |
|
|
|
|
W[71] = state7 + Vals[7]; |
|
|
|
|
W[64 - O] = state0 + Vals[0]; |
|
|
|
|
W[65 - O] = state1 + Vals[1]; |
|
|
|
|
W[66 - O] = state2 + Vals[2]; |
|
|
|
|
W[67 - O] = state3 + Vals[3]; |
|
|
|
|
W[68 - O] = state4 + Vals[4]; |
|
|
|
|
W[69 - O] = state5 + Vals[5]; |
|
|
|
|
W[70 - O] = state6 + Vals[6]; |
|
|
|
|
W[71 - O] = state7 + Vals[7]; |
|
|
|
|
// used in: P2(87) = 285220864 (0x11002000), P4(88) |
|
|
|
|
// K[72] + W[72] == |
|
|
|
|
W[72] = 0x80000000U; |
|
|
|
|
W[72 - O] = 0x80000000U; |
|
|
|
|
// P1(x) is 0 for x == 75, 76, 77, 78, 79, 80 |
|
|
|
|
// P2(x) is 0 for x == 88, 89, 90, 91, 92, 93 |
|
|
|
|
// P3(x) is 0 for x == 80, 81, 82, 83, 84, 85 |
|
|
|
|
// P4(x) is 0 for x == 89, 90, 91, 92, 93, 94 |
|
|
|
|
// W[x] in sharound(x) is 0 for x == 73, 74, 75, 76, 77, 78 |
|
|
|
|
W[78] = W[77] = W[76] = W[75] = W[74] = W[73] = 0x00000000U; |
|
|
|
|
// W[78] = W[77] = W[76] = W[75] = W[74] = W[73] = 0x00000000U; |
|
|
|
|
// used in: P1(81) = 10485760 (0xA00000), P2(94) = 4194338 (0x400022), P3(86), P4(95) |
|
|
|
|
// K[79] + W[79] == |
|
|
|
|
W[79] = 0x00000100U; |
|
|
|
|
W[79 - O] = 0x00000100U; |
|
|
|
|
|
|
|
|
|
Vals[0] = H[0]; |
|
|
|
|
Vals[1] = H[1]; |
|
|
|
|
Vals[2] = H[2]; |
|
|
|
|
Vals[3] = (u)L + W[64]; |
|
|
|
|
Vals[3] = (u)L + W[64 - O]; |
|
|
|
|
Vals[4] = H[3]; |
|
|
|
|
Vals[5] = H[4]; |
|
|
|
|
Vals[6] = H[5]; |
|
|
|
|
Vals[7] = H[6] + W[64]; |
|
|
|
|
Vals[7] = H[6] + W[64 - O]; |
|
|
|
|
|
|
|
|
|
sharound(65); |
|
|
|
|
sharound(66); |
|
|
|
@ -274,21 +282,23 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
@@ -274,21 +282,23 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|
|
|
|
sharound(70); |
|
|
|
|
sharound(71); |
|
|
|
|
sharound(72); |
|
|
|
|
sharound(73); |
|
|
|
|
sharound(74); |
|
|
|
|
sharound(75); |
|
|
|
|
sharound(76); |
|
|
|
|
sharound(77); |
|
|
|
|
sharound(78); |
|
|
|
|
|
|
|
|
|
// W is also zero for these rounds |
|
|
|
|
sharound_no_W(73); |
|
|
|
|
sharound_no_W(74); |
|
|
|
|
sharound_no_W(75); |
|
|
|
|
sharound_no_W(76); |
|
|
|
|
sharound_no_W(77); |
|
|
|
|
sharound_no_W(78); |
|
|
|
|
|
|
|
|
|
sharound(79); |
|
|
|
|
|
|
|
|
|
// removed P1(80), P3(80) from add because it is == 0 |
|
|
|
|
W[80] = P2(80) + P4(80); |
|
|
|
|
W[81] = (u)0xA00000 + P4(81) + P2(81); |
|
|
|
|
W[82] = P4(82) + P2(82) + P1(82); |
|
|
|
|
W[83] = P4(83) + P2(83) + P1(83); |
|
|
|
|
W[84] = P4(84) + P2(84) + P1(84); |
|
|
|
|
W[85] = P4(85) + P2(85) + P1(85); |
|
|
|
|
W[80 - O] = P2(80) + P4(80); |
|
|
|
|
W[81 - O] = (u)0xA00000 + P4(81) + P2(81); |
|
|
|
|
W[82 - O] = P4(82) + P2(82) + P1(82); |
|
|
|
|
W[83 - O] = P4(83) + P2(83) + P1(83); |
|
|
|
|
W[84 - O] = P4(84) + P2(84) + P1(84); |
|
|
|
|
W[85 - O] = P4(85) + P2(85) + P1(85); |
|
|
|
|
W(86); |
|
|
|
|
|
|
|
|
|
sharound(80); |
|
|
|
@ -299,27 +309,25 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
@@ -299,27 +309,25 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|
|
|
|
sharound(85); |
|
|
|
|
sharound(86); |
|
|
|
|
|
|
|
|
|
W[87] = (u)0x11002000 + P4(87) + P3(87) + P1(87); |
|
|
|
|
W[87 - O] = (u)0x11002000 + P4(87) + P3(87) + P1(87); |
|
|
|
|
sharound(87); |
|
|
|
|
W[88] = P4(88) + P3(88) + P1(88); |
|
|
|
|
W[88 - O] = (u)0x80000000U + P3(88) + P1(88); |
|
|
|
|
sharound(88); |
|
|
|
|
W[89] = P3(89) + P1(89); |
|
|
|
|
W[89 - O] = P3(89) + P1(89); |
|
|
|
|
sharound(89); |
|
|
|
|
W[90] = P3(90) + P1(90); |
|
|
|
|
W[90 - O] = P3(90) + P1(90); |
|
|
|
|
sharound(90); |
|
|
|
|
W[91] = P3(91) + P1(91); |
|
|
|
|
W[91 - O] = P3(91) + P1(91); |
|
|
|
|
sharound(91); |
|
|
|
|
W[92] = P3(92) + P1(92); |
|
|
|
|
W[92 - O] = P3(92) + P1(92); |
|
|
|
|
sharound(92); |
|
|
|
|
// removed P2(93), P4(93) from add because it is == 0 |
|
|
|
|
W[93] = P3(93) + P1(93); |
|
|
|
|
W[93 - O] = P3(93) + P1(93); |
|
|
|
|
sharound(93); |
|
|
|
|
// removed P4(94) from add because it is == 0 |
|
|
|
|
W[94] = (u)0x400022 + P3(94) + P1(94); |
|
|
|
|
W[94 - O] = (u)0x400022 + P3(94) + P1(94); |
|
|
|
|
sharound(94); |
|
|
|
|
|
|
|
|
|
W(95); |
|
|
|
|
W[95 - O] = (u)0x00000100U + P3(95) + P2(95) + P1(95); |
|
|
|
|
sharound(95); |
|
|
|
|
|
|
|
|
|
W(96); |
|
|
|
|
sharound(96); |
|
|
|
|
W(97); |
|
|
|
@ -386,26 +394,26 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
@@ -386,26 +394,26 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
|
|
|
|
|
#if defined(VECTORS4) || defined(VECTORS2) |
|
|
|
|
if (Vals[7].x == -H[7]) |
|
|
|
|
{ |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & W[3].x] = W[3].x; |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & (W[3].x >> 2)] = W_3.x; |
|
|
|
|
} |
|
|
|
|
if (Vals[7].y == -H[7]) |
|
|
|
|
{ |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & W[3].y] = W[3].y; |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & (W[3].y >> 2)] = W_3.y; |
|
|
|
|
} |
|
|
|
|
#ifdef VECTORS4 |
|
|
|
|
if (Vals[7].z == -H[7]) |
|
|
|
|
{ |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & W[3].z] = W[3].z; |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & (W[3].z >> 2)] = W_3.z; |
|
|
|
|
} |
|
|
|
|
if (Vals[7].w == -H[7]) |
|
|
|
|
{ |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & W[3].w] = W[3].w; |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & (W[3].w >> 2)] = W_3.w; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
#else |
|
|
|
|
if (Vals[7] == -H[7]) |
|
|
|
|
{ |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & W[3]] = W[3]; |
|
|
|
|
output[MAXBUFFERS] = output[NFLAG & (W[3] >> 2)] = W_3; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
} |