mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-11 07:17:58 +00:00
Put all constants used in the poclbm kernel into a __constant memory array to speed up concurrent reads across the wavefront.
This commit is contained in:
parent
a7859bb416
commit
19725e7cdb
113
poclbm121016.cl
113
poclbm121016.cl
@ -13,7 +13,7 @@
|
||||
typedef uint u;
|
||||
#endif
|
||||
|
||||
__constant uint K[64] = {
|
||||
__constant uint K[87] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
@ -21,9 +21,56 @@ __constant uint K[64] = {
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
|
||||
|
||||
0xc19bf3f4U,
|
||||
0x80000000U,
|
||||
0x00000280U,
|
||||
0x00a00055U,
|
||||
0xf377ed68U,
|
||||
0xa54ff53aU,
|
||||
0x08909ae5U,
|
||||
0x90bb1e3cU,
|
||||
0x9b05688cU,
|
||||
0xca0b3af3U,
|
||||
0x3c6ef372U,
|
||||
0xbb67ae85U,
|
||||
0x6a09e667U,
|
||||
0x50c6645bU,
|
||||
0x510e527fU,
|
||||
0x3ac42e24U,
|
||||
0x5807aa98U,
|
||||
0xc19bf274U,
|
||||
0x00a00000U,
|
||||
0x00000100U,
|
||||
0x11002000U,
|
||||
0x00400022U,
|
||||
0x136032edU
|
||||
};
|
||||
|
||||
#define xc19bf3f4U K[64]
|
||||
#define x80000000U K[65]
|
||||
#define x00000280U K[66]
|
||||
#define x00a00055U K[67]
|
||||
#define xf377ed68U K[68]
|
||||
#define xa54ff53aU K[69]
|
||||
#define x08909ae5U K[70]
|
||||
#define x90bb1e3cU K[71]
|
||||
#define x9b05688cU K[72]
|
||||
#define xca0b3af3U K[73]
|
||||
#define x3c6ef372U K[74]
|
||||
#define xbb67ae85U K[75]
|
||||
#define x6a09e667U K[76]
|
||||
#define x50c6645bU K[77]
|
||||
#define x510e527fU K[78]
|
||||
#define x3ac42e24U K[79]
|
||||
#define x5807aa98U K[80]
|
||||
#define xc19bf274U K[81]
|
||||
#define x00a00000U K[82]
|
||||
#define x00000100U K[83]
|
||||
#define x11002000U K[84]
|
||||
#define x00400022U K[85]
|
||||
#define x136032edU K[86]
|
||||
|
||||
// This part is not from the stock poclbm kernel. It's part of an optimization
|
||||
// added in the Phoenix Miner.
|
||||
@ -183,7 +230,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
|
||||
|
||||
Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
|
||||
Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
|
||||
Vals[5]+=0xC19BF3F4U;
|
||||
Vals[5]+=xc19bf3f4U;
|
||||
Vals[1]+=Vals[5];
|
||||
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
|
||||
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
|
||||
@ -223,7 +270,7 @@ Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
|
||||
Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
|
||||
|
||||
W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
|
||||
W[4]+=0x80000000U;
|
||||
W[4]+=x80000000U;
|
||||
Vals[0]+=W[4];
|
||||
Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
|
||||
Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
|
||||
@ -242,7 +289,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
|
||||
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
|
||||
|
||||
W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
|
||||
W[6]+=0x00000280U;
|
||||
W[6]+=x00000280U;
|
||||
Vals[7]+=W[6];
|
||||
Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
|
||||
Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
|
||||
@ -321,7 +368,7 @@ Vals[3]+=Vals[6];
|
||||
Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
|
||||
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
|
||||
|
||||
W[14]=0x00a00055U;
|
||||
W[14]=x00a00055U;
|
||||
W[14]+=W[7];
|
||||
W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
|
||||
Vals[7]+=W[14];
|
||||
@ -701,22 +748,22 @@ Vals[5]+=state0;
|
||||
W[7]=state7;
|
||||
W[7]+=Vals[2];
|
||||
|
||||
Vals[2]=0xF377ED68U;
|
||||
Vals[2]=xf377ed68U;
|
||||
Vals[2]+=Vals[5];
|
||||
|
||||
W[3]=state3;
|
||||
W[3]+=Vals[0];
|
||||
|
||||
Vals[0]=0xa54ff53aU;
|
||||
Vals[0]=xa54ff53aU;
|
||||
Vals[0]+=Vals[2];
|
||||
Vals[2]+=0x08909ae5U;
|
||||
Vals[2]+=x08909ae5U;
|
||||
|
||||
W[6]=state6;
|
||||
W[6]+=Vals[3];
|
||||
|
||||
Vals[3]=0x90BB1E3CU;
|
||||
Vals[3]=x90bb1e3cU;
|
||||
Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
|
||||
Vals[3]+=(0x9b05688cU^(Vals[0]&0xca0b3af3U));
|
||||
Vals[3]+=(x9b05688cU^(Vals[0]&xca0b3af3U));
|
||||
|
||||
Vals[7]+=state1;
|
||||
Vals[3]+=Vals[7];
|
||||
@ -724,29 +771,29 @@ Vals[3]+=Vals[7];
|
||||
W[2]=state2;
|
||||
W[2]+=Vals[6];
|
||||
|
||||
Vals[6]=0x3c6ef372U;
|
||||
Vals[6]=x3c6ef372U;
|
||||
Vals[6]+=Vals[3];
|
||||
Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
|
||||
Vals[3]+=Ma2(0xbb67ae85U,Vals[2],0x6a09e667U);
|
||||
Vals[3]+=Ma2(xbb67ae85U,Vals[2],x6a09e667U);
|
||||
|
||||
W[5]=state5;
|
||||
W[5]+=Vals[4];
|
||||
|
||||
Vals[4]=0x50C6645BU;
|
||||
Vals[4]=x50c6645bU;
|
||||
Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
|
||||
Vals[4]+=ch(Vals[6],Vals[0],0x510e527fU);
|
||||
Vals[4]+=ch(Vals[6],Vals[0],x510e527fU);
|
||||
Vals[4]+=W[2];
|
||||
|
||||
W[1]=Vals[7];
|
||||
Vals[7]=0xbb67ae85U;
|
||||
Vals[7]=xbb67ae85U;
|
||||
Vals[7]+=Vals[4];
|
||||
Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
|
||||
Vals[4]+=Ma2(0x6a09e667U,Vals[3],Vals[2]);
|
||||
Vals[4]+=Ma2(x6a09e667U,Vals[3],Vals[2]);
|
||||
|
||||
W[4]=state4;
|
||||
W[4]+=Vals[1];
|
||||
|
||||
Vals[1]=0x3AC42E24U;
|
||||
Vals[1]=x3ac42e24U;
|
||||
Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
|
||||
Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
|
||||
Vals[1]+=W[3];
|
||||
@ -754,7 +801,7 @@ Vals[1]+=W[3];
|
||||
W[0]=Vals[5];
|
||||
|
||||
Vals[5]=Vals[1];
|
||||
Vals[5]+=0x6a09e667U;
|
||||
Vals[5]+=x6a09e667U;
|
||||
|
||||
Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
|
||||
Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
|
||||
@ -793,7 +840,7 @@ Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
|
||||
|
||||
Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
|
||||
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
|
||||
Vals[2]+=0x5807AA98U;
|
||||
Vals[2]+=x5807aa98U;
|
||||
Vals[0]+=Vals[2];
|
||||
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
|
||||
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
|
||||
@ -842,7 +889,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
|
||||
|
||||
Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
|
||||
Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
|
||||
Vals[5]+=0xC19BF274U;
|
||||
Vals[5]+=xc19bf274U;
|
||||
Vals[1]+=Vals[5];
|
||||
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
|
||||
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
|
||||
@ -857,7 +904,7 @@ Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
|
||||
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
|
||||
|
||||
W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
|
||||
W[1]+=0x00a00000U;
|
||||
W[1]+=x00a00000U;
|
||||
Vals[3]+=W[1];
|
||||
Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
|
||||
Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
|
||||
@ -907,7 +954,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
|
||||
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
|
||||
|
||||
W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
|
||||
W[6]+=0x00000100U;
|
||||
W[6]+=x00000100U;
|
||||
W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
|
||||
Vals[7]+=W[6];
|
||||
Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
|
||||
@ -917,7 +964,7 @@ Vals[4]+=Vals[7];
|
||||
Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
|
||||
Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
|
||||
|
||||
W[7]+=0x11002000U;
|
||||
W[7]+=x11002000U;
|
||||
W[7]+=W[0];
|
||||
W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
|
||||
Vals[5]+=W[7];
|
||||
@ -928,7 +975,7 @@ Vals[1]+=Vals[5];
|
||||
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
|
||||
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
|
||||
|
||||
W[8]=0x80000000U;
|
||||
W[8]=x80000000U;
|
||||
W[8]+=W[1];
|
||||
W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
|
||||
Vals[2]+=W[8];
|
||||
@ -989,7 +1036,7 @@ Vals[3]+=Vals[6];
|
||||
Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
|
||||
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
|
||||
|
||||
W[14]=0x00400022U;
|
||||
W[14]=x00400022U;
|
||||
W[14]+=W[7];
|
||||
W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
|
||||
Vals[7]+=W[14];
|
||||
@ -1000,7 +1047,7 @@ Vals[4]+=Vals[7];
|
||||
Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
|
||||
Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
|
||||
|
||||
W[15]=0x00000100U;
|
||||
W[15]=x00000100U;
|
||||
W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
|
||||
W[15]+=W[8];
|
||||
W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
|
||||
@ -1325,20 +1372,20 @@ Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
|
||||
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
|
||||
|
||||
#if defined(VECTORS2) || defined(VECTORS4)
|
||||
if (any(Vals[2] == 0x136032edU)) {
|
||||
if (Vals[2].x == 0x136032edU)
|
||||
if (any(Vals[2] == x136032edU)) {
|
||||
if (Vals[2].x == x136032edU)
|
||||
SETFOUND(nonce.x);
|
||||
if (Vals[2].y == 0x136032edU)
|
||||
if (Vals[2].y == x136032edU)
|
||||
SETFOUND(nonce.y);
|
||||
#if defined(VECTORS4)
|
||||
if (Vals[2].z == 0x136032edU)
|
||||
if (Vals[2].z == x136032edU)
|
||||
SETFOUND(nonce.z);
|
||||
if (Vals[2].w == 0x136032edU)
|
||||
if (Vals[2].w == x136032edU)
|
||||
SETFOUND(nonce.w);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
if (Vals[2] == 0x136032edU)
|
||||
if (Vals[2] == x136032edU)
|
||||
SETFOUND(nonce);
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user