1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-11 07:17:58 +00:00

Put all constants used in the poclbm kernel into a __constant memory array to speed up concurrent reads on the wavefront.

This commit is contained in:
Con Kolivas 2013-02-21 12:04:01 +11:00
parent a7859bb416
commit 19725e7cdb

View File

@ -13,7 +13,7 @@
typedef uint u;
#endif
__constant uint K[64] = {
__constant uint K[87] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
@ -21,9 +21,56 @@ __constant uint K[64] = {
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
0xc19bf3f4U,
0x80000000U,
0x00000280U,
0x00a00055U,
0xf377ed68U,
0xa54ff53aU,
0x08909ae5U,
0x90bb1e3cU,
0x9b05688cU,
0xca0b3af3U,
0x3c6ef372U,
0xbb67ae85U,
0x6a09e667U,
0x50c6645bU,
0x510e527fU,
0x3ac42e24U,
0x5807aa98U,
0xc19bf274U,
0x00a00000U,
0x00000100U,
0x11002000U,
0x00400022U,
0x136032edU
};
// Named aliases for the extra entries of the __constant array K, i.e. the
// values stored at indices 64..86 after the first 64 entries. Each macro is
// named after the hexadecimal value of the element it expands to (an 'x'
// prefix followed by the literal's digits in lower case), so the kernel code
// can keep spelling the constant while reading it from K instead of using an
// immediate literal.
// NOTE: the index in each macro must match the position of the corresponding
// value in K's initializer; keep both lists in the same order when editing.
#define xc19bf3f4U K[64]
#define x80000000U K[65]
#define x00000280U K[66]
#define x00a00055U K[67]
#define xf377ed68U K[68]
#define xa54ff53aU K[69]
#define x08909ae5U K[70]
#define x90bb1e3cU K[71]
#define x9b05688cU K[72]
#define xca0b3af3U K[73]
#define x3c6ef372U K[74]
#define xbb67ae85U K[75]
#define x6a09e667U K[76]
#define x50c6645bU K[77]
#define x510e527fU K[78]
#define x3ac42e24U K[79]
#define x5807aa98U K[80]
#define xc19bf274U K[81]
#define x00a00000U K[82]
#define x00000100U K[83]
#define x11002000U K[84]
#define x00400022U K[85]
#define x136032edU K[86]
// This part is not from the stock poclbm kernel. It's part of an optimization
// added in the Phoenix Miner.
@ -183,7 +230,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
Vals[5]+=0xC19BF3F4U;
Vals[5]+=xc19bf3f4U;
Vals[1]+=Vals[5];
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
@ -223,7 +270,7 @@ Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
W[4]+=0x80000000U;
W[4]+=x80000000U;
Vals[0]+=W[4];
Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
@ -242,7 +289,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
W[6]+=0x00000280U;
W[6]+=x00000280U;
Vals[7]+=W[6];
Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
@ -321,7 +368,7 @@ Vals[3]+=Vals[6];
Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
W[14]=0x00a00055U;
W[14]=x00a00055U;
W[14]+=W[7];
W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
Vals[7]+=W[14];
@ -701,22 +748,22 @@ Vals[5]+=state0;
W[7]=state7;
W[7]+=Vals[2];
Vals[2]=0xF377ED68U;
Vals[2]=xf377ed68U;
Vals[2]+=Vals[5];
W[3]=state3;
W[3]+=Vals[0];
Vals[0]=0xa54ff53aU;
Vals[0]=xa54ff53aU;
Vals[0]+=Vals[2];
Vals[2]+=0x08909ae5U;
Vals[2]+=x08909ae5U;
W[6]=state6;
W[6]+=Vals[3];
Vals[3]=0x90BB1E3CU;
Vals[3]=x90bb1e3cU;
Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
Vals[3]+=(0x9b05688cU^(Vals[0]&0xca0b3af3U));
Vals[3]+=(x9b05688cU^(Vals[0]&xca0b3af3U));
Vals[7]+=state1;
Vals[3]+=Vals[7];
@ -724,29 +771,29 @@ Vals[3]+=Vals[7];
W[2]=state2;
W[2]+=Vals[6];
Vals[6]=0x3c6ef372U;
Vals[6]=x3c6ef372U;
Vals[6]+=Vals[3];
Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
Vals[3]+=Ma2(0xbb67ae85U,Vals[2],0x6a09e667U);
Vals[3]+=Ma2(xbb67ae85U,Vals[2],x6a09e667U);
W[5]=state5;
W[5]+=Vals[4];
Vals[4]=0x50C6645BU;
Vals[4]=x50c6645bU;
Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
Vals[4]+=ch(Vals[6],Vals[0],0x510e527fU);
Vals[4]+=ch(Vals[6],Vals[0],x510e527fU);
Vals[4]+=W[2];
W[1]=Vals[7];
Vals[7]=0xbb67ae85U;
Vals[7]=xbb67ae85U;
Vals[7]+=Vals[4];
Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
Vals[4]+=Ma2(0x6a09e667U,Vals[3],Vals[2]);
Vals[4]+=Ma2(x6a09e667U,Vals[3],Vals[2]);
W[4]=state4;
W[4]+=Vals[1];
Vals[1]=0x3AC42E24U;
Vals[1]=x3ac42e24U;
Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
Vals[1]+=W[3];
@ -754,7 +801,7 @@ Vals[1]+=W[3];
W[0]=Vals[5];
Vals[5]=Vals[1];
Vals[5]+=0x6a09e667U;
Vals[5]+=x6a09e667U;
Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
@ -793,7 +840,7 @@ Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
Vals[2]+=0x5807AA98U;
Vals[2]+=x5807aa98U;
Vals[0]+=Vals[2];
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
@ -842,7 +889,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
Vals[5]+=0xC19BF274U;
Vals[5]+=xc19bf274U;
Vals[1]+=Vals[5];
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
@ -857,7 +904,7 @@ Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
W[1]+=0x00a00000U;
W[1]+=x00a00000U;
Vals[3]+=W[1];
Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
@ -907,7 +954,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
W[6]+=0x00000100U;
W[6]+=x00000100U;
W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
Vals[7]+=W[6];
Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
@ -917,7 +964,7 @@ Vals[4]+=Vals[7];
Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
W[7]+=0x11002000U;
W[7]+=x11002000U;
W[7]+=W[0];
W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
Vals[5]+=W[7];
@ -928,7 +975,7 @@ Vals[1]+=Vals[5];
Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
W[8]=0x80000000U;
W[8]=x80000000U;
W[8]+=W[1];
W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
Vals[2]+=W[8];
@ -989,7 +1036,7 @@ Vals[3]+=Vals[6];
Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
W[14]=0x00400022U;
W[14]=x00400022U;
W[14]+=W[7];
W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
Vals[7]+=W[14];
@ -1000,7 +1047,7 @@ Vals[4]+=Vals[7];
Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
W[15]=0x00000100U;
W[15]=x00000100U;
W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
W[15]+=W[8];
W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
@ -1325,20 +1372,20 @@ Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#if defined(VECTORS2) || defined(VECTORS4)
if (any(Vals[2] == 0x136032edU)) {
if (Vals[2].x == 0x136032edU)
if (any(Vals[2] == x136032edU)) {
if (Vals[2].x == x136032edU)
SETFOUND(nonce.x);
if (Vals[2].y == 0x136032edU)
if (Vals[2].y == x136032edU)
SETFOUND(nonce.y);
#if defined(VECTORS4)
if (Vals[2].z == 0x136032edU)
if (Vals[2].z == x136032edU)
SETFOUND(nonce.z);
if (Vals[2].w == 0x136032edU)
if (Vals[2].w == x136032edU)
SETFOUND(nonce.w);
#endif
}
#else
if (Vals[2] == 0x136032edU)
if (Vals[2] == x136032edU)
SETFOUND(nonce);
#endif
}