Browse Source

Hand optimise first variable declaration order in poclbm kernel.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
dc2d553d5b
  1. 97
      poclbm120213.cl

97
poclbm120213.cl

@@ -88,31 +88,38 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
W[20]=fcty_e;
W[20]+=nonce;
W[16]=state0;
W[16]+=W[20];
W[19]=d1;
W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
W[16]=W[20];
W[16]+=state0;
W[19]=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
W[19]+=d1;
W[19]+=ch(W[16],b1,c1);
W[19]+=K[4];
W[23]=h1;
W[19]+=0x80000000;
W[23]+=W[19];
W[23]=W[19];
W[23]+=h1;
W[20]+=fcty_e2;
W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
W[18]=c1;
W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
W[18]+=ch(W[23],W[16],b1);
W[18]+=K[5];
W[22]=g1;
W[22]+=W[18];
W[22]=W[18];
W[22]+=g1;
W[19]+=Ma2(g1,W[20],f1);
W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
W[17]=b1;
W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
W[17]+=ch(W[22],W[23],W[16]);
W[17]+=K[6];
W[21]=f1;
W[21]+=W[17];
W[21]=W[17];
W[21]+=f1;
W[18]+=Ma2(f1,W[19],W[20]);
W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
@@ -184,6 +191,7 @@ W[22]+=K[17];
W[22]+=fw1;
W[18]+=W[22];
W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
W[2]+=fw2;
W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
@@ -194,6 +202,7 @@ W[22]+=Ma(W[17],W[23],W[16]);
W[17]+=W[21];
W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
W[21]+=Ma(W[16],W[22],W[23]);
W[3]=nonce;
W[3]+=fw3;
W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
@@ -202,6 +211,7 @@ W[20]+=K[19];
W[20]+=W[3];
W[16]+=W[20];
W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
W[4]+=0x80000000;
W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
@@ -215,10 +225,12 @@ W[19]+=Ma(W[22],W[20],W[21]);
W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
W[18]+=ch(W[23],W[16],W[17]);
W[18]+=K[21];
W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
W[18]+=W[5];
W[22]+=W[18];
W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
W[6]+=0x00000280U;
W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
@@ -229,6 +241,7 @@ W[18]+=Ma(W[21],W[19],W[20]);
W[21]+=W[17];
W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
W[17]+=Ma(W[20],W[18],W[19]);
W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
W[7]+=fw0;
W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
@@ -237,6 +250,7 @@ W[16]+=K[23];
W[16]+=W[7];
W[20]+=W[16];
W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
W[8]+=fw1;
W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
@@ -247,6 +261,7 @@ W[16]+=Ma(W[19],W[17],W[18]);
W[19]+=W[23];
W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
W[23]+=Ma(W[18],W[16],W[17]);
W[9]=W[2];
W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
@@ -255,6 +270,7 @@ W[22]+=K[25];
W[22]+=W[9];
W[18]+=W[22];
W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
W[10]=W[3];
W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
@@ -265,6 +281,7 @@ W[22]+=Ma(W[17],W[23],W[16]);
W[17]+=W[21];
W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
W[21]+=Ma(W[16],W[22],W[23]);
W[11]=W[4];
W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
@@ -273,6 +290,7 @@ W[20]+=K[27];
W[20]+=W[11];
W[16]+=W[20];
W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
W[12]=W[5];
W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
@@ -283,6 +301,7 @@ W[20]+=Ma(W[23],W[21],W[22]);
W[23]+=W[19];
W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
W[19]+=Ma(W[22],W[20],W[21]);
W[13]=W[6];
W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
@@ -291,6 +310,7 @@ W[18]+=K[29];
W[18]+=W[13];
W[22]+=W[18];
W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
W[14]=0x00a00055U;
W[14]+=W[7];
W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
@@ -302,6 +322,7 @@ W[18]+=Ma(W[21],W[19],W[20]);
W[21]+=W[17];
W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
W[17]+=Ma(W[20],W[18],W[19]);
W[15]=fw15;
W[15]+=W[8];
W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
@@ -311,6 +332,7 @@ W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
W[16]+=W[15];
W[20]+=W[16];
W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
W[0]=fw01r;
W[0]+=W[9];
W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
@@ -322,6 +344,7 @@ W[16]+=Ma(W[19],W[17],W[18]);
W[19]+=W[23];
W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
W[23]+=Ma(W[18],W[16],W[17]);
W[1]=fw1;
W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
W[1]+=W[10];
@@ -633,52 +656,68 @@ W[16]+=W[15];
W[20]+=W[16];
W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
W[16]+=Ma(W[19],W[17],W[18]);
W[0]=W[16];
W[7]=W[23];
W[7]+=state7;
W[7]=state7;
W[7]+=W[23];
W[23]=0xb0edbdd0;
W[23]+=K[0];
W[0]+=state0;
W[23]+=W[0];
W[3]=W[19];
W[3]+=state3;
W[3]=state3;
W[3]+=W[19];
W[19]=0xa54ff53a;
W[19]+=W[23];
W[1]=W[17];
W[1]+=state1;
W[6]=W[22];
W[6]+=state6;
W[6]=state6;
W[6]+=W[22];
W[22]=0x1f83d9abU;
W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
W[22]+=(0x9b05688cU^(W[19]&0xca0b3af3U));
W[22]+=K[1];
W[2]=W[18];
W[2]+=state2;
W[22]+=W[1];
W[2]=state2;
W[2]+=W[18];
W[18]=0x3c6ef372U;
W[22]+=W[1];
W[18]+=W[22];
W[23]+=0x08909ae5U;
W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
W[5]=W[21];
W[5]+=state5;
W[5]=state5;
W[5]+=W[21];
W[21]=0x9b05688cU;
W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
W[21]+=ch(W[18],W[19],0x510e527fU);
W[21]+=K[2];
W[21]+=W[2];
W[17]=0xbb67ae85U;
W[17]+=W[21];
W[22]+=Ma2(0xbb67ae85U,W[23],0x6a09e667U);
W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
W[4]=W[20];
W[4]+=state4;
W[4]=state4;
W[4]+=W[20];
W[20]=0x510e527fU;
W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
W[20]+=ch(W[17],W[18],W[19]);
W[20]+=K[3];
W[20]+=W[3];
W[16]=0x6a09e667U;
W[16]+=W[20];
W[16]=W[20];
W[16]+=0x6a09e667U;
W[21]+=Ma2(0x6a09e667U,W[22],W[23]);
W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
@@ -832,6 +871,7 @@ W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
W[16]+=W[7];
W[20]+=W[16];
W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
W[8]=0x80000000;
W[8]+=W[1];
W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
@@ -843,6 +883,7 @@ W[16]+=Ma(W[19],W[17],W[18]);
W[19]+=W[23];
W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
W[23]+=Ma(W[18],W[16],W[17]);
W[9]=W[2];
W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
@@ -851,6 +892,7 @@ W[22]+=K[25];
W[22]+=W[9];
W[18]+=W[22];
W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
W[10]=W[3];
W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
@@ -861,6 +903,7 @@ W[22]+=Ma(W[17],W[23],W[16]);
W[17]+=W[21];
W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
W[21]+=Ma(W[16],W[22],W[23]);
W[11]=W[4];
W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
@@ -869,6 +912,7 @@ W[20]+=K[27];
W[20]+=W[11];
W[16]+=W[20];
W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
W[12]=W[5];
W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
@@ -879,6 +923,7 @@ W[20]+=Ma(W[23],W[21],W[22]);
W[23]+=W[19];
W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
W[19]+=Ma(W[22],W[20],W[21]);
W[13]=W[6];
W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
@@ -887,6 +932,7 @@ W[18]+=K[29];
W[18]+=W[13];
W[22]+=W[18];
W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
W[14]=0x00400022U;
W[14]+=W[7];
W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
@@ -898,6 +944,7 @@ W[18]+=Ma(W[21],W[19],W[20]);
W[21]+=W[17];
W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
W[17]+=Ma(W[20],W[18],W[19]);
W[15]=0x00000100U;
W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
W[15]+=W[8];

Loading…
Cancel
Save