Browse Source

Microoptimise phatk kernel on return code.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
b2b5083bda
  1. 2
      ocl.c
  2. 34
      phatk120203.cl

2
ocl.c

@@ -597,7 +597,7 @@ build:
/* copy over all of the generated binaries. */ /* copy over all of the generated binaries. */
applog(LOG_DEBUG, "Binary size for gpu %d found in binary slot %d: %d", gpu, slot, binary_sizes[slot]); applog(LOG_DEBUG, "Binary size for gpu %d found in binary slot %d: %d", gpu, slot, binary_sizes[slot]);
if (!binary_sizes[slot]) { if (!binary_sizes[slot]) {
applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, may need to reboot!"); applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
return NULL; return NULL;
} }
binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1); binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);

34
phatk120203.cl

@@ -4,13 +4,11 @@
#ifdef VECTORS4 #ifdef VECTORS4
typedef uint4 u; typedef uint4 u;
#else #elif defined VECTORS2
#ifdef VECTORS2
typedef uint2 u; typedef uint2 u;
#else #else
typedef uint u; typedef uint u;
#endif #endif
#endif
__constant uint K[64] = { __constant uint K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -194,8 +192,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3 //Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U}; W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
#else #elif defined VECTORS2
#ifdef VECTORS2
W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U); uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
W[18] = PreW18 + (u){r, r ^ 0x2004000U}; W[18] = PreW18 + (u){r, r ^ 0x2004000U};
@@ -204,7 +201,6 @@ void search( const uint state0, const uint state1, const uint state2, const uint
u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U); u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U);
W[18] = PreW18 + r; W[18] = PreW18 + r;
#endif #endif
#endif
//the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions //the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions
@@ -388,36 +384,34 @@ void search( const uint state0, const uint state1, const uint state2, const uint
sharoundW(64 + 57); sharoundW(64 + 57);
sharoundW(64 + 58); sharoundW(64 + 58);
u v = W[117] + W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) ^ W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)); (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
#define FOUND (0x80) #define FOUND (0x80)
#define NFLAG (0x7F) #define NFLAG (0x7F)
#ifdef VECTORS4 #ifdef VECTORS4
bool result = v.x & v.y & v.z & v.w; bool result = W[117].x & W[117].y & W[117].z & W[117].w;
if (!result) { if (!result) {
if (!v.x) if (!W[117].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x; output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
if (!v.y) if (!W[117].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y; output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
if (!v.z) if (!W[117].z)
output[FOUND] = output[NFLAG & W[3].z] = W[3].z; output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
if (!v.w) if (!W[117].w)
output[FOUND] = output[NFLAG & W[3].w] = W[3].w; output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
} }
#else #elif defined VECTORS2
#ifdef VECTORS2 bool result = W[117].x & W[117].y;
bool result = v.x & v.y;
if (!result) { if (!result) {
if (!v.x) if (!W[117].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x; output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
if (!v.y) if (!W[117].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y; output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
} }
#else #else
if (!v) if (!W[117])
output[FOUND] = output[NFLAG & W[3]] = W[3]; output[FOUND] = output[NFLAG & W[3]] = W[3];
#endif #endif
#endif
} }

Loading…
Cancel
Save