Browse Source

Remove atomic ops from the OpenCL kernels, given the rarity of finding more than one nonce on the same wavefront and the potentially increased RAM speed requirements of using the atomics.

nfactor-troky
ckolivas 12 years ago
parent
commit
775a27281a
  1. 7
      diablo120823.cl
  2. 7
      diakgcn120823.cl
  3. 7
      phatk120823.cl
  4. 7
      poclbm120823.cl
  5. 7
      scrypt120823.cl

7
diablo120823.cl

@@ -1243,12 +1243,7 @@ void search(
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
#define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#if defined(VECTORS4)
bool result = any(ZA[924] == 0x136032EDU);

7
diakgcn120823.cl

@@ -572,12 +572,7 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
#define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#ifdef VECTORS4
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {

7
phatk120823.cl

@@ -388,12 +388,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
#define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#ifdef VECTORS4
bool result = W[117].x & W[117].y & W[117].z & W[117].w;

7
poclbm120823.cl

@@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
#define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#if defined(VECTORS2) || defined(VECTORS4)
if (any(Vals[2] == 0x136032edU)) {

7
scrypt120823.cl

@@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 * restrict input,

Loading…
Cancel
Save