1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-24 21:44:38 +00:00

Remove atomic ops from OpenCL kernels, given the rarity of more than one nonce on the same wavefront and the potentially increased RAM speed requirements of using the atomics.

This commit is contained in:
ckolivas 2012-10-16 15:10:22 +11:00
parent 5fcc8612b4
commit 775a27281a
5 changed files with 5 additions and 30 deletions

View File

@ -1243,12 +1243,7 @@ void search(
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]); ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
#define FOUND (0x0F) #define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#if defined(VECTORS4) #if defined(VECTORS4)
bool result = any(ZA[924] == 0x136032EDU); bool result = any(ZA[924] == 0x136032EDU);

View File

@ -572,12 +572,7 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
#define FOUND (0x0F) #define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#ifdef VECTORS4 #ifdef VECTORS4
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) { if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {

View File

@ -388,12 +388,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
#define FOUND (0x0F) #define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#ifdef VECTORS4 #ifdef VECTORS4
bool result = W[117].x & W[117].y & W[117].z & W[117].w; bool result = W[117].x & W[117].y & W[117].z & W[117].w;

View File

@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
#define FOUND (0x0F) #define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
#if defined(VECTORS2) || defined(VECTORS4) #if defined(VECTORS2) || defined(VECTORS4)
if (any(Vals[2] == 0x136032edU)) { if (any(Vals[2] == 0x136032edU)) {

View File

@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
} }
#define FOUND (0x0F) #define FOUND (0x0F)
#if defined(OCL1)
#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#else
#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
#endif
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 * restrict input, __kernel void search(__global const uint4 * restrict input,