mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 12:34:27 +00:00
OpenCL 1.0 does not have native atomic_add and extremely slow support with atom_add so detect opencl1.0 and use a non-atomic workaround.
This commit is contained in:
parent
ec522bdfdb
commit
4fbe5bed15
@ -1244,28 +1244,33 @@ void search(
|
||||
|
||||
#define FOUND (0x0F)
|
||||
|
||||
#if defined(OCL1)
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
(Xfound) = output[FOUND]; \
|
||||
output[FOUND] += 1; \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#else
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
Xfound = atomic_add(&output[FOUND], 1); \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if defined(VECTORS4)
|
||||
bool result = any(ZA[924] == 0x136032EDU);
|
||||
|
||||
if (result) {
|
||||
uint found;
|
||||
|
||||
if (ZA[924].x == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.x;
|
||||
}
|
||||
if (ZA[924].y == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.y;
|
||||
}
|
||||
if (ZA[924].z == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.z;
|
||||
}
|
||||
if (ZA[924].w == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.w;
|
||||
}
|
||||
if (ZA[924].x == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.x);
|
||||
if (ZA[924].y == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.y);
|
||||
if (ZA[924].z == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.z);
|
||||
if (ZA[924].w == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.w);
|
||||
}
|
||||
#elif defined(VECTORS2)
|
||||
bool result = any(ZA[924] == 0x136032EDU);
|
||||
@ -1273,19 +1278,16 @@ void search(
|
||||
if (result) {
|
||||
uint found;
|
||||
|
||||
if (ZA[924].x == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.x;
|
||||
}
|
||||
if (ZA[924].y == 0x136032EDU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce.y;
|
||||
}
|
||||
if (ZA[924].x == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.x);
|
||||
if (ZA[924].y == 0x136032EDU)
|
||||
SETFOUND(found, Znonce.y);
|
||||
}
|
||||
#else
|
||||
if (ZA[924] == 0x136032EDU) {
|
||||
uint found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = Znonce;
|
||||
uint found;
|
||||
|
||||
SETFOUND(found, Znonce);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -573,44 +573,46 @@ __kernel
|
||||
|
||||
#define FOUND (0x0F)
|
||||
|
||||
#if defined(OCL1)
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
(Xfound) = output[FOUND]; \
|
||||
output[FOUND] += 1; \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#else
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
Xfound = atomic_add(&output[FOUND], 1); \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef VECTORS4
|
||||
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
|
||||
uint found;
|
||||
|
||||
if (V[7].x == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.x;
|
||||
}
|
||||
if (V[7].y == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.y;
|
||||
}
|
||||
if (V[7].z == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.z;
|
||||
}
|
||||
if (V[7].w == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.w;
|
||||
}
|
||||
if (V[7].x == 0x136032edU)
|
||||
SETFOUND(found, nonce.x);
|
||||
if (V[7].y == 0x136032edU)
|
||||
SETFOUND(found, nonce.y);
|
||||
if (V[7].z == 0x136032edU)
|
||||
SETFOUND(found, nonce.z);
|
||||
if (V[7].w == 0x136032edU)
|
||||
SETFOUND(found, nonce.w);
|
||||
}
|
||||
#elif defined VECTORS2
|
||||
if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
|
||||
uint found;
|
||||
|
||||
if (V[7].x == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.x;
|
||||
}
|
||||
if (V[7].y == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.y;
|
||||
}
|
||||
if (V[7].x == 0x136032edU)
|
||||
SETFOUND(found, nonce.x);
|
||||
if (V[7].y == 0x136032edU)
|
||||
SETFOUND(found, nonce.y);
|
||||
}
|
||||
#else
|
||||
if (V[7] == 0x136032edU) {
|
||||
uint found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce;
|
||||
uint found;
|
||||
|
||||
SETFOUND(found, nonce);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
3
ocl.c
3
ocl.c
@ -659,6 +659,9 @@ build:
|
||||
if (clState->goffset)
|
||||
strcat(CompilerOptions, " -D GOFFSET");
|
||||
|
||||
if (!clState->hasOpenCL11plus)
|
||||
strcat(CompilerOptions, " -D OCL1");
|
||||
|
||||
applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
|
||||
status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
|
||||
free(CompilerOptions);
|
||||
|
@ -389,46 +389,48 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
||||
|
||||
#define FOUND (0x0F)
|
||||
|
||||
#if defined(OCL1)
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
(Xfound) = output[FOUND]; \
|
||||
output[FOUND] += 1; \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#else
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
Xfound = atomic_add(&output[FOUND], 1); \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef VECTORS4
|
||||
bool result = W[117].x & W[117].y & W[117].z & W[117].w;
|
||||
if (!result) {
|
||||
uint found;
|
||||
|
||||
if (!W[117].x) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].x;
|
||||
}
|
||||
if (!W[117].y) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].y;
|
||||
}
|
||||
if (!W[117].z) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].z;
|
||||
}
|
||||
if (!W[117].w) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].w;
|
||||
}
|
||||
if (!W[117].x)
|
||||
SETFOUND(found, W[3].x);
|
||||
if (!W[117].y)
|
||||
SETFOUND(found, W[3].y);
|
||||
if (!W[117].z)
|
||||
SETFOUND(found, W[3].z);
|
||||
if (!W[117].w)
|
||||
SETFOUND(found, W[3].w);
|
||||
}
|
||||
#elif defined VECTORS2
|
||||
bool result = W[117].x & W[117].y;
|
||||
if (!result) {
|
||||
uint found;
|
||||
|
||||
if (!W[117].x) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].x;
|
||||
}
|
||||
if (!W[117].y) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3].y;
|
||||
}
|
||||
if (!W[117].x)
|
||||
SETFOUND(found, W[3].x);
|
||||
if (!W[117].y)
|
||||
SETFOUND(found, W[3].y);
|
||||
}
|
||||
#else
|
||||
if (!W[117]) {
|
||||
uint found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = W[3];
|
||||
uint found;
|
||||
|
||||
SETFOUND(found, W[3]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -1323,34 +1323,40 @@ Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
|
||||
|
||||
#define FOUND (0x0F)
|
||||
|
||||
#if defined(OCL1)
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
(Xfound) = output[FOUND]; \
|
||||
output[FOUND] += 1; \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#else
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
Xfound = atomic_add(&output[FOUND], 1); \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if defined(VECTORS2) || defined(VECTORS4)
|
||||
|
||||
if (any(Vals[2] == 0x136032edU)) {
|
||||
uint found;
|
||||
|
||||
if (Vals[2].x == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.x;
|
||||
}
|
||||
if (Vals[2].y == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.y;
|
||||
}
|
||||
if (Vals[2].x == 0x136032edU)
|
||||
SETFOUND(found, nonce.x);
|
||||
if (Vals[2].y == 0x136032edU)
|
||||
SETFOUND(found, nonce.y);
|
||||
#if defined(VECTORS4)
|
||||
if (Vals[2].z == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.z;
|
||||
}
|
||||
if (Vals[2].w == 0x136032edU) {
|
||||
found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce.w;
|
||||
}
|
||||
if (Vals[2].z == 0x136032edU)
|
||||
SETFOUND(found, nonce.z);
|
||||
if (Vals[2].w == 0x136032edU)
|
||||
SETFOUND(found, nonce.w);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
if (Vals[2] == 0x136032edU) {
|
||||
uint found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = nonce;
|
||||
uint found;
|
||||
|
||||
SETFOUND(found, nonce);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -684,6 +684,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
|
||||
#define FOUND (0x0F)
|
||||
|
||||
#if defined(OCL1)
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
(Xfound) = output[FOUND]; \
|
||||
output[FOUND] += 1; \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#else
|
||||
#define SETFOUND(Xfound, Xnonce) do { \
|
||||
Xfound = atomic_add(&output[FOUND], 1); \
|
||||
output[Xfound] = Xnonce; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
__kernel void search(__global const uint4 * restrict input,
|
||||
volatile __global uint*restrict output, __global uint4*restrict padcache,
|
||||
@ -722,8 +735,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
|
||||
|
||||
bool result = (EndianSwap(ostate1.w) <= target);
|
||||
if (result) {
|
||||
uint found = atomic_add(&output[FOUND], 1);
|
||||
output[found] = gid;
|
||||
uint found;
|
||||
|
||||
SETFOUND(found, gid);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user