1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-14 16:58:05 +00:00

Use any() in kernel output code and revert breakage of diakgcn kernel.

This commit is contained in:
Con Kolivas 2012-02-22 14:10:04 +11:00
parent 145f3c0b1d
commit 8f08a775ad
6 changed files with 101 additions and 80 deletions

View File

@ -1237,26 +1237,28 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
#define NFLAG (0x7F)
#if defined(VECTORS4)
ZA[924] ^= 0x136032EDU;
bool result = ZA[924].x & ZA[924].y & ZA[924].z & ZA[924].w;
if (!result) {
if (!ZA[924].x)
output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
if (!ZA[924].y)
output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
if (!ZA[924].z)
output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z;
if (!ZA[924].w)
output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w;
bool result = any(ZA[924] == 0x136032EDU);
if (result) {
output[FOUND] = FOUND;
if (ZA[924].x == 0x136032EDU)
output[NFLAG & Znonce.x] = Znonce.x;
if (ZA[924].y == 0x136032EDU)
output[NFLAG & Znonce.y] = Znonce.y;
if (ZA[924].z == 0x136032EDU)
output[NFLAG & Znonce.z] = Znonce.z;
if (ZA[924].w == 0x136032EDU)
output[NFLAG & Znonce.w] = Znonce.w;
}
#elif defined(VECTORS2)
ZA[924] ^= 0x136032EDU;
bool result = ZA[924].x & ZA[924].y;
if (!result) {
if (!ZA[924].x)
output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
if (!ZA[924].y)
output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
bool result = any(ZA[924] == 0x136032EDU);
if (result) {
output[FOUND] = FOUND;
if (ZA[924].x == 0x136032EDU)
output[NFLAG & Znonce.x] = Znonce.x;
if (ZA[924].y == 0x136032EDU)
output[NFLAG & Znonce.y] = Znonce.y;
}
#else
if (ZA[924] == 0x136032EDU)

View File

@ -748,7 +748,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
return status;
}
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
__maybe_unused cl_uint threads)
{
cl_uint vwidth = clState->preferred_vwidth;
cl_kernel *kernel = &clState->kernel;
@ -758,7 +759,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint
nonces = alloca(sizeof(uint) * vwidth);
for (i = 0; i < vwidth; i++)
nonces[i] = blk->nonce + (i * threads);
nonces[i] = blk->nonce + i;
CL_SET_VARG(vwidth, nonces);
CL_SET_BLKARG(PreVal0);

View File

@ -55,18 +55,30 @@ __kernel
u V[8];
u W[16];
#ifdef GOFFSET
#ifdef VECTORS8
#ifdef VECTORS8
#ifdef GOFFSET
const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
#elif defined VECTORS4
#else
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
#endif
#elif defined VECTORS4
#ifdef GOFFSET
const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
#elif defined VECTORS2
#else
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
#endif
#elif defined VECTORS2
#ifdef GOFFSET
const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
#else
const u nonce = (uint)get_global_id(0);
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
#endif
#else
const u nonce = base + (uint)(get_global_id(0));
#ifdef GOFFSET
const u nonce = (uint)get_global_id(0);
#else
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
#endif
#endif
V[0] = PreVal0 + nonce;
@ -585,51 +597,54 @@ __kernel
#ifdef VECTORS8
V[7] ^= 0x136032ed;
bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7;
bool result = any(V[7] == 0);
if (!result) {
if (result) {
output[FOUND] = FOUND;
if (!V[7].s0)
output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0;
output[NFLAG & nonce.s0] = nonce.s0;
if (!V[7].s1)
output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1;
output[NFLAG & nonce.s1] = nonce.s1;
if (!V[7].s2)
output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2;
output[NFLAG & nonce.s2] = nonce.s2;
if (!V[7].s3)
output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3;
output[NFLAG & nonce.s3] = nonce.s3;
if (!V[7].s4)
output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4;
output[NFLAG & nonce.s4] = nonce.s4;
if (!V[7].s5)
output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5;
output[NFLAG & nonce.s5] = nonce.s5;
if (!V[7].s6)
output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6;
output[NFLAG & nonce.s6] = nonce.s6;
if (!V[7].s7)
output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7;
output[NFLAG & nonce.s7] = nonce.s7;
}
#elif defined VECTORS4
V[7] ^= 0x136032ed;
bool result = V[7].x & V[7].y & V[7].z & V[7].w;
bool result = any(V[7] == 0);
if (!result) {
if (result) {
output[FOUND] = FOUND;
if (!V[7].x)
output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
output[NFLAG & nonce.x] = nonce.x;
if (!V[7].y)
output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
output[NFLAG & nonce.y] = nonce.y;
if (!V[7].z)
output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
output[NFLAG & nonce.z] = nonce.z;
if (!V[7].w)
output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
output[NFLAG & nonce.w] = nonce.w;
}
#elif defined VECTORS2
V[7] ^= 0x136032ed;
bool result = V[7].x & V[7].y;
bool result = any(V[7] == 0);
if (!result) {
if (result) {
output[FOUND] = FOUND;
if (!V[7].x)
output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
output[NFLAG & nonce.x] = nonce.x;
if (!V[7].y)
output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
output[NFLAG & nonce.y] = nonce.y;
}
#else
if (V[7] == 0x136032ed)

View File

@ -227,12 +227,11 @@ static void *postcalc_hash(void *userdata)
pthread_detach(pthread_self());
do {
if (pcd->res[entry]) {
for (entry = 0; entry < FOUND; entry++) {
if (pcd->res[entry])
send_nonce(pcd, pcd->res[entry]);
nonces++;
}
} while (++entry < FOUND);
nonces++;
}
free(pcd);

View File

@ -391,27 +391,29 @@ void search( const uint state0, const uint state1, const uint state2, const uint
#define NFLAG (0x7F)
#ifdef VECTORS4
bool result = W[117].x & W[117].y & W[117].z & W[117].w;
if (!result) {
bool result = any(W[117] == 0);
if (result) {
output[FOUND] = FOUND;
if (!W[117].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
output[NFLAG & W[3].x] = W[3].x;
if (!W[117].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
output[NFLAG & W[3].y] = W[3].y;
if (!W[117].z)
output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
output[NFLAG & W[3].z] = W[3].z;
if (!W[117].w)
output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
output[NFLAG & W[3].w] = W[3].w;
}
#elif defined VECTORS2
bool result = W[117].x & W[117].y;
if (!result) {
bool result = any(W[117] == 0);
if (result) {
output[FOUND] = FOUND;
if (!W[117].x)
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
output[NFLAG & W[3].x] = W[3].x;
if (!W[117].y)
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
output[NFLAG & W[3].y] = W[3].y;
}
#else
if (!W[117])
if (W[117] == 0)
output[FOUND] = output[NFLAG & W[3]] = W[3];
#endif
}

View File

@ -1256,26 +1256,28 @@ Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
#define NFLAG (0x7F)
#if defined(VECTORS4)
Vals[7] ^= 0x136032ED;
bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
if (!result) {
if (!Vals[7].x)
output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
if (!Vals[7].y)
output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
if (!Vals[7].z)
output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
if (!Vals[7].w)
output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
bool result = any(Vals[7] == 0x136032ed);
if (result) {
output[FOUND] = FOUND;
if (Vals[7].x == 0x136032ed)
output[NFLAG & nonce.x] = nonce.x;
if (Vals[7].y == 0x136032ed)
output[NFLAG & nonce.y] = nonce.y;
if (Vals[7].z == 0x136032ed)
output[NFLAG & nonce.z] = nonce.z;
if (Vals[7].w == 0x136032ed)
output[NFLAG & nonce.w] = nonce.w;
}
#elif defined(VECTORS2)
Vals[7] ^= 0x136032ED;
bool result = Vals[7].x & Vals[7].y;
if (!result) {
if (!Vals[7].x)
output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
if (!Vals[7].y)
output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
bool result = any(Vals[7] == 0x136032ed);
if (result) {
output[FOUND] = FOUND;
if (Vals[7].x == 0x136032ed)
output[NFLAG & nonce.x] = nonce.x;
if (Vals[7].y == 0x136032ed)
output[NFLAG & nonce.y] = nonce.y;
}
#else
if (Vals[7] == 0x136032ED)