From 8f08a775add15bb47845d1f833218bfd03b5ce71 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Wed, 22 Feb 2012 14:10:04 +1100 Subject: [PATCH] Use any() in kernel output code and revert breakage of diakgcn kernel. --- DiabloMiner120221.cl | 38 +++++++++++++------------ device-gpu.c | 5 ++-- diakgcn120216.cl | 67 +++++++++++++++++++++++++++----------------- findnonce.c | 9 +++--- phatk120213.cl | 24 ++++++++-------- poclbm120214.cl | 38 +++++++++++++------------ 6 files changed, 101 insertions(+), 80 deletions(-) diff --git a/DiabloMiner120221.cl b/DiabloMiner120221.cl index a151db38..4bc7394d 100644 --- a/DiabloMiner120221.cl +++ b/DiabloMiner120221.cl @@ -1237,26 +1237,28 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search( #define NFLAG (0x7F) #if defined(VECTORS4) - ZA[924] ^= 0x136032EDU; - bool result = ZA[924].x & ZA[924].y & ZA[924].z & ZA[924].w; - if (!result) { - if (!ZA[924].x) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (!ZA[924].y) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; - if (!ZA[924].z) - output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z; - if (!ZA[924].w) - output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w; + bool result = any(ZA[924] == 0x136032EDU); + + if (result) { + output[FOUND] = FOUND; + if (ZA[924].x == 0x136032EDU) + output[NFLAG & Znonce.x] = Znonce.x; + if (ZA[924].y == 0x136032EDU) + output[NFLAG & Znonce.y] = Znonce.y; + if (ZA[924].z == 0x136032EDU) + output[NFLAG & Znonce.z] = Znonce.z; + if (ZA[924].w == 0x136032EDU) + output[NFLAG & Znonce.w] = Znonce.w; } #elif defined(VECTORS2) - ZA[924] ^= 0x136032EDU; - bool result = ZA[924].x & ZA[924].y; - if (!result) { - if (!ZA[924].x) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (!ZA[924].y) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + bool result = any(ZA[924] == 0x136032EDU); + + if (result) { + output[FOUND] = FOUND; + if (ZA[924].x == 0x136032EDU) + output[NFLAG & Znonce.x] = Znonce.x; + if (ZA[924].y == 0x136032EDU) + output[NFLAG & Znonce.y] = Znonce.y; } #else if (ZA[924] == 0x136032EDU) diff --git a/device-gpu.c b/device-gpu.c index 4aad0e2d..f8781409 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -748,7 +748,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk, return status; } -static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) +static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, + __maybe_unused cl_uint threads) { cl_uint vwidth = clState->preferred_vwidth; cl_kernel *kernel = &clState->kernel; @@ -758,7 +759,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + (i * threads); + nonces[i] = blk->nonce + i; CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(PreVal0); diff --git a/diakgcn120216.cl b/diakgcn120216.cl index 62effa49..a64d546b 100644 --- a/diakgcn120216.cl +++ b/diakgcn120216.cl @@ -55,18 +55,30 @@ __kernel u V[8]; u W[16]; -#ifdef GOFFSET - #ifdef VECTORS8 +#ifdef VECTORS8 + #ifdef GOFFSET const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7); - #elif defined VECTORS4 + #else + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; + #endif +#elif defined VECTORS4 + #ifdef GOFFSET const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3); - #elif defined VECTORS2 + #else + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; + #endif +#elif defined VECTORS2 + #ifdef GOFFSET const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1); #else - const u nonce = (uint)get_global_id(0); + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; #endif #else - const u nonce = base + (uint)(get_global_id(0)); + #ifdef GOFFSET + const u nonce = (uint)get_global_id(0); + #else + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; + #endif #endif V[0] = PreVal0 + nonce; @@ -585,51 +597,54 @@ __kernel #ifdef VECTORS8 V[7] ^= 0x136032ed; - bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7; + bool result = any(V[7] == 0); - if (!result) { + if (result) { + output[FOUND] = FOUND; if (!V[7].s0) - output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0; + output[NFLAG & nonce.s0] = nonce.s0; if (!V[7].s1) - output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1; + output[NFLAG & nonce.s1] = nonce.s1; if (!V[7].s2) - output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2; + output[NFLAG & nonce.s2] = nonce.s2; if (!V[7].s3) - output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3; + output[NFLAG & nonce.s3] = nonce.s3; if (!V[7].s4) - output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4; + output[NFLAG & nonce.s4] = nonce.s4; if (!V[7].s5) - output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5; + output[NFLAG & nonce.s5] = nonce.s5; if (!V[7].s6) - output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6; + output[NFLAG & nonce.s6] = nonce.s6; if (!V[7].s7) - output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7; + output[NFLAG & nonce.s7] = nonce.s7; } #elif defined VECTORS4 V[7] ^= 0x136032ed; - bool result = V[7].x & V[7].y & V[7].z & V[7].w; + bool result = any(V[7] == 0); - if (!result) { + if (result) { + output[FOUND] = FOUND; if (!V[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + output[NFLAG & nonce.x] = nonce.x; if (!V[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; + output[NFLAG & nonce.y] = nonce.y; if (!V[7].z) - output[FOUND] = output[NFLAG & nonce.z] = nonce.z; + output[NFLAG & nonce.z] = nonce.z; if (!V[7].w) - output[FOUND] = output[NFLAG & nonce.w] = nonce.w; + output[NFLAG & nonce.w] = nonce.w; } #elif defined VECTORS2 V[7] ^= 0x136032ed; - bool result = V[7].x & V[7].y; + bool result = any(V[7] == 0); - if (!result) { + if (result) { + output[FOUND] = FOUND; if (!V[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + output[NFLAG & nonce.x] = nonce.x; if (!V[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; + output[NFLAG & nonce.y] = nonce.y; } #else if (V[7] == 0x136032ed) diff --git a/findnonce.c b/findnonce.c index d11925b6..98d7f0e7 100644 --- a/findnonce.c +++ b/findnonce.c @@ -227,12 +227,11 @@ static void *postcalc_hash(void *userdata) pthread_detach(pthread_self()); - do { - if (pcd->res[entry]) { + for (entry = 0; entry < FOUND; entry++) { + if (pcd->res[entry]) send_nonce(pcd, pcd->res[entry]); - nonces++; - } - } while (++entry < FOUND); + nonces++; + } free(pcd); diff --git a/phatk120213.cl b/phatk120213.cl index 7d1c3200..5c89fb96 100644 --- a/phatk120213.cl +++ b/phatk120213.cl @@ -391,27 +391,29 @@ void search( const uint state0, const uint state1, const uint state2, const uint #define NFLAG (0x7F) #ifdef VECTORS4 - bool result = W[117].x & W[117].y & W[117].z & W[117].w; - if (!result) { + bool result = any(W[117] == 0); + if (result) { + output[FOUND] = FOUND; if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; + output[NFLAG & W[3].x] = W[3].x; if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; + output[NFLAG & W[3].y] = W[3].y; if (!W[117].z) - output[FOUND] = output[NFLAG & W[3].z] = W[3].z; + output[NFLAG & W[3].z] = W[3].z; if (!W[117].w) - output[FOUND] = output[NFLAG & W[3].w] = W[3].w; + output[NFLAG & W[3].w] = W[3].w; } #elif defined VECTORS2 - bool result = W[117].x & W[117].y; - if (!result) { + bool result = any(W[117] == 0); + if (result) { + output[FOUND] = FOUND; if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; + output[NFLAG & W[3].x] = W[3].x; if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; + output[NFLAG & W[3].y] = W[3].y; } #else - if (!W[117]) + if (W[117] == 0) output[FOUND] = output[NFLAG & W[3]] = W[3]; #endif } diff --git a/poclbm120214.cl b/poclbm120214.cl index 7e3ecff8..0972fcc6 100644 --- a/poclbm120214.cl +++ b/poclbm120214.cl @@ -1256,26 +1256,28 @@ Vals[7]+=ch(Vals[0],Vals[1],Vals[2]); #define NFLAG (0x7F) #if defined(VECTORS4) - Vals[7] ^= 0x136032ED; - bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w; - if (!result) { - if (!Vals[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (!Vals[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; - if (!Vals[7].z) - output[FOUND] = output[NFLAG & nonce.z] = nonce.z; - if (!Vals[7].w) - output[FOUND] = output[NFLAG & nonce.w] = nonce.w; + bool result = any(Vals[7] == 0x136032ed); + + if (result) { + output[FOUND] = FOUND; + if (Vals[7].x == 0x136032ed) + output[NFLAG & nonce.x] = nonce.x; + if (Vals[7].y == 0x136032ed) + output[NFLAG & nonce.y] = nonce.y; + if (Vals[7].z == 0x136032ed) + output[NFLAG & nonce.z] = nonce.z; + if (Vals[7].w == 0x136032ed) + output[NFLAG & nonce.w] = nonce.w; } #elif defined(VECTORS2) - Vals[7] ^= 0x136032ED; - bool result = Vals[7].x & Vals[7].y; - if (!result) { - if (!Vals[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (!Vals[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; + bool result = any(Vals[7] == 0x136032ed); + + if (result) { + output[FOUND] = FOUND; + if (Vals[7].x == 0x136032ed) + output[NFLAG & nonce.x] = nonce.x; + if (Vals[7].y == 0x136032ed) + output[NFLAG & nonce.y] = nonce.y; } #else if (Vals[7] == 0x136032ED)