From 59b88408f6e20be7366d52c3e577b9696cffde62 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 24 Feb 2012 00:29:55 +1100 Subject: [PATCH 1/5] phatk prefers to have the check unrolled instead of using any() since it's already zeroed. --- phatk120222.cl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/phatk120222.cl b/phatk120222.cl index 0072217c..4e548117 100644 --- a/phatk120222.cl +++ b/phatk120222.cl @@ -379,8 +379,8 @@ void search( const uint state0, const uint state1, const uint state2, const uint #define NFLAG (0x7F) #ifdef VECTORS4 - bool result = any(W[117] == 0); - if (result) { + bool result = W[117].x & W[117].y & W[117].z & W[117].w; + if (!result) { output[FOUND] = FOUND; if (!W[117].x) output[NFLAG & W[3].x] = W[3].x; @@ -392,8 +392,8 @@ void search( const uint state0, const uint state1, const uint state2, const uint output[NFLAG & W[3].w] = W[3].w; } #elif defined VECTORS2 - bool result = any(W[117] == 0); - if (result) { + bool result = W[117].x & W[117].y; + if (!result) { output[FOUND] = FOUND; if (!W[117].x) output[NFLAG & W[3].x] = W[3].x; @@ -401,7 +401,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint output[NFLAG & W[3].y] = W[3].y; } #else - if (W[117] == 0) + if (!W[117]) output[FOUND] = output[NFLAG & W[3]] = W[3]; #endif } From 837f9ebb76485768f561c59e641eb8fc5787b190 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Thu, 23 Feb 2012 15:49:50 +0100 Subject: [PATCH 2/5] revert to former method and do not use any()-function in check for valid nonces, as this is slower --- diakgcn120222.cl | 50 +++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/diakgcn120222.cl b/diakgcn120222.cl index 71f6345c..fa98dbd2 100644 --- a/diakgcn120222.cl +++ b/diakgcn120222.cl @@ -560,50 +560,56 @@ __kernel #define NFLAG (0x7F) #ifdef VECTORS8 - bool result = any(V[7] == 0x136032edU); + V[7] ^= 0x136032edU; - if (result) { + bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7; + + if (!result) { output[FOUND] = FOUND; - if (V[7].s0 == 0x136032edU) + if (!V[7].s0) output[NFLAG & nonce.s0] = nonce.s0; - if (V[7].s1 == 0x136032edU) + if (!V[7].s1) output[NFLAG & nonce.s1] = nonce.s1; - if (V[7].s2 == 0x136032edU) + if (!V[7].s2) output[NFLAG & nonce.s2] = nonce.s2; - if (V[7].s3 == 0x136032edU) + if (!V[7].s3) output[NFLAG & nonce.s3] = nonce.s3; - if (V[7].s4 == 0x136032edU) + if (!V[7].s4) output[NFLAG & nonce.s4] = nonce.s4; - if (V[7].s5 == 0x136032edU) + if (!V[7].s5) output[NFLAG & nonce.s5] = nonce.s5; - if (V[7].s6 == 0x136032edU) + if (!V[7].s6) output[NFLAG & nonce.s6] = nonce.s6; - if (V[7].s7 == 0x136032edU) + if (!V[7].s7) output[NFLAG & nonce.s7] = nonce.s7; } #elif defined VECTORS4 - bool result = any(V[7] == 0x136032edU); + V[7] ^= 0x136032edU; + + bool result = V[7].x & V[7].y & V[7].z & V[7].w; - if (result) { + if (!result) { output[FOUND] = FOUND; - if (V[7].x == 0x136032edU) + if (!V[7].x) output[NFLAG & nonce.x] = nonce.x; - if (V[7].y == 0x136032edU) + if (!V[7].y) output[NFLAG & nonce.y] = nonce.y; - if (V[7].z == 0x136032edU) + if (!V[7].z) output[NFLAG & nonce.z] = nonce.z; - if (V[7].w == 0x136032edU) + if (!V[7].w) output[NFLAG & nonce.w] = nonce.w; } #elif defined VECTORS2 - bool result = any(V[7] == 0x136032edU); + V[7] ^= 0x136032edU; + + bool result = V[7].x & V[7].y; - if (result) { + if (!result) { output[FOUND] = FOUND; - if (V[7].x == 0x136032edU) - output[NFLAG & nonce.x] = nonce.x; - if (V[7].y == 0x136032edU) - output[NFLAG & nonce.y] = nonce.y; + if (!V[7].x) + output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + if (!V[7].y) + output[FOUND] = output[NFLAG & nonce.y] = nonce.y; } #else if (V[7] == 0x136032edU) From b0a01be319acd0d9cc02fa8f4917f3a2a5edc8d3 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 24 Feb 2012 09:34:50 +1100 Subject: [PATCH 3/5] Revert use of any() in output code in poclbm kernel. Slower. --- poclbm120222.cl | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/poclbm120222.cl b/poclbm120222.cl index b0862707..e7ba623e 100644 --- a/poclbm120222.cl +++ b/poclbm120222.cl @@ -1256,28 +1256,32 @@ Vals[7]+=ch(Vals[0],Vals[1],Vals[2]); #define NFLAG (0x7F) #if defined(VECTORS4) - bool result = any(Vals[7] == 0x136032edU); + Vals[7] ^= 0x136032edU; - if (result) { + bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w; + + if (!result) { output[FOUND] = FOUND; - if (Vals[7].x == 0x136032edU) - output[NFLAG & nonce.x] = nonce.x; - if (Vals[7].y == 0x136032edU) - output[NFLAG & nonce.y] = nonce.y; - if (Vals[7].z == 0x136032edU) - output[NFLAG & nonce.z] = nonce.z; - if (Vals[7].w == 0x136032edU) - output[NFLAG & nonce.w] = nonce.w; + if (!Vals[7].x) + output[NFLAG & nonce.x] = nonce.x; + if (!Vals[7].y) + output[NFLAG & nonce.y] = nonce.y; + if (!Vals[7].z) + output[NFLAG & nonce.z] = nonce.z; + if (!Vals[7].w) + output[NFLAG & nonce.w] = nonce.w; } -#elif defined(VECTORS2) - bool result = any(Vals[7] == 0x136032edU); +#elif defined VECTORS2 + Vals[7] ^= 0x136032edU; + + bool result = Vals[7].x & Vals[7].y; - if (result) { + if (!result) { output[FOUND] = FOUND; - if (Vals[7].x == 0x136032edU) - output[NFLAG & nonce.x] = nonce.x; - if (Vals[7].y == 0x136032edU) - output[NFLAG & nonce.y] = nonce.y; + if (!Vals[7].x) + output[FOUND] = output[NFLAG & nonce.x] = nonce.x; + if (!Vals[7].y) + output[FOUND] = output[NFLAG & nonce.y] = nonce.y; } #else if (Vals[7] == 0x136032EDU) From 47747dc8a2c11ce4aa21c6644c8d74efff9cc76c Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Thu, 23 Feb 2012 16:14:27 +0100 Subject: [PATCH 4/5] revert to legacy nonce creation in the kernel without vector offset, but keep GOFFSET code removed --- device-gpu.c | 5 +++-- diakgcn120222.cl | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/device-gpu.c b/device-gpu.c index 233e59d2..a526c70c 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -835,7 +835,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk, return status; } -static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) +static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, + __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; cl_uint vwidth = clState->vwidth; @@ -845,7 +846,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + (i * threads); + nonces[i] = blk->nonce + i; CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(PreVal0); diff --git a/diakgcn120222.cl b/diakgcn120222.cl index fa98dbd2..89421a23 100644 --- a/diakgcn120222.cl +++ b/diakgcn120222.cl @@ -53,7 +53,15 @@ __kernel u V[8]; u W[16]; - const u nonce = base + (uint)get_global_id(0); +#ifdef VECTORS8 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; +#elif defined VECTORS4 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; +#elif defined VECTORS2 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; +#else + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; +#endif V[0] = PreVal0 + nonce; V[1] = B1; @@ -108,7 +116,16 @@ __kernel //---------------------------------------------------------------------------------- +#ifdef VECTORS8 + W[0] = PreW18 + (u)( rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, + rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); +#elif defined VECTORS4 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); +#elif defined VECTORS2 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); +#else W[0] = PreW18 + rotr25(nonce); +#endif W[1] = PreW19 + nonce; W[2] = 0x80000000U + rotr15(W[0]); W[3] = rotr15(W[1]); From de944dfd17abe2228a2a73ab39e7454f0a94ad7c Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 24 Feb 2012 14:03:04 +1100 Subject: [PATCH 5/5] Only send out extra longpoll requests if we want longpolls. --- cgminer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cgminer.c b/cgminer.c index b710f732..5f88172c 100644 --- a/cgminer.c +++ b/cgminer.c @@ -1630,8 +1630,8 @@ static bool get_upstream_work(struct work *work, bool lagging) /* If this is the current pool and supports longpoll but has not sent * a longpoll, send one now */ - if (unlikely(!pool->is_lp && pool == current_pool() && pool->hdr_path && - !pool_tset(pool, &pool->lp_sent))) { + if (unlikely(want_longpoll && !pool->is_lp && pool == current_pool() && + pool->hdr_path && !pool_tset(pool, &pool->lp_sent))) { req_longpoll = true; url = pool->lp_url; }