From aa52e7dfd7d08b4a29d728cf79707f27b027cbd6 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Thu, 1 Mar 2012 17:24:38 +0100 Subject: [PATCH 1/4] optimized nonce-check and output code for -v 2 and -v 4 --- diakgcn120223.cl | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index de9ce58a..ada19379 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -1,4 +1,4 @@ -// DiaKGCN 24-02-2012 - OpenCL kernel by Diapolo +// DiaKGCN 01-03-2012 - OpenCL kernel by Diapolo // // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. // The kernel was rewritten by me (Diapolo) and is still public-domain! @@ -571,7 +571,6 @@ __kernel V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); - #define FOUND (0x80) #define NFLAG (0x7F) @@ -599,31 +598,11 @@ __kernel output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7; } #elif defined VECTORS4 - V[7] ^= 0x136032edU; - - bool result = V[7].x & V[7].y & V[7].z & V[7].w; - - if (!result) { - if (!V[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (!V[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; - if (!V[7].z) - output[FOUND] = output[NFLAG & nonce.z] = nonce.z; - if (!V[7].w) - output[FOUND] = output[NFLAG & nonce.w] = nonce.w; - } + if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) + output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? 
nonce.z : nonce.w)); #elif defined VECTORS2 - V[7] ^= 0x136032edU; - - bool result = V[7].x & V[7].y; - - if (!result) { - if (!V[7].x) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (!V[7].y) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; - } + if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) + output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y; #else if (V[7] == 0x136032edU) output[FOUND] = output[NFLAG & nonce] = nonce; From c32615871070ffe7e52d690685a3a2a6dc7582da Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 16 Mar 2012 23:34:15 +0100 Subject: [PATCH 2/4] reordered an addition in the kernel, which results in fewer instructions used in the GPU ISA code for GCN --- diakgcn120223.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index ada19379..7951676a 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -141,8 +141,8 @@ __kernel W[14] = W[7] + PreW32 + rotr15(W[12]); W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]); - V[1] += 0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]); - V[5] = 0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]); + V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0]; + V[5] = 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]); V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]); V[4] = 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]); From 782fc63c505b3cb374608d73178b6b0198251899 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 16 Mar 2012 23:38:39 +0100 Subject: [PATCH 3/4] forgot to update kernel modification date, fixed ;) --- diakgcn120223.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index 7951676a..4e70fd65 100644 --- 
a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -1,4 +1,4 @@ -// DiaKGCN 01-03-2012 - OpenCL kernel by Diapolo +// DiaKGCN 16-03-2012 - OpenCL kernel by Diapolo // // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. // The kernel was rewritten by me (Diapolo) and is still public-domain! From e2b7c934f60170a0d53770eb337b3765d0f12120 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 16 Mar 2012 23:52:20 +0100 Subject: [PATCH 4/4] removed 8-component vector support from kernel, as this is not supported in CGMINER anyway --- diakgcn120223.cl | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index 4e70fd65..ad981a63 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -3,9 +3,7 @@ // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. // The kernel was rewritten by me (Diapolo) and is still public-domain! 
-#ifdef VECTORS8 - typedef uint8 u; -#elif defined VECTORS4 +#ifdef VECTORS4 typedef uint4 u; #elif defined VECTORS2 typedef uint2 u; @@ -53,9 +51,7 @@ __kernel u V[8]; u W[16]; -#ifdef VECTORS8 - const u nonce = (uint)(get_local_id(0)) * 8U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; -#elif defined VECTORS4 +#ifdef VECTORS4 const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; #elif defined VECTORS2 const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; @@ -116,9 +112,7 @@ __kernel //---------------------------------------------------------------------------------- -#ifdef VECTORS8 - W[0] = PreW18 + (u)(rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); -#elif defined VECTORS4 +#ifdef VECTORS4 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); #elif defined VECTORS2 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); @@ -574,30 +568,7 @@ __kernel #define FOUND (0x80) #define NFLAG (0x7F) -#ifdef VECTORS8 - V[7] ^= 0x136032edU; - - bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7; - - if (!result) { - if (!V[7].s0) - output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0; - if (!V[7].s1) - output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1; - if (!V[7].s2) - output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2; - if (!V[7].s3) - output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3; - if (!V[7].s4) - output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4; - if (!V[7].s5) - output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5; - if (!V[7].s6) - output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6; - if (!V[7].s7) - output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7; - 
} -#elif defined VECTORS4 +#ifdef VECTORS4 if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); #elif defined VECTORS2