From d7d0797969aa062a65d1d897c62c196ffccf9445 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 24 Feb 2012 08:24:36 +0100 Subject: [PATCH 1/3] unified code for generating nonce in kernel and moved addition of base to the end -> faster --- diakgcn120223.cl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index 6e637f54..54748867 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -53,14 +53,10 @@ __kernel u V[8]; u W[16]; -#ifdef VECTORS8 - const u nonce = base + (uint)(get_local_id(0)) * 8u + (uint)(get_group_id(0)) * (WORKVEC); -#elif VECTORS4 - const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC); -#elif defined VECTORS2 - const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC); +#if defined(VECTORS2) || defined(VECTORS4) || defined(VECTORS8) + const u nonce = (uint)(get_local_id(0)) * (uint)(vec_step(u)) + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; #else - const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); + const u nonce = (uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base; #endif V[0] = PreVal0 + nonce; From d9d4831b47609fa8e758a8364194522a93633603 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 24 Feb 2012 15:32:19 +0100 Subject: [PATCH 2/3] removed vec_step() as this could lead to errors on older SDKs --- diakgcn120223.cl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index 54748867..203d1301 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -1,4 +1,4 @@ -// DiaKGCN 23-02-2012 - OpenCL kernel by Diapolo +// DiaKGCN 24-02-2012 - OpenCL kernel by Diapolo // // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3. 
// The kernel was rewritten by me (Diapolo) and is still public-domain! @@ -53,8 +53,12 @@ __kernel u V[8]; u W[16]; -#if defined(VECTORS2) || defined(VECTORS4) || defined(VECTORS8) - const u nonce = (uint)(get_local_id(0)) * (uint)(vec_step(u)) + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; +#ifdef VECTORS8 + const u nonce = (uint)(get_local_id(0)) * 8U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; +#elif defined VECTORS4 + const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; +#elif defined VECTORS2 + const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base; #else const u nonce = (uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base; #endif @@ -113,8 +117,8 @@ __kernel //---------------------------------------------------------------------------------- #ifdef VECTORS8 - W[0] = PreW18 + (u)( rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, - rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); + W[0] = PreW18 + (u)( rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, + rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); #elif defined VECTORS4 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); #elif defined VECTORS2 From 77f177b68310d5dfb36456b1aad18da300277235 Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Fri, 24 Feb 2012 15:38:41 +0100 Subject: [PATCH 3/3] only small code formatting changes --- diakgcn120223.cl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/diakgcn120223.cl b/diakgcn120223.cl index 203d1301..de9ce58a 100644 --- a/diakgcn120223.cl +++ b/diakgcn120223.cl @@ -117,8 +117,7 
@@ __kernel //---------------------------------------------------------------------------------- #ifdef VECTORS8 - W[0] = PreW18 + (u)( rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, - rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); + W[0] = PreW18 + (u)(rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); #elif defined VECTORS4 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); #elif defined VECTORS2