From 47747dc8a2c11ce4aa21c6644c8d74efff9cc76c Mon Sep 17 00:00:00 2001 From: Philip Kaufmann Date: Thu, 23 Feb 2012 16:14:27 +0100 Subject: [PATCH] revert to legacy nonce creation in the kernel without vector offset, but keep GOFFSET code removed --- device-gpu.c | 5 +++-- diakgcn120222.cl | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/device-gpu.c b/device-gpu.c index 233e59d2..a526c70c 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -835,7 +835,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk, return status; } -static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) +static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, + __maybe_unused cl_uint threads) { cl_kernel *kernel = &clState->kernel; cl_uint vwidth = clState->vwidth; @@ -845,7 +846,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + (i * threads); + nonces[i] = blk->nonce + i; CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(PreVal0); diff --git a/diakgcn120222.cl b/diakgcn120222.cl index fa98dbd2..89421a23 100644 --- a/diakgcn120222.cl +++ b/diakgcn120222.cl @@ -53,7 +53,15 @@ __kernel u V[8]; u W[16]; - const u nonce = base + (uint)get_global_id(0); +#ifdef VECTORS8 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; +#elif defined VECTORS4 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; +#elif defined VECTORS2 + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; +#else + const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; +#endif V[0] = PreVal0 + nonce; V[1] = B1; @@ -108,7 +116,16 @@ __kernel //---------------------------------------------------------------------------------- +#ifdef VECTORS8 + W[0] = PreW18 + (u)( rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, + rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U); +#elif defined VECTORS4 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U); +#elif defined VECTORS2 + W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U); +#else W[0] = PreW18 + rotr25(nonce); +#endif W[1] = PreW19 + nonce; W[2] = 0x80000000U + rotr15(W[0]); W[3] = rotr15(W[1]);