diff --git a/DiabloMiner120221.cl b/DiabloMiner120221.cl index 44bf23cf..a151db38 100644 --- a/DiabloMiner120221.cl +++ b/DiabloMiner120221.cl @@ -62,13 +62,7 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search( z ZA[930]; -#ifdef VECTORS4 - const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u); -#elif defined VECTORS2 - const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); -#else - const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); -#endif + const z Znonce = base + (uint)(get_global_id(0)); ZA[15] = Znonce + PreVal4_state0; diff --git a/device-gpu.c b/device-gpu.c index 03d98dc9..4aad0e2d 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -653,7 +653,7 @@ static _clState *clStates[MAX_GPUDEVICES]; #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var) #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var) -static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk) +static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) { cl_uint vwidth = clState->preferred_vwidth; cl_kernel *kernel = &clState->kernel; @@ -680,7 +680,7 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk) nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + i; + nonces[i] = blk->nonce + (i * threads); CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(fW0); @@ -704,7 +704,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk) return status; } -static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk) +static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk, + __maybe_unused cl_uint threads) { cl_uint vwidth = clState->preferred_vwidth; cl_kernel *kernel = &clState->kernel; @@ -747,7 +748,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk) return status; } -static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) +static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) { cl_uint vwidth = clState->preferred_vwidth; cl_kernel *kernel = &clState->kernel; @@ -757,7 +758,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + i; + nonces[i] = blk->nonce + (i * threads); CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(PreVal0); @@ -805,7 +806,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk) return status; } -static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk) +static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads) { cl_uint vwidth = clState->preferred_vwidth; cl_kernel *kernel = &clState->kernel; @@ -815,7 +816,7 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk) nonces = alloca(sizeof(uint) * vwidth); for (i = 0; i < vwidth; i++) - nonces[i] = blk->nonce + i; + nonces[i] = blk->nonce + (i * threads); CL_SET_VARG(vwidth, nonces); CL_SET_BLKARG(PreVal0); @@ -1071,7 +1072,7 @@ static void get_opencl_statline(char *buf, struct cgpu_info *gpu) } struct opencl_thread_data { - cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *); + cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint); uint32_t *res; struct work *last_work; struct work _last_work; @@ -1244,7 +1245,7 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, localThreads[0], gpu->intensity); if (hashes > gpu->max_hashes) gpu->max_hashes = hashes; - status = thrdata->queue_kernel_parameters(clState, &work->blk); + status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]); if (unlikely(status != CL_SUCCESS)) { applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); return 0; diff --git a/diakgcn120216.cl b/diakgcn120216.cl index 88ddb74f..62effa49 100644 --- a/diakgcn120216.cl +++ b/diakgcn120216.cl @@ -55,30 +55,18 @@ __kernel u V[8]; u W[16]; -#ifdef VECTORS8 - #ifdef GOFFSET +#ifdef GOFFSET + #ifdef VECTORS8 const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7); - #else - const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base; - #endif -#elif defined VECTORS4 - #ifdef GOFFSET + #elif defined VECTORS4 const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3); - #else - const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base; - #endif -#elif defined VECTORS2 - #ifdef GOFFSET + #elif defined VECTORS2 const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1); #else - const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base; - #endif -#else - #ifdef GOFFSET const u nonce = (uint)get_global_id(0); - #else - const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base; #endif +#else + const u nonce = base + (uint)(get_global_id(0)); #endif V[0] = PreVal0 + nonce; diff --git a/poclbm120214.cl b/poclbm120214.cl index 78c928c9..7e3ecff8 100644 --- a/poclbm120214.cl +++ b/poclbm120214.cl @@ -82,13 +82,7 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co u W[24]; u *Vals = &W[16]; // Now put at W[16] to be in same array -#ifdef VECTORS4 - const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u); -#elif defined VECTORS2 - const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u); -#else - const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE); -#endif + const u nonce = base + (uint)(get_global_id(0)); Vals[0]=Preval0+nonce;