mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-10 23:08:07 +00:00
Precompute the base nonce for each vector offset on the host, avoiding one extra addition in the kernel.
This commit is contained in:
parent
bca9814d41
commit
145f3c0b1d
@ -62,13 +62,7 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
|
|||||||
|
|
||||||
z ZA[930];
|
z ZA[930];
|
||||||
|
|
||||||
#ifdef VECTORS4
|
const z Znonce = base + (uint)(get_global_id(0));
|
||||||
const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
|
||||||
#elif defined VECTORS2
|
|
||||||
const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
|
||||||
#else
|
|
||||||
const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ZA[15] = Znonce + PreVal4_state0;
|
ZA[15] = Znonce + PreVal4_state0;
|
||||||
|
|
||||||
|
19
device-gpu.c
19
device-gpu.c
@ -653,7 +653,7 @@ static _clState *clStates[MAX_GPUDEVICES];
|
|||||||
#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
|
#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
|
||||||
#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
|
#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
|
||||||
|
|
||||||
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
|
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_uint vwidth = clState->preferred_vwidth;
|
cl_uint vwidth = clState->preferred_vwidth;
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -680,7 +680,7 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
|
|
||||||
nonces = alloca(sizeof(uint) * vwidth);
|
nonces = alloca(sizeof(uint) * vwidth);
|
||||||
for (i = 0; i < vwidth; i++)
|
for (i = 0; i < vwidth; i++)
|
||||||
nonces[i] = blk->nonce + i;
|
nonces[i] = blk->nonce + (i * threads);
|
||||||
CL_SET_VARG(vwidth, nonces);
|
CL_SET_VARG(vwidth, nonces);
|
||||||
|
|
||||||
CL_SET_BLKARG(fW0);
|
CL_SET_BLKARG(fW0);
|
||||||
@ -704,7 +704,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
|
static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
|
||||||
|
__maybe_unused cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_uint vwidth = clState->preferred_vwidth;
|
cl_uint vwidth = clState->preferred_vwidth;
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -747,7 +748,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
|
static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_uint vwidth = clState->preferred_vwidth;
|
cl_uint vwidth = clState->preferred_vwidth;
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -757,7 +758,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
|
|
||||||
nonces = alloca(sizeof(uint) * vwidth);
|
nonces = alloca(sizeof(uint) * vwidth);
|
||||||
for (i = 0; i < vwidth; i++)
|
for (i = 0; i < vwidth; i++)
|
||||||
nonces[i] = blk->nonce + i;
|
nonces[i] = blk->nonce + (i * threads);
|
||||||
CL_SET_VARG(vwidth, nonces);
|
CL_SET_VARG(vwidth, nonces);
|
||||||
|
|
||||||
CL_SET_BLKARG(PreVal0);
|
CL_SET_BLKARG(PreVal0);
|
||||||
@ -805,7 +806,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk)
|
static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
|
||||||
{
|
{
|
||||||
cl_uint vwidth = clState->preferred_vwidth;
|
cl_uint vwidth = clState->preferred_vwidth;
|
||||||
cl_kernel *kernel = &clState->kernel;
|
cl_kernel *kernel = &clState->kernel;
|
||||||
@ -815,7 +816,7 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk)
|
|||||||
|
|
||||||
nonces = alloca(sizeof(uint) * vwidth);
|
nonces = alloca(sizeof(uint) * vwidth);
|
||||||
for (i = 0; i < vwidth; i++)
|
for (i = 0; i < vwidth; i++)
|
||||||
nonces[i] = blk->nonce + i;
|
nonces[i] = blk->nonce + (i * threads);
|
||||||
CL_SET_VARG(vwidth, nonces);
|
CL_SET_VARG(vwidth, nonces);
|
||||||
|
|
||||||
CL_SET_BLKARG(PreVal0);
|
CL_SET_BLKARG(PreVal0);
|
||||||
@ -1071,7 +1072,7 @@ static void get_opencl_statline(char *buf, struct cgpu_info *gpu)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct opencl_thread_data {
|
struct opencl_thread_data {
|
||||||
cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
|
cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
|
||||||
uint32_t *res;
|
uint32_t *res;
|
||||||
struct work *last_work;
|
struct work *last_work;
|
||||||
struct work _last_work;
|
struct work _last_work;
|
||||||
@ -1244,7 +1245,7 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
localThreads[0], gpu->intensity);
|
localThreads[0], gpu->intensity);
|
||||||
if (hashes > gpu->max_hashes)
|
if (hashes > gpu->max_hashes)
|
||||||
gpu->max_hashes = hashes;
|
gpu->max_hashes = hashes;
|
||||||
status = thrdata->queue_kernel_parameters(clState, &work->blk);
|
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
|
||||||
if (unlikely(status != CL_SUCCESS)) {
|
if (unlikely(status != CL_SUCCESS)) {
|
||||||
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -55,30 +55,18 @@ __kernel
|
|||||||
u V[8];
|
u V[8];
|
||||||
u W[16];
|
u W[16];
|
||||||
|
|
||||||
#ifdef VECTORS8
|
#ifdef GOFFSET
|
||||||
#ifdef GOFFSET
|
#ifdef VECTORS8
|
||||||
const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
|
const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
#else
|
#elif defined VECTORS4
|
||||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
|
|
||||||
#endif
|
|
||||||
#elif defined VECTORS4
|
|
||||||
#ifdef GOFFSET
|
|
||||||
const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
|
const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
|
||||||
#else
|
#elif defined VECTORS2
|
||||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
|
|
||||||
#endif
|
|
||||||
#elif defined VECTORS2
|
|
||||||
#ifdef GOFFSET
|
|
||||||
const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
|
const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
|
||||||
#else
|
#else
|
||||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
|
const u nonce = (uint)get_global_id(0);
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef GOFFSET
|
const u nonce = base + (uint)(get_global_id(0));
|
||||||
const u nonce = (uint)get_global_id(0);
|
|
||||||
#else
|
|
||||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
V[0] = PreVal0 + nonce;
|
V[0] = PreVal0 + nonce;
|
||||||
|
@ -82,13 +82,7 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
|
|||||||
u W[24];
|
u W[24];
|
||||||
u *Vals = &W[16]; // Now put at W[16] to be in same array
|
u *Vals = &W[16]; // Now put at W[16] to be in same array
|
||||||
|
|
||||||
#ifdef VECTORS4
|
const u nonce = base + (uint)(get_global_id(0));
|
||||||
const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
|
||||||
#elif defined VECTORS2
|
|
||||||
const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
|
||||||
#else
|
|
||||||
const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
Vals[0]=Preval0+nonce;
|
Vals[0]=Preval0+nonce;
|
||||||
|
Loading…
Reference in New Issue
Block a user