mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-29 16:04:33 +00:00
Use global offset parameter to diablo and poclbm kernel ONLY for 1 vector kernels.
This commit is contained in:
parent
39395eb1e0
commit
621bcca7f5
47
device-gpu.c
47
device-gpu.c
@ -743,10 +743,8 @@ static _clState *clStates[MAX_GPUDEVICES];
|
||||
static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
|
||||
{
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
cl_uint vwidth = clState->vwidth;
|
||||
unsigned int i, num = 0;
|
||||
unsigned int num = 0;
|
||||
cl_int status = 0;
|
||||
uint *nonces;
|
||||
|
||||
CL_SET_BLKARG(ctx_a);
|
||||
CL_SET_BLKARG(ctx_b);
|
||||
@ -765,10 +763,15 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
|
||||
CL_SET_BLKARG(cty_g);
|
||||
CL_SET_BLKARG(cty_h);
|
||||
|
||||
nonces = alloca(sizeof(uint) * vwidth);
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + (i * threads);
|
||||
CL_SET_VARG(vwidth, nonces);
|
||||
if (!clState->goffset) {
|
||||
cl_uint vwidth = clState->vwidth;
|
||||
uint *nonces = alloca(sizeof(uint) * vwidth);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + (i * threads);
|
||||
CL_SET_VARG(vwidth, nonces);
|
||||
}
|
||||
|
||||
CL_SET_BLKARG(fW0);
|
||||
CL_SET_BLKARG(fW1);
|
||||
@ -896,15 +899,19 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
|
||||
static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
|
||||
{
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
cl_uint vwidth = clState->vwidth;
|
||||
unsigned int i, num = 0;
|
||||
unsigned int num = 0;
|
||||
cl_int status = 0;
|
||||
uint *nonces;
|
||||
|
||||
nonces = alloca(sizeof(uint) * vwidth);
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + (i * threads);
|
||||
CL_SET_VARG(vwidth, nonces);
|
||||
if (!clState->goffset) {
|
||||
cl_uint vwidth = clState->vwidth;
|
||||
uint *nonces = alloca(sizeof(uint) * vwidth);
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < vwidth; i++)
|
||||
nonces[i] = blk->nonce + (i * threads);
|
||||
CL_SET_VARG(vwidth, nonces);
|
||||
}
|
||||
|
||||
|
||||
CL_SET_BLKARG(PreVal0);
|
||||
CL_SET_BLKARG(PreVal0addK7);
|
||||
@ -1338,8 +1345,16 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
||||
memset(thrdata->res, 0, BUFFERSIZE);
|
||||
clFinish(clState->commandQueue);
|
||||
}
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
|
||||
if (clState->goffset) {
|
||||
size_t global_work_offset[1];
|
||||
|
||||
global_work_offset[0] = work->blk.nonce;
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
} else
|
||||
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
|
||||
globalThreads, localThreads, 0, NULL, NULL);
|
||||
if (unlikely(status != CL_SUCCESS)) {
|
||||
applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
|
||||
return 0;
|
||||
|
@ -48,7 +48,9 @@ __kernel
|
||||
__attribute__((vec_type_hint(z)))
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
void search(
|
||||
#ifndef GOFFSET
|
||||
const z base,
|
||||
#endif
|
||||
const uint PreVal4_state0, const uint PreVal4_state0_k7,
|
||||
const uint PreVal4_T1,
|
||||
const uint W18, const uint W19,
|
||||
@ -65,7 +67,11 @@ void search(
|
||||
|
||||
z ZA[25];
|
||||
|
||||
#ifdef GOFFSET
|
||||
const z Znonce = (uint)(get_global_id(0));
|
||||
#else
|
||||
const z Znonce = base + (uint)(get_global_id(0));
|
||||
#endif
|
||||
|
||||
ZA[2]=Znonce;
|
||||
ZA[2]+=PreVal4_state0;
|
||||
|
12
ocl.c
12
ocl.c
@ -335,7 +335,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
/* Create binary filename based on parameters passed to opencl
|
||||
* compiler to ensure we only load a binary that matches what would
|
||||
* have otherwise created. The filename is:
|
||||
* name + kernelname + v + vectors + w + work_size + l + sizeof(long) + .bin
|
||||
* name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
|
||||
*/
|
||||
char binaryfilename[255];
|
||||
char filename[255];
|
||||
@ -398,6 +398,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
gpus[gpu].vwidth = preferred_vwidth;
|
||||
}
|
||||
|
||||
if ((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO) &&
|
||||
clState->vwidth == 1 && clState->hasOpenCL11plus)
|
||||
clState->goffset = true;
|
||||
|
||||
if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
|
||||
clState->wsize = gpus[gpu].work_size;
|
||||
else if (strstr(name, "Tahiti"))
|
||||
@ -431,7 +435,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
}
|
||||
|
||||
strcat(binaryfilename, name);
|
||||
|
||||
if (clState->goffset)
|
||||
strcat(binaryfilename, "g");
|
||||
strcat(binaryfilename, "v");
|
||||
sprintf(numbuf, "%d", clState->vwidth);
|
||||
strcat(binaryfilename, numbuf);
|
||||
@ -533,6 +538,9 @@ build:
|
||||
} else
|
||||
applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
|
||||
|
||||
if (clState->goffset)
|
||||
strcat(CompilerOptions, " -D GOFFSET");
|
||||
|
||||
applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
|
||||
status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
|
||||
free(CompilerOptions);
|
||||
|
1
ocl.h
1
ocl.h
@ -21,6 +21,7 @@ typedef struct {
|
||||
cl_mem outputBuffer;
|
||||
bool hasBitAlign;
|
||||
bool hasOpenCL11plus;
|
||||
bool goffset;
|
||||
cl_uint vwidth;
|
||||
size_t max_work_size;
|
||||
size_t wsize;
|
||||
|
@ -73,7 +73,9 @@ void search(const uint state0, const uint state1, const uint state2, const uint
|
||||
const uint state4, const uint state5, const uint state6, const uint state7,
|
||||
const uint b1, const uint c1,
|
||||
const uint f1, const uint g1, const uint h1,
|
||||
#ifndef GOFFSET
|
||||
const u base,
|
||||
#endif
|
||||
const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
|
||||
const uint D1A, const uint C1addK5, const uint B1addK6,
|
||||
const uint W16addK16, const uint W17addK17,
|
||||
@ -83,8 +85,11 @@ void search(const uint state0, const uint state1, const uint state2, const uint
|
||||
u W[24];
|
||||
u *Vals = &W[16]; // Now put at W[16] to be in same array
|
||||
|
||||
#ifdef GOFFSET
|
||||
const u nonce = (uint)(get_global_id(0));
|
||||
#else
|
||||
const u nonce = base + (uint)(get_global_id(0));
|
||||
|
||||
#endif
|
||||
|
||||
Vals[0]=Preval0;
|
||||
Vals[0]+=nonce;
|
||||
|
Loading…
x
Reference in New Issue
Block a user