mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 12:34:27 +00:00
Pass vectors * worksize to kernel to avoid one op.
This commit is contained in:
parent
70e8ade54f
commit
fb077c6d59
@ -63,9 +63,9 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
|
||||
z ZA[930];
|
||||
|
||||
#ifdef VECTORS4
|
||||
const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
||||
const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#elif defined VECTORS2
|
||||
const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
||||
const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#else
|
||||
const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
||||
#endif
|
||||
|
@ -54,13 +54,13 @@ __kernel
|
||||
u W[16];
|
||||
|
||||
#ifdef VECTORS8
|
||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
|
||||
#elif defined VECTORS4
|
||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 8u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#elif defined VECTORS4
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#elif defined VECTORS2
|
||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#else
|
||||
const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
|
||||
const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
||||
#endif
|
||||
|
||||
V[0] = PreVal0 + nonce;
|
||||
|
4
ocl.c
4
ocl.c
@ -505,8 +505,8 @@ build:
|
||||
/* create a cl program executable for all the devices specified */
|
||||
char *CompilerOptions = calloc(1, 256);
|
||||
|
||||
sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d",
|
||||
(int)clState->wsize, clState->vwidth);
|
||||
/* WORKVEC must be passed as NAME=value: "-D WORKVEC%d" would define a macro
 * named e.g. WORKVEC256 and leave WORKVEC itself undefined in the kernels. */
sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
|
||||
(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
|
||||
applog(LOG_DEBUG, "Setting worksize to %d", clState->wsize);
|
||||
if (clState->vwidth > 1)
|
||||
applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
|
||||
|
@ -188,12 +188,12 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
||||
|
||||
#ifdef VECTORS4
|
||||
//Less dependencies to get both the local id and group id and then add them
|
||||
W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
||||
W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
|
||||
//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
|
||||
W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
|
||||
#elif defined VECTORS2
|
||||
W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
||||
W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
|
||||
W[18] = PreW18 + (u){r, r ^ 0x2004000U};
|
||||
#else
|
||||
|
@ -83,9 +83,9 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
|
||||
u *Vals = &W[16]; // Now put at W[16] to be in same array
|
||||
|
||||
#ifdef VECTORS4
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#elif defined VECTORS2
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
|
||||
const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
|
||||
#else
|
||||
const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user