Pass the per-work-group nonce stride to the kernels as WORKVEC.

ocl.c now adds -D WORKVEC=<worksize * vector width> to the OpenCL build
options, and the vectorised nonce calculations in the diablo, diakgcn,
phatk and poclbm kernels multiply the group id by WORKVEC instead of
computing WORKSIZE * <width> (or get_local_size(0) << n) themselves.

Note: the option must be spelled "-D WORKVEC=%d"; without the '=' the
build would define a macro named WORKVEC<number> and leave WORKVEC itself
undefined in the kernels.

diff --git a/diablo120223.cl b/diablo120223.cl
index a08e47cf..a222dddf 100644
--- a/diablo120223.cl
+++ b/diablo120223.cl
@@ -63,9 +63,9 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
 	z ZA[930];
 
 #ifdef VECTORS4
-	const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
 	const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif
diff --git a/diakgcn120223.cl b/diakgcn120223.cl
index 89421a23..9635ff5d 100644
--- a/diakgcn120223.cl
+++ b/diakgcn120223.cl
@@ -54,13 +54,13 @@ __kernel
 	u W[16];
 
 #ifdef VECTORS8
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
-#elif defined VECTORS4
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
+	const u nonce = base + (uint)(get_local_id(0)) * 8u + (uint)(get_group_id(0)) * (WORKVEC);
+#elif defined VECTORS4
+	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
+	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
+	const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif
 
 	V[0] = PreVal0 + nonce;
diff --git a/ocl.c b/ocl.c
index e5eb6ec6..ae210e21 100644
--- a/ocl.c
+++ b/ocl.c
@@ -505,8 +505,8 @@ build:
 	/* create a cl program executable for all the devices specified */
 	char *CompilerOptions = calloc(1, 256);
 
-	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d",
-		(int)clState->wsize, clState->vwidth);
+	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
+		(int)clState->wsize, clState->vwidth, (int)(clState->wsize * clState->vwidth));
 	applog(LOG_DEBUG, "Setting worksize to %d", clState->wsize);
 	if (clState->vwidth > 1)
 		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
diff --git a/phatk120223.cl b/phatk120223.cl
index f6f20784..a1f4fc27 100644
--- a/phatk120223.cl
+++ b/phatk120223.cl
@@ -188,12 +188,12 @@ void search( const uint state0, const uint state1, const uint state2, const uint
 
 #ifdef VECTORS4
 	//Less dependencies to get both the local id and group id and then add them
-	W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
 	//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
 	W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
 #elif defined VECTORS2
-	W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
 	W[18] = PreW18 + (u){r, r ^ 0x2004000U};
 #else
diff --git a/poclbm120223.cl b/poclbm120223.cl
index 4567fb8c..47dbb5b1 100644
--- a/poclbm120223.cl
+++ b/poclbm120223.cl
@@ -83,9 +83,9 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
 	u *Vals = &W[16]; // Now put at W[16] to be in same array
 
 #ifdef VECTORS4
-	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
 	const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif