Browse Source

X13 Fix

The modified fugue.cl included in the X15 changes broke the X13 kernels.
djm34
ystarnaud 11 years ago
parent
commit
e234b2d969
  1. 13
      kernel/fuguecoin.cl
  2. 16
      kernel/marucoin-mod.cl
  3. 13
      kernel/marucoin-modold.cl

13
kernel/fuguecoin.cl

@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \ sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \ sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7]; sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];

16
kernel/marucoin-mod.cl

@ -909,7 +909,8 @@ __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search12(__global hash_t* hashes, __global uint* output, const ulong target) __kernel void search12(__global hash_t* hashes, __global uint* output, const ulong target)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); uint offset = get_global_offset(0);
__global hash_t *hash = &(hashes[gid-offset]);
//mixtab //mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
@ -922,7 +923,8 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
mixtab2[i] = mixtab2_c[i]; mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i]; mixtab3[i] = mixtab3_c[i];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
// fugue // fugue
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
@ -947,12 +949,15 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
// apply round shift if necessary // apply round shift if necessary
int i; int i;
for (i = 0; i < 32; i ++) { for (i = 0; i < 32; i ++)
{
ROR3; ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03); SMIX(S00, S01, S02, S03);
} }
for (i = 0; i < 13; i ++) {
for (i = 0; i < 13; i ++)
{
S04 ^= S00; S04 ^= S00;
S09 ^= S00; S09 ^= S00;
S18 ^= S00; S18 ^= S00;
@ -1002,7 +1007,8 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
bool result = (hash->h8[3] <= target); bool result = (hash->h8[3] <= target);
if (result) if (result)
output[output[0xFF]++] = SWAP4(gid); output[atomic_inc(output+0xFF)] = SWAP4(gid);
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
} }

13
kernel/marucoin-modold.cl

@ -895,6 +895,19 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
} }
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
init = get_local_id(0);
step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
// fugue // fugue
{ {

Loading…
Cancel
Save