Browse Source

X13 Fix

The modified fugue.cl included in the x15 changes broke x13 kernels.
djm34
ystarnaud 11 years ago
parent
commit
e234b2d969
  1. 13
      kernel/fuguecoin.cl
  2. 198
      kernel/marucoin-mod.cl
  3. 15
      kernel/marucoin-modold.cl

13
kernel/fuguecoin.cl

@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp @@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp
{
uint gid = get_global_id(0);
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];

198
kernel/marucoin-mod.cl

@ -908,102 +908,108 @@ __kernel void search11(__global hash_t* hashes) @@ -908,102 +908,108 @@ __kernel void search11(__global hash_t* hashes)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search12(__global hash_t* hashes, __global uint* output, const ulong target)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
// fugue
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
sph_u32 S30, S31, S32, S33, S34, S35;
ulong fc_bit_count = (sph_u64) 64 << 3;
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1);
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2]));
FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5]));
FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8]));
FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB]));
FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE]));
FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x);
// apply round shift if necessary
int i;
for (i = 0; i < 32; i ++) {
ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03);
}
for (i = 0; i < 13; i ++) {
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S28 ^= S00;
ROR8;
SMIX(S00, S01, S02, S03);
}
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
hash->h4[0] = SWAP4(S01);
hash->h4[1] = SWAP4(S02);
hash->h4[2] = SWAP4(S03);
hash->h4[3] = SWAP4(S04);
hash->h4[4] = SWAP4(S09);
hash->h4[5] = SWAP4(S10);
hash->h4[6] = SWAP4(S11);
hash->h4[7] = SWAP4(S12);
hash->h4[8] = SWAP4(S18);
hash->h4[9] = SWAP4(S19);
hash->h4[10] = SWAP4(S20);
hash->h4[11] = SWAP4(S21);
hash->h4[12] = SWAP4(S27);
hash->h4[13] = SWAP4(S28);
hash->h4[14] = SWAP4(S29);
hash->h4[15] = SWAP4(S30);
bool result = (hash->h8[3] <= target);
if (result)
output[output[0xFF]++] = SWAP4(gid);
barrier(CLK_GLOBAL_MEM_FENCE);
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
__global hash_t *hash = &(hashes[gid-offset]);
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
// fugue
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
sph_u32 S30, S31, S32, S33, S34, S35;
ulong fc_bit_count = (sph_u64) 64 << 3;
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1);
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2]));
FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5]));
FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8]));
FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB]));
FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE]));
FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x);
// apply round shift if necessary
int i;
for (i = 0; i < 32; i ++)
{
ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03);
}
for (i = 0; i < 13; i ++)
{
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S28 ^= S00;
ROR8;
SMIX(S00, S01, S02, S03);
}
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
hash->h4[0] = SWAP4(S01);
hash->h4[1] = SWAP4(S02);
hash->h4[2] = SWAP4(S03);
hash->h4[3] = SWAP4(S04);
hash->h4[4] = SWAP4(S09);
hash->h4[5] = SWAP4(S10);
hash->h4[6] = SWAP4(S11);
hash->h4[7] = SWAP4(S12);
hash->h4[8] = SWAP4(S18);
hash->h4[9] = SWAP4(S19);
hash->h4[10] = SWAP4(S20);
hash->h4[11] = SWAP4(S21);
hash->h4[12] = SWAP4(S27);
hash->h4[13] = SWAP4(S28);
hash->h4[14] = SWAP4(S29);
hash->h4[15] = SWAP4(S30);
bool result = (hash->h8[3] <= target);
if (result)
output[atomic_inc(output+0xFF)] = SWAP4(gid);
barrier(CLK_GLOBAL_MEM_FENCE);
}

15
kernel/marucoin-modold.cl

@ -895,7 +895,20 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo @@ -895,7 +895,20 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
}
// fugue
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256];
init = get_local_id(0);
step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i];
mixtab1[i] = mixtab1_c[i];
mixtab2[i] = mixtab2_c[i];
mixtab3[i] = mixtab3_c[i];
}
barrier(CLK_GLOBAL_MEM_FENCE);
// fugue
{

Loading…
Cancel
Save