mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 20:44:19 +00:00
429 lines
11 KiB
Common Lisp
429 lines
11 KiB
Common Lisp
#if __ENDIAN_LITTLE__
|
|
#define SPH_LITTLE_ENDIAN 1
|
|
#else
|
|
#define SPH_BIG_ENDIAN 1
|
|
#endif
|
|
|
|
#define SPH_UPTR sph_u64
|
|
|
|
typedef unsigned int sph_u32;
|
|
typedef int sph_s32;
|
|
#ifndef __OPENCL_VERSION__
|
|
typedef unsigned long long sph_u64;
|
|
typedef long long sph_s64;
|
|
#else
|
|
typedef unsigned long sph_u64;
|
|
typedef long sph_s64;
|
|
#endif
|
|
|
|
#define SPH_64 1
|
|
#define SPH_64_TRUE 1
|
|
|
|
#define SPH_C32(x) ((sph_u32)(x ## U))
|
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
|
|
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
|
|
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
|
|
|
|
#define SPH_C64(x) ((sph_u64)(x ## UL))
|
|
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
|
|
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
|
|
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
|
|
|
|
#define SPH_HAMSI_EXPAND_SMALL 1
|
|
|
|
#include "fugue.cl"
|
|
#include "shavite.cl"
|
|
#include "hamsi.cl"
|
|
#include "panama.cl"
|
|
|
|
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
|
|
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
|
|
|
|
#if SPH_BIG_ENDIAN
|
|
#define DEC32BE(x) (*(const __global sph_u32 *) (x))
|
|
#else
|
|
#define DEC32BE(x) SWAP4(*(const __global sph_u32 *) (x))
|
|
#endif
|
|
|
|
#define sph_bswap32(x) SWAP4(x)
|
|
|
|
static void sph_enc32be(void *dst, sph_u32 val)
|
|
{
|
|
#if defined SPH_UPTR
|
|
#if SPH_UNALIGNED
|
|
#if SPH_LITTLE_ENDIAN
|
|
val = sph_bswap32(val);
|
|
#endif
|
|
*(sph_u32 *)dst = val;
|
|
#else
|
|
if (((SPH_UPTR)dst & 3) == 0) {
|
|
#if SPH_LITTLE_ENDIAN
|
|
val = sph_bswap32(val);
|
|
#endif
|
|
*(sph_u32 *)dst = val;
|
|
} else {
|
|
((unsigned char *)dst)[0] = (val >> 24);
|
|
((unsigned char *)dst)[1] = (val >> 16);
|
|
((unsigned char *)dst)[2] = (val >> 8);
|
|
((unsigned char *)dst)[3] = val;
|
|
}
|
|
#endif
|
|
#else
|
|
((unsigned char *)dst)[0] = (val >> 24);
|
|
((unsigned char *)dst)[1] = (val >> 16);
|
|
((unsigned char *)dst)[2] = (val >> 8);
|
|
((unsigned char *)dst)[3] = val;
|
|
#endif
|
|
}
|
|
|
|
static void sph_enc32le(void *dst, sph_u32 val)
|
|
{
|
|
#if defined SPH_UPTR
|
|
#if SPH_UNALIGNED
|
|
#if SPH_BIG_ENDIAN
|
|
val = sph_bswap32(val);
|
|
#endif
|
|
*(sph_u32 *)dst = val;
|
|
#else
|
|
if (((SPH_UPTR)dst & 3) == 0) {
|
|
#if SPH_BIG_ENDIAN
|
|
val = sph_bswap32(val);
|
|
#endif
|
|
*(sph_u32 *)dst = val;
|
|
} else {
|
|
((unsigned char *)dst)[0] = val;
|
|
((unsigned char *)dst)[1] = (val >> 8);
|
|
((unsigned char *)dst)[2] = (val >> 16);
|
|
((unsigned char *)dst)[3] = (val >> 24);
|
|
}
|
|
#endif
|
|
#else
|
|
((unsigned char *)dst)[0] = val;
|
|
((unsigned char *)dst)[1] = (val >> 8);
|
|
((unsigned char *)dst)[2] = (val >> 16);
|
|
((unsigned char *)dst)[3] = (val >> 24);
|
|
#endif
|
|
}
|
|
|
|
static sph_u32 sph_dec32le_aligned(const void *src)
|
|
{
|
|
#if SPH_LITTLE_ENDIAN
|
|
return *(const sph_u32 *)src;
|
|
#elif SPH_BIG_ENDIAN
|
|
return sph_bswap32(*(const sph_u32 *)src);
|
|
#else
|
|
return (sph_u32)(((const unsigned char *)src)[0])
|
|
| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
|
|
| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
|
|
| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
|
|
#endif
|
|
}
|
|
|
|
|
|
__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
|
|
{
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
|
|
int init = get_local_id(0);
|
|
int step = get_local_size(0);
|
|
for (int i = init; i < 256; i += step)
|
|
{
|
|
AES0[i] = AES0_C[i];
|
|
AES1[i] = AES1_C[i];
|
|
AES2[i] = AES2_C[i];
|
|
AES3[i] = AES3_C[i];
|
|
}
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
unsigned char hash[64];
|
|
for(unsigned j = 0; j < 64; j++)
|
|
hash[j] = 0;
|
|
|
|
sph_u32 gid = get_global_id(0);
|
|
|
|
// fugue
|
|
{
|
|
sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
|
|
sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
|
|
sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];
|
|
|
|
FUGUE256_5(DEC32BE(block + 0x0), DEC32BE(block + 0x4), DEC32BE(block + 0x8), DEC32BE(block + 0xc), DEC32BE(block + 0x10));
|
|
FUGUE256_5(DEC32BE(block + 0x14), DEC32BE(block + 0x18), DEC32BE(block + 0x1c), DEC32BE(block + 0x20), DEC32BE(block + 0x24));
|
|
FUGUE256_5(DEC32BE(block + 0x28), DEC32BE(block + 0x2c), DEC32BE(block + 0x30), DEC32BE(block + 0x34), DEC32BE(block + 0x38));
|
|
FUGUE256_4(DEC32BE(block + 0x3c), DEC32BE(block + 0x40), DEC32BE(block + 0x44), DEC32BE(block + 0x48));
|
|
|
|
TIX2(SWAP4(gid), S06, S07, S14, S16, S00);
|
|
CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
|
|
SMIX(S03, S04, S05, S06);
|
|
CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
|
|
SMIX(S00, S01, S02, S03);
|
|
|
|
TIX2(0, S00, S01, S08, S10, S24);
|
|
CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
|
|
SMIX(S27, S28, S29, S00);
|
|
CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
|
|
SMIX(S24, S25, S26, S27);
|
|
|
|
TIX2(0x280, S24, S25, S02, S04, S18);
|
|
CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
|
|
SMIX(S21, S22, S23, S24);
|
|
CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
|
|
SMIX(S18, S19, S20, S21);
|
|
|
|
CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02);
|
|
SMIX(S15, S16, S17, S18);
|
|
CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29);
|
|
SMIX(S12, S13, S14, S15);
|
|
CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26);
|
|
SMIX(S09, S10, S11, S12);
|
|
CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23);
|
|
SMIX(S06, S07, S08, S09);
|
|
CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
|
|
SMIX(S03, S04, S05, S06);
|
|
CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
|
|
SMIX(S00, S01, S02, S03);
|
|
CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
|
|
SMIX(S27, S28, S29, S00);
|
|
CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
|
|
SMIX(S24, S25, S26, S27);
|
|
CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
|
|
SMIX(S21, S22, S23, S24);
|
|
CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
|
|
SMIX(S18, S19, S20, S21);
|
|
S22 ^= S18;
|
|
S03 ^= S18;
|
|
SMIX(S03, S04, S05, S06);
|
|
S07 ^= S03;
|
|
S19 ^= S03;
|
|
SMIX(S19, S20, S21, S22);
|
|
S23 ^= S19;
|
|
S04 ^= S19;
|
|
SMIX(S04, S05, S06, S07);
|
|
S08 ^= S04;
|
|
S20 ^= S04;
|
|
SMIX(S20, S21, S22, S23);
|
|
S24 ^= S20;
|
|
S05 ^= S20;
|
|
SMIX(S05, S06, S07, S08);
|
|
S09 ^= S05;
|
|
S21 ^= S05;
|
|
SMIX(S21, S22, S23, S24);
|
|
S25 ^= S21;
|
|
S06 ^= S21;
|
|
SMIX(S06, S07, S08, S09);
|
|
S10 ^= S06;
|
|
S22 ^= S06;
|
|
SMIX(S22, S23, S24, S25);
|
|
S26 ^= S22;
|
|
S07 ^= S22;
|
|
SMIX(S07, S08, S09, S10);
|
|
S11 ^= S07;
|
|
S23 ^= S07;
|
|
SMIX(S23, S24, S25, S26);
|
|
S27 ^= S23;
|
|
S08 ^= S23;
|
|
SMIX(S08, S09, S10, S11);
|
|
S12 ^= S08;
|
|
S24 ^= S08;
|
|
SMIX(S24, S25, S26, S27);
|
|
S28 ^= S24;
|
|
S09 ^= S24;
|
|
SMIX(S09, S10, S11, S12);
|
|
S13 ^= S09;
|
|
S25 ^= S09;
|
|
SMIX(S25, S26, S27, S28);
|
|
S29 ^= S25;
|
|
S10 ^= S25;
|
|
SMIX(S10, S11, S12, S13);
|
|
S14 ^= S10;
|
|
S26 ^= S10;
|
|
SMIX(S26, S27, S28, S29);
|
|
S00 ^= S26;
|
|
S11 ^= S26;
|
|
SMIX(S11, S12, S13, S14);
|
|
S15 ^= S11;
|
|
S27 ^= S11;
|
|
SMIX(S27, S28, S29, S00);
|
|
S01 ^= S27;
|
|
S12 ^= S27;
|
|
SMIX(S12, S13, S14, S15);
|
|
S16 ^= S12;
|
|
S28 ^= S12;
|
|
SMIX(S28, S29, S00, S01);
|
|
S02 ^= S28;
|
|
S13 ^= S28;
|
|
SMIX(S13, S14, S15, S16);
|
|
S17 ^= S13;
|
|
S29 ^= S13;
|
|
SMIX(S29, S00, S01, S02);
|
|
S03 ^= S29;
|
|
S14 ^= S29;
|
|
SMIX(S14, S15, S16, S17);
|
|
S18 ^= S14;
|
|
S00 ^= S14;
|
|
SMIX(S00, S01, S02, S03);
|
|
S04 ^= S00;
|
|
S15 ^= S00;
|
|
SMIX(S15, S16, S17, S18);
|
|
S19 ^= S15;
|
|
S01 ^= S15;
|
|
SMIX(S01, S02, S03, S04);
|
|
|
|
S05 ^= S01;
|
|
S16 ^= S01;
|
|
|
|
sph_enc32be((unsigned char*) &hash + 0, S02);
|
|
sph_enc32be((unsigned char*) &hash + 4, S03);
|
|
sph_enc32be((unsigned char*) &hash + 8, S04);
|
|
sph_enc32be((unsigned char*) &hash + 12, S05);
|
|
sph_enc32be((unsigned char*) &hash + 16, S16);
|
|
sph_enc32be((unsigned char*) &hash + 20, S17);
|
|
sph_enc32be((unsigned char*) &hash + 24, S18);
|
|
sph_enc32be((unsigned char*) &hash + 28, S19);
|
|
}
|
|
|
|
// shavite
|
|
{
|
|
sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) };
|
|
sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
|
|
sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;
|
|
sph_u32 count0, count1;
|
|
|
|
rk0 = sph_dec32le_aligned((const unsigned char *)&hash + 0);
|
|
rk1 = sph_dec32le_aligned((const unsigned char *)&hash + 4);
|
|
rk2 = sph_dec32le_aligned((const unsigned char *)&hash + 8);
|
|
rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12);
|
|
rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16);
|
|
rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20);
|
|
rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24);
|
|
rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28);
|
|
rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32);
|
|
rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36);
|
|
rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40);
|
|
rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44);
|
|
rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48);
|
|
rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52);
|
|
rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56);
|
|
rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60);
|
|
count0 = 0x200;
|
|
count1 = 0;
|
|
c256(buf);
|
|
|
|
rk0 = 0x80;
|
|
rk1 = 0;
|
|
rk2 = 0;
|
|
rk3 = 0;
|
|
rk4 = 0;
|
|
rk5 = 0;
|
|
rk6 = 0;
|
|
rk7 = 0;
|
|
rk8 = 0;
|
|
rk9 = 0;
|
|
rkA = 0;
|
|
rkB = 0;
|
|
rkC = 0;
|
|
rkD = 0x2000000;
|
|
rkE = 0;
|
|
rkF = 0x1000000;
|
|
count0 = 0;
|
|
count1 = 0;
|
|
c256(buf);
|
|
|
|
for (unsigned u = 0; u < 8; u ++)
|
|
sph_enc32le((unsigned char *)&hash + (u << 2), h[u]);
|
|
}
|
|
|
|
// hamsi
|
|
{
|
|
sph_u32 c0 = HAMSI_IV256[0], c1 = HAMSI_IV256[1], c2 = HAMSI_IV256[2], c3 = HAMSI_IV256[3];
|
|
sph_u32 c4 = HAMSI_IV256[4], c5 = HAMSI_IV256[5], c6 = HAMSI_IV256[6], c7 = HAMSI_IV256[7];
|
|
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
|
sph_u32 h[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };
|
|
|
|
#define buf(u) hash[i + u]
|
|
for(int i = 0; i < 64; i += 4) {
|
|
INPUT_SMALL;
|
|
P_SMALL;
|
|
T_SMALL;
|
|
}
|
|
#undef buf
|
|
#define buf(u) (u == 0 ? 0x80 : 0)
|
|
INPUT_SMALL;
|
|
P_SMALL;
|
|
T_SMALL;
|
|
#undef buf
|
|
#define buf(u) 0
|
|
INPUT_SMALL;
|
|
P_SMALL;
|
|
T_SMALL;
|
|
#undef buf
|
|
#define buf(u) (u == 2 ? 2 : 0)
|
|
INPUT_SMALL;
|
|
PF_SMALL;
|
|
T_SMALL;
|
|
|
|
for (unsigned u = 0; u < 8; u ++)
|
|
sph_enc32be((unsigned char*) &hash + (u << 2), h[u]);
|
|
}
|
|
|
|
// panama
|
|
{
|
|
sph_u32 buffer[32][8];
|
|
sph_u32 state[17];
|
|
int i, j;
|
|
for(i = 0; i < 32; i++)
|
|
for(j = 0; j < 8; j++)
|
|
buffer[i][j] = 0;
|
|
for(i = 0; i < 17; i++)
|
|
state[i] = 0;
|
|
|
|
LVARS
|
|
unsigned ptr0 = 0;
|
|
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 4 * (i))
|
|
#define INW2(i) INW1(i)
|
|
|
|
M17(RSTATE);
|
|
PANAMA_STEP;
|
|
|
|
#undef INW1
|
|
#undef INW2
|
|
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 32 + 4 * (i))
|
|
#define INW2(i) INW1(i)
|
|
PANAMA_STEP;
|
|
M17(WSTATE);
|
|
|
|
#undef INW1
|
|
#undef INW2
|
|
|
|
#define INW1(i) (sph_u32) (i == 0)
|
|
#define INW2(i) INW1(i)
|
|
|
|
M17(RSTATE);
|
|
PANAMA_STEP;
|
|
M17(WSTATE);
|
|
|
|
#undef INW1
|
|
#undef INW2
|
|
|
|
#define INW1(i) INW_H1(INC ## i)
|
|
#define INW_H1(i) INW_H2(i)
|
|
#define INW_H2(i) a ## i
|
|
#define INW2(i) buffer[ptr4][i]
|
|
|
|
M17(RSTATE);
|
|
for(i = 0; i < 32; i++) {
|
|
unsigned ptr4 = (ptr0 + 4) & 31;
|
|
PANAMA_STEP;
|
|
}
|
|
M17(WSTATE);
|
|
|
|
#undef INW1
|
|
#undef INW_H1
|
|
#undef INW_H2
|
|
#undef INW2
|
|
|
|
bool result = ((((sph_u64) state[16] << 32) | state[15]) <= target);
|
|
if (result)
|
|
output[output[0xFF]++] = SWAP4(gid);
|
|
}
|
|
}
|