// mirror of https://github.com/GOSTSec/sgminer
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
#define SPH_BIG_ENDIAN 1
#endif

#define SPH_UPTR sph_u64

typedef unsigned int sph_u32;
typedef int sph_s32;
#ifndef __OPENCL_VERSION__
typedef unsigned long long sph_u64;
typedef long long sph_s64;
#else
typedef unsigned long sph_u64;
typedef long sph_s64;
#endif

#define SPH_64 1
#define SPH_64_TRUE 1

#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))

#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))

#define SPH_HAMSI_EXPAND_SMALL 1

#include "fugue.cl"
#include "shavite.cl"
#include "hamsi.cl"
#include "panama.cl"

#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)

#if SPH_BIG_ENDIAN
#define DEC32BE(x) (*(const __global sph_u32 *) (x))
#else
#define DEC32BE(x) SWAP4(*(const __global sph_u32 *) (x))
#endif

#define sph_bswap32(x) SWAP4(x)
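// Byte-order helpers in the style of the sph reference code: store a 32-bit
// value big- or little-endian, and load an aligned little-endian word.  A
// direct word access is used when the pointer is suitably aligned (or
// SPH_UNALIGNED is set); otherwise the value is written byte by byte.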
static void sph_enc32be(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
#if SPH_LITTLE_ENDIAN
    val = sph_bswap32(val);
#endif
    *(sph_u32 *)dst = val;
#else
    if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_LITTLE_ENDIAN
        val = sph_bswap32(val);
#endif
        *(sph_u32 *)dst = val;
    } else {
        ((unsigned char *)dst)[0] = (val >> 24);
        ((unsigned char *)dst)[1] = (val >> 16);
        ((unsigned char *)dst)[2] = (val >> 8);
        ((unsigned char *)dst)[3] = val;
    }
#endif
#else
    ((unsigned char *)dst)[0] = (val >> 24);
    ((unsigned char *)dst)[1] = (val >> 16);
    ((unsigned char *)dst)[2] = (val >> 8);
    ((unsigned char *)dst)[3] = val;
#endif
}

static void sph_enc32le(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
#if SPH_BIG_ENDIAN
    val = sph_bswap32(val);
#endif
    *(sph_u32 *)dst = val;
#else
    if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_BIG_ENDIAN
        val = sph_bswap32(val);
#endif
        *(sph_u32 *)dst = val;
    } else {
        ((unsigned char *)dst)[0] = val;
        ((unsigned char *)dst)[1] = (val >> 8);
        ((unsigned char *)dst)[2] = (val >> 16);
        ((unsigned char *)dst)[3] = (val >> 24);
    }
#endif
#else
    ((unsigned char *)dst)[0] = val;
    ((unsigned char *)dst)[1] = (val >> 8);
    ((unsigned char *)dst)[2] = (val >> 16);
    ((unsigned char *)dst)[3] = (val >> 24);
#endif
}

static sph_u32 sph_dec32le_aligned(const void *src)
{
#if SPH_LITTLE_ENDIAN
    return *(const sph_u32 *)src;
#elif SPH_BIG_ENDIAN
    return sph_bswap32(*(const sph_u32 *)src);
#else
    return (sph_u32)(((const unsigned char *)src)[0])
        | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
        | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
        | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
#endif
}
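// search() chains fugue256 -> shavite256 -> hamsi256 -> panama over the 80-byte
// block header: the first 76 bytes come from `block`, and the nonce is this
// work-item's global id.  A nonce is reported when the most significant 64 bits
// of the final 256-bit digest are not above `target`.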
__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
{
    __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
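    // Cooperatively copy the AES lookup tables (AES0_C..AES3_C, declared by the
    // included kernels) into local memory for the work-group.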
    int init = get_local_id(0);
    int step = get_local_size(0);
    for (int i = init; i < 256; i += step)
    {
        AES0[i] = AES0_C[i];
        AES1[i] = AES1_C[i];
        AES2[i] = AES2_C[i];
        AES3[i] = AES3_C[i];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
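    // hash[] is the 64-byte buffer handed from one hash stage to the next; it is
    // zeroed first so that bytes a stage does not overwrite remain well defined.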
    unsigned char hash[64];
    for (unsigned j = 0; j < 64; j++)
        hash[j] = 0;

    sph_u32 gid = get_global_id(0);

    // fugue
    {
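        // Fugue-256 works on a 30-word state S00..S29: the IV fills the last eight
        // words and everything else starts at zero.  The header is absorbed as
        // big-endian 32-bit words, five (or four) at a time, via the
        // FUGUE256_5/FUGUE256_4 macros from fugue.cl.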
        sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0;
        sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0;
        sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];

        FUGUE256_5(DEC32BE(block + 0x0), DEC32BE(block + 0x4), DEC32BE(block + 0x8), DEC32BE(block + 0xc), DEC32BE(block + 0x10));
        FUGUE256_5(DEC32BE(block + 0x14), DEC32BE(block + 0x18), DEC32BE(block + 0x1c), DEC32BE(block + 0x20), DEC32BE(block + 0x24));
        FUGUE256_5(DEC32BE(block + 0x28), DEC32BE(block + 0x2c), DEC32BE(block + 0x30), DEC32BE(block + 0x34), DEC32BE(block + 0x38));
        FUGUE256_4(DEC32BE(block + 0x3c), DEC32BE(block + 0x40), DEC32BE(block + 0x44), DEC32BE(block + 0x48));
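        // Inject this work-item's nonce in place of the last four header bytes:
        // the 20th and final 32-bit input word.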
        TIX2(SWAP4(gid), S06, S07, S14, S16, S00);
        CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
        SMIX(S03, S04, S05, S06);
        CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
        SMIX(S00, S01, S02, S03);
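        // Fugue padding: absorb the 64-bit message bit length (80 bytes = 0x280 bits),
        // high word first.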
        TIX2(0, S00, S01, S08, S10, S24);
        CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
        SMIX(S27, S28, S29, S00);
        CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
        SMIX(S24, S25, S26, S27);

        TIX2(0x280, S24, S25, S02, S04, S18);
        CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
        SMIX(S21, S22, S23, S24);
        CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
        SMIX(S18, S19, S20, S21);
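        // Finalization, first phase: ten rounds of CMIX30 + SMIX with no further input.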
        CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02);
        SMIX(S15, S16, S17, S18);
        CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29);
        SMIX(S12, S13, S14, S15);
        CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26);
        SMIX(S09, S10, S11, S12);
        CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23);
        SMIX(S06, S07, S08, S09);
        CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
        SMIX(S03, S04, S05, S06);
        CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
        SMIX(S00, S01, S02, S03);
        CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
        SMIX(S27, S28, S29, S00);
        CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
        SMIX(S24, S25, S26, S27);
        CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
        SMIX(S21, S22, S23, S24);
        CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
        SMIX(S18, S19, S20, S21);
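        // Finalization, second phase: thirteen double rounds of the XOR/SMIX pattern
        // from the Fugue-256 specification.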
        S22 ^= S18;
        S03 ^= S18;
        SMIX(S03, S04, S05, S06);
        S07 ^= S03;
        S19 ^= S03;
        SMIX(S19, S20, S21, S22);
        S23 ^= S19;
        S04 ^= S19;
        SMIX(S04, S05, S06, S07);
        S08 ^= S04;
        S20 ^= S04;
        SMIX(S20, S21, S22, S23);
        S24 ^= S20;
        S05 ^= S20;
        SMIX(S05, S06, S07, S08);
        S09 ^= S05;
        S21 ^= S05;
        SMIX(S21, S22, S23, S24);
        S25 ^= S21;
        S06 ^= S21;
        SMIX(S06, S07, S08, S09);
        S10 ^= S06;
        S22 ^= S06;
        SMIX(S22, S23, S24, S25);
        S26 ^= S22;
        S07 ^= S22;
        SMIX(S07, S08, S09, S10);
        S11 ^= S07;
        S23 ^= S07;
        SMIX(S23, S24, S25, S26);
        S27 ^= S23;
        S08 ^= S23;
        SMIX(S08, S09, S10, S11);
        S12 ^= S08;
        S24 ^= S08;
        SMIX(S24, S25, S26, S27);
        S28 ^= S24;
        S09 ^= S24;
        SMIX(S09, S10, S11, S12);
        S13 ^= S09;
        S25 ^= S09;
        SMIX(S25, S26, S27, S28);
        S29 ^= S25;
        S10 ^= S25;
        SMIX(S10, S11, S12, S13);
        S14 ^= S10;
        S26 ^= S10;
        SMIX(S26, S27, S28, S29);
        S00 ^= S26;
        S11 ^= S26;
        SMIX(S11, S12, S13, S14);
        S15 ^= S11;
        S27 ^= S11;
        SMIX(S27, S28, S29, S00);
        S01 ^= S27;
        S12 ^= S27;
        SMIX(S12, S13, S14, S15);
        S16 ^= S12;
        S28 ^= S12;
        SMIX(S28, S29, S00, S01);
        S02 ^= S28;
        S13 ^= S28;
        SMIX(S13, S14, S15, S16);
        S17 ^= S13;
        S29 ^= S13;
        SMIX(S29, S00, S01, S02);
        S03 ^= S29;
        S14 ^= S29;
        SMIX(S14, S15, S16, S17);
        S18 ^= S14;
        S00 ^= S14;
        SMIX(S00, S01, S02, S03);
        S04 ^= S00;
        S15 ^= S00;
        SMIX(S15, S16, S17, S18);
        S19 ^= S15;
        S01 ^= S15;
        SMIX(S01, S02, S03, S04);

        S05 ^= S01;
        S16 ^= S01;
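        // Write the 256-bit Fugue digest big-endian into hash[0..31]; hash[32..63]
        // stays zero and is consumed as part of the next stage's message block.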
        sph_enc32be((unsigned char*) &hash + 0, S02);
        sph_enc32be((unsigned char*) &hash + 4, S03);
        sph_enc32be((unsigned char*) &hash + 8, S04);
        sph_enc32be((unsigned char*) &hash + 12, S05);
        sph_enc32be((unsigned char*) &hash + 16, S16);
        sph_enc32be((unsigned char*) &hash + 20, S17);
        sph_enc32be((unsigned char*) &hash + 24, S18);
        sph_enc32be((unsigned char*) &hash + 28, S19);
    }

    // shavite
    {
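        // SHAvite-3-256: h[] is the chaining value (initialized with the IV
        // constants), rk0..rkF hold the 512-bit message block being compressed, and
        // count0/count1 form the bit counter used by the c256 compression function
        // from shavite.cl.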
        sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) };
        sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
        sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;
        sph_u32 count0, count1;

        rk0 = sph_dec32le_aligned((const unsigned char *)&hash + 0);
        rk1 = sph_dec32le_aligned((const unsigned char *)&hash + 4);
        rk2 = sph_dec32le_aligned((const unsigned char *)&hash + 8);
        rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12);
        rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16);
        rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20);
        rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24);
        rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28);
        rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32);
        rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36);
        rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40);
        rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44);
        rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48);
        rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52);
        rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56);
        rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60);
        count0 = 0x200;
        count1 = 0;
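        // First compression: the 64-byte hash buffer as one full message block,
        // with the bit counter at 0x200 (512 bits).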
        c256(buf);
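        // Second compression: the padding block.  rk0 carries the 0x80 end-of-message
        // marker; rkD and rkF appear to encode the 512-bit message length and the
        // 256-bit digest size, and the counter is zero for a padding-only block.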
        rk0 = 0x80;
        rk1 = 0;
        rk2 = 0;
        rk3 = 0;
        rk4 = 0;
        rk5 = 0;
        rk6 = 0;
        rk7 = 0;
        rk8 = 0;
        rk9 = 0;
        rkA = 0;
        rkB = 0;
        rkC = 0;
        rkD = 0x2000000;
        rkE = 0;
        rkF = 0x1000000;
        count0 = 0;
        count1 = 0;
        c256(buf);
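        // Store the SHAvite-3 digest back into hash[0..31] as little-endian words.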
        for (unsigned u = 0; u < 8; u ++)
            sph_enc32le((unsigned char *)&hash + (u << 2), h[u]);
    }

    // hamsi
    {
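        // Hamsi-256: c0..c7 and h[] hold the chaining value, starting from the
        // Hamsi-256 IV.  The message is absorbed four bytes at a time: the buf(u)
        // macro is redefined before each INPUT_SMALL to supply the current 4-byte
        // block, and P_SMALL/T_SMALL (from hamsi.cl) apply the permutation and
        // feed-forward.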
        sph_u32 c0 = HAMSI_IV256[0], c1 = HAMSI_IV256[1], c2 = HAMSI_IV256[2], c3 = HAMSI_IV256[3];
        sph_u32 c4 = HAMSI_IV256[4], c5 = HAMSI_IV256[5], c6 = HAMSI_IV256[6], c7 = HAMSI_IV256[7];
        sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
        sph_u32 h[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };

#define buf(u) hash[i + u]
        for (int i = 0; i < 64; i += 4) {
            INPUT_SMALL;
            P_SMALL;
            T_SMALL;
        }
#undef buf
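        // Trailing blocks: a padding block with the 0x80 marker, then the 64-bit
        // message bit length (0, then 0x200 = 512) as two more blocks.  The last
        // block goes through PF_SMALL, the extended final-round permutation.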
#define buf(u) (u == 0 ? 0x80 : 0)
        INPUT_SMALL;
        P_SMALL;
        T_SMALL;
#undef buf
#define buf(u) 0
        INPUT_SMALL;
        P_SMALL;
        T_SMALL;
#undef buf
#define buf(u) (u == 2 ? 2 : 0)
        INPUT_SMALL;
        PF_SMALL;
        T_SMALL;
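        // Write the Hamsi digest big-endian into hash[0..31] for the final stage.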
        for (unsigned u = 0; u < 8; u ++)
            sph_enc32be((unsigned char*) &hash + (u << 2), h[u]);
    }

    // panama
    {
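        // Panama: buffer[32][8] is the 32-stage LFSR buffer and state[17] the mixing
        // state, both starting at zero.  The 64-byte hash buffer is pushed as two
        // 8-word blocks, followed by a padding block, then 32 blank "pull" rounds
        // run before the digest is taken from the end of the state.  LVARS, M17,
        // RSTATE/WSTATE and PANAMA_STEP come from panama.cl.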
        sph_u32 buffer[32][8];
        sph_u32 state[17];
        int i, j;
        for (i = 0; i < 32; i++)
            for (j = 0; j < 8; j++)
                buffer[i][j] = 0;
        for (i = 0; i < 17; i++)
            state[i] = 0;

        LVARS
        unsigned ptr0 = 0;
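        // Push the two 32-byte halves of hash as the first two input blocks; the
        // INW1/INW2 macros tell PANAMA_STEP where to read its input words.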
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 4 * (i))
#define INW2(i) INW1(i)

        M17(RSTATE);
        PANAMA_STEP;

#undef INW1
#undef INW2
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 32 + 4 * (i))
#define INW2(i) INW1(i)
        PANAMA_STEP;
        M17(WSTATE);

#undef INW1
#undef INW2
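        // Padding block: a single 1 bit (word 0 = 1), the rest zero.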
#define INW1(i) (sph_u32) (i == 0)
#define INW2(i) INW1(i)

        M17(RSTATE);
        PANAMA_STEP;
        M17(WSTATE);

#undef INW1
#undef INW2
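        // Pull phase: 32 blank rounds whose input comes from the LFSR buffer
        // (buffer[ptr4]) instead of the message, as Panama requires before the
        // digest may be extracted.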
#define INW1(i) INW_H1(INC ## i) |
|
#define INW_H1(i) INW_H2(i) |
|
#define INW_H2(i) a ## i |
|
#define INW2(i) buffer[ptr4][i] |
|
|
|
M17(RSTATE); |
|
for(i = 0; i < 32; i++) { |
|
unsigned ptr4 = (ptr0 + 4) & 31; |
|
PANAMA_STEP; |
|
} |
|
M17(WSTATE); |
|
|
|
#undef INW1 |
|
#undef INW_H1 |
|
#undef INW_H2 |
|
#undef INW2 |
|
|
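        // state[16]:state[15] hold the most significant 64 bits of the 256-bit
        // little-endian digest; report the nonce when they do not exceed the target.
        // output[0xFF] is used as the result counter, per the usual sgminer
        // output convention.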
        bool result = ((((sph_u64) state[16] << 32) | state[15]) <= target);
        if (result)
            output[output[0xFF]++] = SWAP4(gid);
    }
}