|
|
|
/*
|
|
|
|
* GroestlCoin kernel implementation.
|
|
|
|
*
|
|
|
|
* ==========================(LICENSE BEGIN)============================
|
|
|
|
*
|
|
|
|
* Copyright (c) 2014 phm
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
|
|
* a copy of this software and associated documentation files (the
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* ===========================(LICENSE END)=============================
|
|
|
|
*
|
|
|
|
* @author phm <phm@inbox.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef GROESTLCOIN_CL
|
|
|
|
#define GROESTLCOIN_CL
|
|
|
|
|
|
|
|
/*
 * Byte-order selection for the sph-style code pulled in below.
 * Exactly one of SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN ends up defined,
 * depending on whether __ENDIAN_LITTLE__ evaluates to non-zero.
 */
#if !__ENDIAN_LITTLE__
#define SPH_BIG_ENDIAN 1
#else
#define SPH_LITTLE_ENDIAN 1
#endif

/* Unsigned integer type used where a pointer-sized integer is needed. */
#define SPH_UPTR sph_u64

/* 32-bit integer aliases. */
typedef unsigned int sph_u32;
typedef int sph_s32;

/*
 * 64-bit integer aliases: `long` when compiled as OpenCL C
 * (__OPENCL_VERSION__ defined), `long long` otherwise.
 */
#ifdef __OPENCL_VERSION__
typedef unsigned long sph_u64;
typedef long sph_s64;
#else
typedef unsigned long long sph_u64;
typedef long long sph_s64;
#endif

/* Flags announcing that 64-bit types are available. */
#define SPH_64 1
#define SPH_64_TRUE 1

/*
 * 32-bit helpers:
 *   SPH_C32  - turn a literal into an sph_u32 constant
 *   SPH_T32  - truncate to the low 32 bits
 *   SPH_ROTL32 / SPH_ROTR32 - rotate left / right by n bits
 */
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))

/* 64-bit counterparts of the helpers above. */
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))

/* Compile-time switches for the sph-style hash sources. */
#define SPH_ECHO_64 1
#define SPH_SIMD_NOCOPY 0
#define SPH_CUBEHASH_UNROLL 0

/* SPH_LUFFA_PARALLEL may be supplied by the build; it defaults to 0. */
#if !defined(SPH_LUFFA_PARALLEL)
#define SPH_LUFFA_PARALLEL 0
#endif
|
|
|
|
|
|
|
|
#include "groestl.cl"
|
|
|
|
|
|
|
|
/*
 * Byte-order reversal of a 32-bit (SWAP4) or 64-bit (SWAP8) value, done by
 * reinterpreting the value as a byte vector and reversing its components.
 */
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)

/*
 * ENC64E(x): convert a 64-bit word for storage.
 * DEC64E(p): load a 64-bit word from the __global address p.
 * Both byte-swap when SPH_BIG_ENDIAN is set and are plain pass-through /
 * plain loads otherwise.
 *
 * The macros expand to expressions only (no trailing ';'), so they can be
 * used inside larger expressions as well as in simple assignments.
 */
#if SPH_BIG_ENDIAN
#define ENC64E(x) SWAP8(x)
#define DEC64E(x) SWAP8(*(const __global sph_u64 *) (x))
#else
#define ENC64E(x) (x)
#define DEC64E(x) (*(const __global sph_u64 *) (x))
#endif
|
|
|
|
|
|
|
|
/*
 * SHA-256 style helper macros.
 *
 * NOTE(review): nothing in the visible part of this file references these
 * macros; they appear to be carried over from a SHA-256 kernel. They are
 * kept for compatibility with any code that includes this file.
 *
 * All macro parameters are parenthesized so that compound arguments
 * (e.g. SHR(x, a | b)) expand with the intended precedence.
 */

/* Rotate left by n bits (OpenCL rotate built-in) and logical shift right. */
#define ROL32(x, n) rotate((x), (uint)(n))
#define SHR(x, n) ((x) >> (n))

/* Reverse the byte order of a 32-bit value. */
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))

/*
 * Message-schedule mixing functions. The rotate-left amounts correspond to
 * the SHA-256 small sigma functions (ROTR 7/18, SHR 3 and ROTR 17/19, SHR 10).
 */
#define S0(x) (ROL32((x), 25) ^ ROL32((x), 14) ^ SHR((x), 3))
#define S1(x) (ROL32((x), 15) ^ ROL32((x), 13) ^ SHR((x), 10))

/*
 * Round mixing functions. The rotate-left amounts correspond to the SHA-256
 * big sigma functions (ROTR 2/13/22 and ROTR 6/11/25).
 */
#define S2(x) (ROL32((x), 30) ^ ROL32((x), 19) ^ ROL32((x), 10))
#define S3(x) (ROL32((x), 26) ^ ROL32((x), 21) ^ ROL32((x), 7))

/*
 * One compression round over working variables a..h with message word x and
 * round constant K. Updates d and h in place; requires a variable named
 * `temp1` to be declared in the enclosing scope.
 */
#define P(a,b,c,d,e,f,g,h,x,K) \
{ \
    temp1 = (h) + S3(e) + F1(e,f,g) + ((K) + (x)); \
    (d) += temp1; (h) = temp1 + S2(a) + F0(a,b,c); \
}

/* Reduced final round: only d is updated, h is left untouched. */
#define PLAST(a,b,c,d,e,f,g,h,x,K) \
{ \
    (d) += (h) + S3(e) + F1(e,f,g) + ((x) + (K)); \
}

/*
 * Bitwise selection helpers built on bitselect(a, b, c), which takes each
 * bit from b where c is set and from a otherwise.
 *   F0: bitwise majority of its three arguments.
 *   F1: bitwise choose, x selects between y and z.
 */
#define F0(y, x, z) bitselect((z), (y), (z) ^ (x))
#define F1(x, y, z) bitselect((z), (y), (x))

/*
 * Rolling 16-word message schedule. Each Rn recomputes Wn in place from the
 * variables W0..W15, which must exist in the enclosing scope.
 */
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)

/* Same values as R14 / R15, but without storing back into W14 / W15. */
#define RD14 (S1(W12) + W7 + S0(W15) + W14)
#define RD15 (S1(W13) + W8 + S0(W0) + W15)
|
|
|
|
|
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
|
|
|
__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
|
|
|
|
{
|
|
|
|
uint gid = get_global_id(0);
|
|
|
|
union {
|
|
|
|
unsigned char h1[64];
|
|
|
|
uint h4[16];
|
|
|
|
ulong h8[8];
|
|
|
|
} hash;
|
|
|
|
|
|
|
|
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256];
|
|
|
|
int init = get_local_id(0);
|
|
|
|
int step = get_local_size(0);
|
|
|
|
for (int i = init; i < 256; i += step)
|
|
|
|
{
|
|
|
|
T0_L[i] = T0[i];
|
|
|
|
T1_L[i] = T1[i];
|
|
|
|
T2_L[i] = T2[i];
|
|
|
|
T3_L[i] = T3[i];
|
|
|
|
T4_L[i] = T4[i];
|
|
|
|
T5_L[i] = T5[i];
|
|
|
|
T6_L[i] = T6[i];
|
|
|
|
T7_L[i] = T7[i];
|
|
|
|
}
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
|
|
|
|
#define T0 T0_L
|
|
|
|
#define T1 T1_L
|
|
|
|
#define T2 T2_L
|
|
|
|
#define T3 T3_L
|
|
|
|
#define T4 T4_L
|
|
|
|
#define T5 T5_L
|
|
|
|
#define T6 T6_L
|
|
|
|
#define T7 T7_L
|
|
|
|
|
|
|
|
// groestl
|
|
|
|
{
|
|
|
|
sph_u64 H[16];
|
|
|
|
for (unsigned int u = 0; u < 15; u ++)
|
|
|
|
H[u] = 0;
|
|
|
|
#if USE_LE
|
|
|
|
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
|
|
|
|
#else
|
|
|
|
H[15] = (sph_u64)512;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
sph_u64 g[16], m[16];
|
|
|
|
m[0] = DEC64E(block + 0 * 8);
|
|
|
|
m[1] = DEC64E(block + 1 * 8);
|
|
|
|
m[2] = DEC64E(block + 2 * 8);
|
|
|
|
m[3] = DEC64E(block + 3 * 8);
|
|
|
|
m[4] = DEC64E(block + 4 * 8);
|
|
|
|
m[5] = DEC64E(block + 5 * 8);
|
|
|
|
m[6] = DEC64E(block + 6 * 8);
|
|
|
|
m[7] = DEC64E(block + 7 * 8);
|
|
|
|
m[8] = DEC64E(block + 8 * 8);
|
|
|
|
m[9] = DEC64E(block + 9 * 8);
|
|
|
|
m[9] &= 0x00000000FFFFFFFF;
|
|
|
|
m[9] |= ((sph_u64) gid << 32);
|
|
|
|
m[10] = 0x80;
|
|
|
|
m[11] = 0;
|
|
|
|
m[12] = 0;
|
|
|
|
m[13] = 0;
|
|
|
|
m[14] = 0;
|
|
|
|
m[15] = 0x100000000000000;
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
g[u] = m[u] ^ H[u];
|
|
|
|
PERM_BIG_P(g);
|
|
|
|
PERM_BIG_Q(m);
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
H[u] ^= g[u] ^ m[u];
|
|
|
|
sph_u64 xH[16];
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
xH[u] = H[u];
|
|
|
|
PERM_BIG_P(xH);
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
H[u] ^= xH[u];
|
|
|
|
for (unsigned int u = 0; u < 8; u ++)
|
|
|
|
hash.h8[u] = ENC64E(H[u + 8]);
|
|
|
|
|
|
|
|
for (unsigned int u = 0; u < 15; u ++)
|
|
|
|
H[u] = 0;
|
|
|
|
#if USE_LE
|
|
|
|
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
|
|
|
|
#else
|
|
|
|
H[15] = (sph_u64)512;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
m[0] = hash.h8[0];
|
|
|
|
m[1] = hash.h8[1];
|
|
|
|
m[2] = hash.h8[2];
|
|
|
|
m[3] = hash.h8[3];
|
|
|
|
m[4] = hash.h8[4];
|
|
|
|
m[5] = hash.h8[5];
|
|
|
|
m[6] = hash.h8[6];
|
|
|
|
m[7] = hash.h8[7];
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
g[u] = m[u] ^ H[u];
|
|
|
|
m[8] = 0x80; g[8] = m[8] ^ H[8];
|
|
|
|
m[9] = 0; g[9] = m[9] ^ H[9];
|
|
|
|
m[10] = 0; g[10] = m[10] ^ H[10];
|
|
|
|
m[11] = 0; g[11] = m[11] ^ H[11];
|
|
|
|
m[12] = 0; g[12] = m[12] ^ H[12];
|
|
|
|
m[13] = 0; g[13] = m[13] ^ H[13];
|
|
|
|
m[14] = 0; g[14] = m[14] ^ H[14];
|
|
|
|
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
|
|
|
|
PERM_BIG_P(g);
|
|
|
|
PERM_BIG_Q(m);
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
H[u] ^= g[u] ^ m[u];
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
xH[u] = H[u];
|
|
|
|
PERM_BIG_P(xH);
|
|
|
|
for (unsigned int u = 0; u < 16; u ++)
|
|
|
|
H[u] ^= xH[u];
|
|
|
|
for (unsigned int u = 0; u < 8; u ++)
|
|
|
|
hash.h8[u] = H[u + 8];
|
|
|
|
}
|
|
|
|
|
|
|
|
bool result = (hash.h8[3] <= target);
|
|
|
|
if (result)
|
|
|
|
output[output[0xFF]++] = SWAP4(gid);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // GROESTLCOIN_CL
|