/* * GroestlCoin kernel implementation. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2014 phm * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @author phm <phm@inbox.com> */ #ifndef GROESTLCOIN_CL #define GROESTLCOIN_CL #if __ENDIAN_LITTLE__ #define SPH_LITTLE_ENDIAN 1 #else #define SPH_BIG_ENDIAN 1 #endif #define SPH_UPTR sph_u64 typedef unsigned int sph_u32; typedef int sph_s32; #ifndef __OPENCL_VERSION__ typedef unsigned long long sph_u64; typedef long long sph_s64; #else typedef unsigned long sph_u64; typedef long sph_s64; #endif #define SPH_64 1 #define SPH_64_TRUE 1 #define SPH_C32(x) ((sph_u32)(x ## U)) #define SPH_T32(x) (as_uint(x)) #define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_C64(x) ((sph_u64)(x ## UL)) #define SPH_T64(x) (as_ulong(x)) #define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_ECHO_64 1 #define SPH_SIMD_NOCOPY 0 #define SPH_CUBEHASH_UNROLL 0 #ifndef SPH_LUFFA_PARALLEL #define SPH_LUFFA_PARALLEL 0 #endif #include "groestl.cl" #define SWAP4(x) as_uint(as_uchar4(x).wzyx) #define SWAP8(x) as_ulong(as_uchar8(x).s76543210) #if SPH_BIG_ENDIAN #define ENC64E(x) SWAP8(x) #define DEC64E(x) SWAP8(*(const __global sph_u64 *) (x)); #else #define ENC64E(x) (x) #define DEC64E(x) (*(const __global sph_u64 *) (x)); #endif #define ROL32(x, n) rotate(x, (uint) n) #define SHR(x, n) ((x) >> n) #define SWAP32(a) (as_uint(as_uchar4(a).wzyx)) #define S0(x) (ROL32(x, 25) ^ ROL32(x, 14) ^ SHR(x, 3)) #define S1(x) (ROL32(x, 15) ^ ROL32(x, 13) ^ SHR(x, 10)) #define S2(x) (ROL32(x, 30) ^ ROL32(x, 19) ^ ROL32(x, 10)) #define S3(x) (ROL32(x, 26) ^ ROL32(x, 21) ^ ROL32(x, 7)) #define P(a,b,c,d,e,f,g,h,x,K) \ { \ temp1 = h + S3(e) + F1(e,f,g) + (K + x); \ d += temp1; h = temp1 + S2(a) + F0(a,b,c); \ } #define PLAST(a,b,c,d,e,f,g,h,x,K) \ { \ d += h + S3(e) + F1(e,f,g) + (x + K); \ } #define F0(y, x, z) bitselect(z, y, z ^ x) #define F1(x, y, z) bitselect(z, y, x) #define R0 (W0 = S1(W14) + W9 + S0(W1) + W0) #define R1 (W1 = S1(W15) + W10 + S0(W2) + W1) #define R2 (W2 = S1(W0) + W11 + S0(W3) + W2) #define R3 (W3 = S1(W1) + W12 + S0(W4) + W3) #define R4 (W4 = S1(W2) + W13 + S0(W5) + W4) #define R5 (W5 = S1(W3) + W14 + S0(W6) + W5) #define R6 (W6 = S1(W4) + W15 + S0(W7) + W6) #define R7 (W7 = S1(W5) + W0 + S0(W8) + W7) #define R8 (W8 = S1(W6) + W1 + S0(W9) + W8) #define R9 (W9 = S1(W7) + W2 + S0(W10) + W9) #define R10 (W10 = S1(W8) + W3 + S0(W11) + W10) #define R11 (W11 = S1(W9) + W4 + S0(W12) + W11) #define R12 (W12 = S1(W10) + W5 + S0(W13) + W12) #define R13 (W13 = S1(W11) + W6 + S0(W14) + W13) #define R14 (W14 = S1(W12) + W7 + S0(W15) + W14) #define R15 (W15 = S1(W13) + W8 + S0(W0) + W15) #define RD14 (S1(W12) + W7 + S0(W15) + W14) #define RD15 (S1(W13) + W8 + S0(W0) + W15) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) { uint gid = get_global_id(0); union { unsigned char h1[64]; uint h4[16]; ulong h8[8]; } hash; #if !SPH_SMALL_FOOTPRINT_GROESTL __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256]; __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256]; #else __local sph_u64 T0_C[256], T4_C[256]; #endif int init = get_local_id(0); int step = get_local_size(0); for (int i = init; i < 256; i += step) { T0_C[i] = T0[i]; T4_C[i] = T4[i]; #if !SPH_SMALL_FOOTPRINT_GROESTL T1_C[i] = T1[i]; T2_C[i] = T2[i]; T3_C[i] = T3[i]; T5_C[i] = T5[i]; T6_C[i] = T6[i]; T7_C[i] = T7[i]; #endif } barrier(CLK_LOCAL_MEM_FENCE); // groestl #define T0 T0_C #define T1 T1_C #define T2 T2_C #define T3 T3_C #define T4 T4_C #define T5 T5_C #define T6 T6_C #define T7 T7_C // groestl { sph_u64 H[16]; //#pragma unroll 15 for (unsigned int u = 0; u < 15; u ++) H[u] = 0; #if USE_LE H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); #else H[15] = (sph_u64)512; #endif sph_u64 g[16], m[16]; m[0] = DEC64E(block + 0 * 8); m[1] = DEC64E(block + 1 * 8); m[2] = DEC64E(block + 2 * 8); m[3] = DEC64E(block + 3 * 8); m[4] = DEC64E(block + 4 * 8); m[5] = DEC64E(block + 5 * 8); m[6] = DEC64E(block + 6 * 8); m[7] = DEC64E(block + 7 * 8); m[8] = DEC64E(block + 8 * 8); m[9] = DEC64E(block + 9 * 8); m[9] &= 0x00000000FFFFFFFF; m[9] |= ((sph_u64) gid << 32); //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) g[u] = m[u] ^ H[u]; m[10] = 0x80; g[10] = m[10] ^ H[10]; m[11] = 0; g[11] = m[11] ^ H[11]; m[12] = 0; g[12] = m[12] ^ H[12]; m[13] = 0; g[13] = m[13] ^ H[13]; m[14] = 0; g[14] = m[14] ^ H[14]; m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; PERM_BIG_P(g); PERM_BIG_Q(m); //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) H[u] ^= g[u] ^ m[u]; sph_u64 xH[16]; //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) xH[u] = H[u]; PERM_BIG_P(xH); //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) H[u] ^= xH[u]; //#pragma unroll 8 for (unsigned int u = 0; u < 8; u ++) hash.h8[u] = ENC64E(H[u + 8]); //#pragma unroll 15 for (unsigned int u = 0; u < 15; u ++) H[u] = 0; #if USE_LE H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); #else H[15] = (sph_u64)512; #endif m[0] = hash.h8[0]; m[1] = hash.h8[1]; m[2] = hash.h8[2]; m[3] = hash.h8[3]; m[4] = hash.h8[4]; m[5] = hash.h8[5]; m[6] = hash.h8[6]; m[7] = hash.h8[7]; //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) g[u] = m[u] ^ H[u]; m[8] = 0x80; g[8] = m[8] ^ H[8]; m[9] = 0; g[9] = m[9] ^ H[9]; m[10] = 0; g[10] = m[10] ^ H[10]; m[11] = 0; g[11] = m[11] ^ H[11]; m[12] = 0; g[12] = m[12] ^ H[12]; m[13] = 0; g[13] = m[13] ^ H[13]; m[14] = 0; g[14] = m[14] ^ H[14]; m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; PERM_BIG_P(g); PERM_BIG_Q(m); //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) H[u] ^= g[u] ^ m[u]; //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) xH[u] = H[u]; PERM_BIG_P(xH); //#pragma unroll 16 for (unsigned int u = 0; u < 16; u ++) H[u] ^= xH[u]; //#pragma unroll 8 for (unsigned int u = 0; u < 8; u ++) hash.h8[u] = H[u + 8]; barrier(CLK_GLOBAL_MEM_FENCE); } bool result = (hash.h8[3] <= target); if (result) output[output[0xFF]++] = SWAP4(gid); } #endif // GROESTLCOIN_CL