|
|
|
/*
|
|
|
|
* InkCoin kernel implementation.
|
|
|
|
*
|
|
|
|
* ==========================(LICENSE BEGIN)============================
|
|
|
|
*
|
|
|
|
* Copyright (c) 2014 phm
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
|
|
* a copy of this software and associated documentation files (the
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* ===========================(LICENSE END)=============================
|
|
|
|
*
|
|
|
|
* @author phm <phm@inbox.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DARKCOIN_CL
|
|
|
|
#define DARKCOIN_CL
|
|
|
|
|
|
|
|
#if __ENDIAN_LITTLE__
|
|
|
|
#define SPH_LITTLE_ENDIAN 1
|
|
|
|
#else
|
|
|
|
#define SPH_BIG_ENDIAN 1
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define SPH_UPTR sph_u64
|
|
|
|
|
|
|
|
typedef unsigned int sph_u32;
|
|
|
|
typedef int sph_s32;
|
|
|
|
#ifndef __OPENCL_VERSION__
|
|
|
|
typedef unsigned long long sph_u64;
|
|
|
|
typedef long long sph_s64;
|
|
|
|
#else
|
|
|
|
typedef unsigned long sph_u64;
|
|
|
|
typedef long sph_s64;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define SPH_64 1
|
|
|
|
#define SPH_64_TRUE 1
|
|
|
|
|
|
|
|
#define SPH_C32(x) ((sph_u32)(x ## U))
|
|
|
|
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
|
|
|
|
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
|
|
|
|
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
|
|
|
|
|
|
|
|
#define SPH_C64(x) ((sph_u64)(x ## UL))
|
|
|
|
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
|
|
|
|
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
|
|
|
|
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
|
|
|
|
|
|
|
|
#define SPH_ECHO_64 1
|
|
|
|
#define SPH_KECCAK_64 1
|
|
|
|
#define SPH_JH_64 1
|
|
|
|
#define SPH_SIMD_NOCOPY 0
|
|
|
|
#define SPH_KECCAK_NOCOPY 0
|
|
|
|
#define SPH_COMPACT_BLAKE_64 0
|
|
|
|
#define SPH_LUFFA_PARALLEL 0
|
|
|
|
#define SPH_SMALL_FOOTPRINT_GROESTL 0
|
|
|
|
#define SPH_GROESTL_BIG_ENDIAN 0
|
|
|
|
|
|
|
|
#define SPH_CUBEHASH_UNROLL 0
|
|
|
|
#define SPH_KECCAK_UNROLL 0
|
|
|
|
|
|
|
|
#include "shavite.cl"
|
|
|
|
|
|
|
|
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
|
|
|
|
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
|
|
|
|
|
|
|
|
#if SPH_BIG_ENDIAN
|
|
|
|
#define DEC64E(x) (x)
|
|
|
|
#define DEC64BE(x) (*(const __global sph_u64 *) (x));
|
|
|
|
#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x));
|
|
|
|
#else
|
|
|
|
#define DEC64E(x) SWAP8(x)
|
|
|
|
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
|
|
|
|
#define DEC32LE(x) (*(const __global sph_u32 *) (x));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
|
|
|
__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
|
|
|
|
{
|
|
|
|
uint gid = get_global_id(0);
|
|
|
|
union {
|
|
|
|
unsigned char h1[64];
|
|
|
|
uint h4[16];
|
|
|
|
ulong h8[8];
|
|
|
|
} hash;
|
|
|
|
|
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
|
|
|
|
int init = get_local_id(0);
|
|
|
|
int step = get_local_size(0);
|
|
|
|
for (int i = init; i < 256; i += step)
|
|
|
|
{
|
|
|
|
AES0[i] = AES0_C[i];
|
|
|
|
AES1[i] = AES1_C[i];
|
|
|
|
AES2[i] = AES2_C[i];
|
|
|
|
AES3[i] = AES3_C[i];
|
|
|
|
}
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
|
|
|
|
|
|
// shavite
|
|
|
|
{
|
|
|
|
// IV
|
|
|
|
sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC);
|
|
|
|
sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC);
|
|
|
|
sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47);
|
|
|
|
sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A);
|
|
|
|
|
|
|
|
// state
|
|
|
|
sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
|
|
|
|
sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
|
|
|
|
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
|
|
|
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
|
|
|
|
|
|
|
sph_u32 sc_count0 = (80 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
|
|
|
|
|
|
|
|
rk00 = DEC32LE(block + 0 * 4);;
|
|
|
|
rk01 = DEC32LE(block + 1 * 4);;
|
|
|
|
rk02 = DEC32LE(block + 2 * 4);;
|
|
|
|
rk03 = DEC32LE(block + 3 * 4);;
|
|
|
|
rk04 = DEC32LE(block + 4 * 4);;
|
|
|
|
rk05 = DEC32LE(block + 5 * 4);;
|
|
|
|
rk06 = DEC32LE(block + 6 * 4);;
|
|
|
|
rk07 = DEC32LE(block + 7 * 4);;
|
|
|
|
rk08 = DEC32LE(block + 8 * 4);;
|
|
|
|
rk09 = DEC32LE(block + 9 * 4);;
|
|
|
|
rk0A = DEC32LE(block + 10 * 4);;
|
|
|
|
rk0B = DEC32LE(block + 11 * 4);;
|
|
|
|
rk0C = DEC32LE(block + 12 * 4);;
|
|
|
|
rk0D = DEC32LE(block + 13 * 4);;
|
|
|
|
rk0E = DEC32LE(block + 14 * 4);;
|
|
|
|
rk0F = DEC32LE(block + 15 * 4);;
|
|
|
|
rk10 = DEC32LE(block + 16 * 4);;
|
|
|
|
rk11 = DEC32LE(block + 17 * 4);;
|
|
|
|
rk12 = DEC32LE(block + 18 * 4);;
|
|
|
|
rk13 = gid;
|
|
|
|
rk14 = 0x80;
|
|
|
|
rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0;
|
|
|
|
rk1B = 0x2800000;
|
|
|
|
rk1C = rk1D = rk1E = 0;
|
|
|
|
rk1F = 0x2000000;
|
|
|
|
|
|
|
|
c512(buf);
|
|
|
|
|
|
|
|
hash.h4[0] = h0;
|
|
|
|
hash.h4[1] = h1;
|
|
|
|
hash.h4[2] = h2;
|
|
|
|
hash.h4[3] = h3;
|
|
|
|
hash.h4[4] = h4;
|
|
|
|
hash.h4[5] = h5;
|
|
|
|
hash.h4[6] = h6;
|
|
|
|
hash.h4[7] = h7;
|
|
|
|
hash.h4[8] = h8;
|
|
|
|
hash.h4[9] = h9;
|
|
|
|
hash.h4[10] = hA;
|
|
|
|
hash.h4[11] = hB;
|
|
|
|
hash.h4[12] = hC;
|
|
|
|
hash.h4[13] = hD;
|
|
|
|
hash.h4[14] = hE;
|
|
|
|
hash.h4[15] = hF;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// shavite
|
|
|
|
{
|
|
|
|
// IV
|
|
|
|
sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC);
|
|
|
|
sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC);
|
|
|
|
sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47);
|
|
|
|
sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A);
|
|
|
|
|
|
|
|
// state
|
|
|
|
sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
|
|
|
|
sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
|
|
|
|
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
|
|
|
|
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
|
|
|
|
|
|
|
|
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
|
|
|
|
|
|
|
|
rk00 = hash.h4[0];
|
|
|
|
rk01 = hash.h4[1];
|
|
|
|
rk02 = hash.h4[2];
|
|
|
|
rk03 = hash.h4[3];
|
|
|
|
rk04 = hash.h4[4];
|
|
|
|
rk05 = hash.h4[5];
|
|
|
|
rk06 = hash.h4[6];
|
|
|
|
rk07 = hash.h4[7];
|
|
|
|
rk08 = hash.h4[8];
|
|
|
|
rk09 = hash.h4[9];
|
|
|
|
rk0A = hash.h4[10];
|
|
|
|
rk0B = hash.h4[11];
|
|
|
|
rk0C = hash.h4[12];
|
|
|
|
rk0D = hash.h4[13];
|
|
|
|
rk0E = hash.h4[14];
|
|
|
|
rk0F = hash.h4[15];
|
|
|
|
rk10 = 0x80;
|
|
|
|
rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0;
|
|
|
|
rk1B = 0x2000000;
|
|
|
|
rk1C = rk1D = rk1E = 0;
|
|
|
|
rk1F = 0x2000000;
|
|
|
|
|
|
|
|
c512(buf);
|
|
|
|
|
|
|
|
hash.h4[0] = h0;
|
|
|
|
hash.h4[1] = h1;
|
|
|
|
hash.h4[2] = h2;
|
|
|
|
hash.h4[3] = h3;
|
|
|
|
hash.h4[4] = h4;
|
|
|
|
hash.h4[5] = h5;
|
|
|
|
hash.h4[6] = h6;
|
|
|
|
hash.h4[7] = h7;
|
|
|
|
hash.h4[8] = h8;
|
|
|
|
hash.h4[9] = h9;
|
|
|
|
hash.h4[10] = hA;
|
|
|
|
hash.h4[11] = hB;
|
|
|
|
hash.h4[12] = hC;
|
|
|
|
hash.h4[13] = hD;
|
|
|
|
hash.h4[14] = hE;
|
|
|
|
hash.h4[15] = hF;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool result = (hash.h8[3] <= target);
|
|
|
|
if (result)
|
|
|
|
output[output[0xFF]++] = SWAP4(gid);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // DARKCOIN_CL
|