diff --git a/Makefile.am b/Makefile.am index eb50cdea..920ebadc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -62,6 +62,8 @@ sgminer_SOURCES += algorithm/twecoin.c algorithm/twecoin.h sgminer_SOURCES += algorithm/marucoin.c algorithm/marucoin.h sgminer_SOURCES += algorithm/maxcoin.c algorithm/maxcoin.h sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h +sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h +sgminer_SOURCES += algorithm/x14.c algorithm/x14.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index 55443670..a3210c11 100644 --- a/algorithm.c +++ b/algorithm.c @@ -26,6 +26,8 @@ #include "algorithm/marucoin.h" #include "algorithm/maxcoin.h" #include "algorithm/talkcoin.h" +#include "algorithm/bitblock.h" +#include "algorithm/x14.h" #include "compat.h" @@ -39,6 +41,8 @@ const char *algorithm_type_str[] = { "NScrypt", "X11", "X13", + "X14", + "X15", "Keccak", "Quarkcoin", "Twecoin", @@ -90,11 +94,11 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru static void append_hamsi_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm) { char buf[255]; - sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d", - opt_hamsi_expand_big); + sprintf(buf, " -D SPH_HAMSI_EXPAND_BIG=%d -D SPH_HAMSI_SHORT=%d ", + opt_hamsi_expand_big, ((opt_hamsi_short)?1:0)); strcat(data->compiler_options, buf); - sprintf(buf, "big%u", (unsigned int)opt_hamsi_expand_big); + sprintf(buf, "big%u%s", (unsigned int)opt_hamsi_expand_big, ((opt_hamsi_short)?"hs":"")); strcat(data->binary_filename, buf); } @@ -197,6 +201,103 @@ static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_b return status; } +static cl_int queue_bitblock_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // keccak - search5 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // luffa - search6 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // cubehash - search7 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shavite - search8 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // simd - search9 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // echo - search10 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // hamsi - search11 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // fugue - search12 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // hamsi - search11 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // fugue - search12 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + +static cl_int queue_bitblockold_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // keccak - search5 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // luffa - search6 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // cubehash - search7 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shavite - search8 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // simd - search9 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // combined echo, hamsi, fugue - shabal - whirlpool - search10 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + + static cl_int queue_marucoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) { cl_kernel *kernel; @@ -321,6 +422,100 @@ static cl_int queue_talkcoin_mod_kernel(struct __clState *clState, struct _dev_b return status; } +static cl_int queue_x14_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // keccak - search5 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // luffa - search6 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // cubehash - search7 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shavite - search8 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // simd - search9 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // echo - search10 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // hamsi - search11 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // fugue - search12 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shabal - search13 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + +static cl_int queue_x14_old_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads) +{ + cl_kernel *kernel; + unsigned int num; + cl_ulong le_target; + cl_int status = 0; + + le_target = *(cl_ulong *)(blk->work->device_target + 24); + flip80(clState->cldata, blk->work->data); + status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); + + // blake - search + kernel = &clState->kernel; + num = 0; + CL_SET_ARG(clState->CLbuffer0); + CL_SET_ARG(clState->padbuffer8); + // bmw - search1 + kernel = clState->extra_kernels; + CL_SET_ARG_0(clState->padbuffer8); + // groestl - search2 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // skein - search3 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // jh - search4 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // keccak - search5 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // luffa - search6 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // cubehash - search7 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // shavite - search8 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // simd - search9 + CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8); + // combined echo, hamsi, fugue - shabal - search10 + num = 0; + CL_NEXTKERNEL_SET_ARG(clState->padbuffer8); + CL_SET_ARG(clState->outputBuffer); + CL_SET_ARG(le_target); + + return status; +} + typedef struct _algorithm_settings_t { const char *name; /* Human-readable identifier */ algorithm_type_t type; //common algorithm type @@ -379,8 +574,13 @@ static algorithm_settings_t algos[] = { { "marucoin-mod", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_hamsi_compiler_options}, { "marucoin-modold", ALGO_X13, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_hamsi_compiler_options}, - { "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL}, + { "x14", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_hamsi_compiler_options}, + { "x14old", ALGO_X14, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_hamsi_compiler_options}, + { "bitblock", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_hamsi_compiler_options}, + { "bitblockold", ALGO_X15, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_hamsi_compiler_options}, + + { "talkcoin-mod", ALGO_NIST, 1, 1, 1, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, NULL}, // kernels starting from this will have difficulty calculated by using fuguecoin algorithm #define A_FUGUE(a, b) \ { a, ALGO_FUGUE, 1, 256, 256, 0, 0, 0xFF, 0x00000000ffff0000ULL, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, sha256, NULL} diff --git a/algorithm.h b/algorithm.h index 45549b4d..3bbf8c35 100644 --- a/algorithm.h +++ b/algorithm.h @@ -16,6 +16,8 @@ typedef enum { ALGO_NSCRYPT, ALGO_X11, ALGO_X13, + ALGO_X14, + ALGO_X15, ALGO_KECCAK, ALGO_QUARK, ALGO_TWE, diff --git a/algorithm/bitblock.c b/algorithm/bitblock.c new file mode 100644 index 00000000..9b1af5b8 --- /dev/null +++ b/algorithm/bitblock.c @@ -0,0 +1,253 @@ +/*- + * Copyright 2009 Colin Percival, 2011 ArtForz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + + +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_skein.h" +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" +#include "sph/sph_hamsi.h" +#include "sph/sph_fugue.h" +#include "sph/sph_shabal.h" +#include "sph/sph_whirlpool.h" + +/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ +typedef struct { + sph_blake512_context blake1; + sph_bmw512_context bmw1; + sph_groestl512_context groestl1; + sph_skein512_context skein1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_luffa512_context luffa1; + sph_cubehash512_context cubehash1; + sph_shavite512_context shavite1; + sph_simd512_context simd1; + sph_echo512_context echo1; + sph_hamsi512_context hamsi1; + sph_fugue512_context fugue1; + sph_shabal512_context shabal1; + sph_whirlpool_context whilpool1; +} Xhash_context_holder; + +static Xhash_context_holder base_contexts; + + +void init_Bhash_contexts() +{ + sph_blake512_init(&base_contexts.blake1); + sph_bmw512_init(&base_contexts.bmw1); + sph_groestl512_init(&base_contexts.groestl1); + sph_skein512_init(&base_contexts.skein1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_luffa512_init(&base_contexts.luffa1); + sph_cubehash512_init(&base_contexts.cubehash1); + sph_shavite512_init(&base_contexts.shavite1); + sph_simd512_init(&base_contexts.simd1); + sph_echo512_init(&base_contexts.echo1); + sph_hamsi512_init(&base_contexts.hamsi1); + sph_fugue512_init(&base_contexts.fugue1); + sph_shabal512_init(&base_contexts.shabal1); + sph_whirlpool_init(&base_contexts.whilpool1); +} + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void bitblockhash(void *state, const void *input) +{ + init_Bhash_contexts(); + + Xhash_context_holder ctx; + + uint32_t hashA[16], hashB[16]; + //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo + memcpy(&ctx, &base_contexts, sizeof(base_contexts)); + + sph_blake512 (&ctx.blake1, input, 80); + sph_blake512_close (&ctx.blake1, hashA); + + sph_bmw512 (&ctx.bmw1, hashA, 64); + sph_bmw512_close(&ctx.bmw1, hashB); + + sph_groestl512 (&ctx.groestl1, hashB, 64); + sph_groestl512_close(&ctx.groestl1, hashA); + + sph_skein512 (&ctx.skein1, hashA, 64); + sph_skein512_close(&ctx.skein1, hashB); + + sph_jh512 (&ctx.jh1, hashB, 64); + sph_jh512_close(&ctx.jh1, hashA); + + sph_keccak512 (&ctx.keccak1, hashA, 64); + sph_keccak512_close(&ctx.keccak1, hashB); + + sph_luffa512 (&ctx.luffa1, hashB, 64); + sph_luffa512_close (&ctx.luffa1, hashA); + + sph_cubehash512 (&ctx.cubehash1, hashA, 64); + sph_cubehash512_close(&ctx.cubehash1, hashB); + + sph_shavite512 (&ctx.shavite1, hashB, 64); + sph_shavite512_close(&ctx.shavite1, hashA); + + sph_simd512 (&ctx.simd1, hashA, 64); + sph_simd512_close(&ctx.simd1, hashB); + + sph_echo512 (&ctx.echo1, hashB, 64); + sph_echo512_close(&ctx.echo1, hashA); + + sph_hamsi512 (&ctx.hamsi1, hashA, 64); + sph_hamsi512_close(&ctx.hamsi1, hashB); + + sph_fugue512 (&ctx.fugue1, hashB, 64); + sph_fugue512_close(&ctx.fugue1, hashA); + + sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64); + sph_shabal512_close(&ctx.shabal1, hashB); + + sph_whirlpool(&ctx.whilpool1, hashB, 64); + sph_whirlpool_close(&ctx.whilpool1, hashA); + + memcpy(state, hashA, 32); + +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int bitblock_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + bitblockhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void bitblock_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + bitblockhash(ohash, data); +} + +static inline void be32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} + +bool scanhash_bitblock(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + *nonce = ++n; + data[19] = (n); + bitblockhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} + + + diff --git a/algorithm/bitblock.h b/algorithm/bitblock.h new file mode 100644 index 00000000..fea6bf1c --- /dev/null +++ b/algorithm/bitblock.h @@ -0,0 +1,10 @@ +#ifndef BITBLOCK_H +#define BITBLOCK_H + +#include "miner.h" + +extern int bitblock_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void bitblock_regenhash(struct work *work); + +#endif /* BITBLOCK_H */ diff --git a/algorithm/x14.c b/algorithm/x14.c new file mode 100644 index 00000000..7855ecdd --- /dev/null +++ b/algorithm/x14.c @@ -0,0 +1,247 @@ +/*- + * Copyright 2009 Colin Percival, 2011 ArtForz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + + +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_skein.h" +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" +#include "sph/sph_hamsi.h" +#include "sph/sph_fugue.h" +#include "sph/sph_shabal.h" + +/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ +typedef struct { + sph_blake512_context blake1; + sph_bmw512_context bmw1; + sph_groestl512_context groestl1; + sph_skein512_context skein1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_luffa512_context luffa1; + sph_cubehash512_context cubehash1; + sph_shavite512_context shavite1; + sph_simd512_context simd1; + sph_echo512_context echo1; + sph_hamsi512_context hamsi1; + sph_fugue512_context fugue1; + sph_shabal512_context shabal1; +} Xhash_context_holder; + +static Xhash_context_holder base_contexts; + +void init_X14hash_contexts() +{ + sph_blake512_init(&base_contexts.blake1); + sph_bmw512_init(&base_contexts.bmw1); + sph_groestl512_init(&base_contexts.groestl1); + sph_skein512_init(&base_contexts.skein1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_luffa512_init(&base_contexts.luffa1); + sph_cubehash512_init(&base_contexts.cubehash1); + sph_shavite512_init(&base_contexts.shavite1); + sph_simd512_init(&base_contexts.simd1); + sph_echo512_init(&base_contexts.echo1); + sph_hamsi512_init(&base_contexts.hamsi1); + sph_fugue512_init(&base_contexts.fugue1); + sph_shabal512_init(&base_contexts.shabal1); +} + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void x14hash(void *state, const void *input) +{ + init_X14hash_contexts(); + + Xhash_context_holder ctx; + + uint32_t hashA[16], hashB[16]; + //blake-bmw-groestl-sken-jh-meccak-luffa-cubehash-shivite-simd-echo + memcpy(&ctx, &base_contexts, sizeof(base_contexts)); + + sph_blake512 (&ctx.blake1, input, 80); + sph_blake512_close (&ctx.blake1, hashA); + + sph_bmw512 (&ctx.bmw1, hashA, 64); + sph_bmw512_close(&ctx.bmw1, hashB); + + sph_groestl512 (&ctx.groestl1, hashB, 64); + sph_groestl512_close(&ctx.groestl1, hashA); + + sph_skein512 (&ctx.skein1, hashA, 64); + sph_skein512_close(&ctx.skein1, hashB); + + sph_jh512 (&ctx.jh1, hashB, 64); + sph_jh512_close(&ctx.jh1, hashA); + + sph_keccak512 (&ctx.keccak1, hashA, 64); + sph_keccak512_close(&ctx.keccak1, hashB); + + sph_luffa512 (&ctx.luffa1, hashB, 64); + sph_luffa512_close (&ctx.luffa1, hashA); + + sph_cubehash512 (&ctx.cubehash1, hashA, 64); + sph_cubehash512_close(&ctx.cubehash1, hashB); + + sph_shavite512 (&ctx.shavite1, hashB, 64); + sph_shavite512_close(&ctx.shavite1, hashA); + + sph_simd512 (&ctx.simd1, hashA, 64); + sph_simd512_close(&ctx.simd1, hashB); + + sph_echo512 (&ctx.echo1, hashB, 64); + sph_echo512_close(&ctx.echo1, hashA); + + sph_hamsi512 (&ctx.hamsi1, hashA, 64); + sph_hamsi512_close(&ctx.hamsi1, hashB); + + sph_fugue512 (&ctx.fugue1, hashB, 64); + sph_fugue512_close(&ctx.fugue1, hashA); + + sph_shabal512 (&ctx.shabal1, (const unsigned char*)hashA, 64); + sph_shabal512_close(&ctx.shabal1, hashB); + + memcpy(state, hashB, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + +/* Used externally as confirmation of correct OCL code */ +int x14_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + x14hash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + + if (tmp_hash7 > diff1targ) + return -1; + + if (tmp_hash7 > Htarg) + return 0; + + return 1; +} + +void x14_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + x14hash(ohash, data); +} + +static inline void be32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} + +bool scanhash_x14(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) + { + uint32_t ostate[8]; + *nonce = ++n; + data[19] = (n); + x14hash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", (long unsigned int)data[7]); + + if(unlikely(tmp_hash7 <= Htarg)) + { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) + { + *last_nonce = n; + break; + } + } + + return ret; +} + + diff --git a/algorithm/x14.h b/algorithm/x14.h new file mode 100644 index 00000000..f4e92db6 --- /dev/null +++ b/algorithm/x14.h @@ -0,0 +1,10 @@ +#ifndef X14_H +#define X14_H + +#include "miner.h" + +extern int x14_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void x14_regenhash(struct work *work); + +#endif /* X14_H */ \ No newline at end of file diff --git a/kernel/bitblock.cl b/kernel/bitblock.cl new file mode 100644 index 00000000..00c356c6 --- /dev/null +++ b/kernel/bitblock.cl @@ -0,0 +1,1462 @@ +/* + * X15 kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 Girino Vey + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef BITBLOCK_CL +#define BITBLOCK_CL + +#define DEBUG(x) + +#if __ENDIAN_LITTLE__ + #define SPH_LITTLE_ENDIAN 1 +#else + #define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ + typedef unsigned long long sph_u64; + typedef long long sph_s64; +#else + typedef unsigned long sph_u64; + typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_KECCAK_UNROLL 1 +#ifndef SPH_HAMSI_EXPAND_BIG + #define SPH_HAMSI_EXPAND_BIG 1 +#endif + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "hamsi.cl" +#include "fugue.cl" +#include "shabal.cl" +#include "whirlpool.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); +#endif + +#define SHL(x, n) ((x) << (n)) +#define SHR(x, n) ((x) >> (n)) + +#define CONST_EXP2 q[i+0] + SPH_ROTL64(q[i+1], 5) + q[i+2] + SPH_ROTL64(q[i+3], 11) + \ + q[i+4] + SPH_ROTL64(q[i+5], 27) + q[i+6] + SPH_ROTL64(q[i+7], 32) + \ + q[i+8] + SPH_ROTL64(q[i+9], 37) + q[i+10] + SPH_ROTL64(q[i+11], 43) + \ + q[i+12] + SPH_ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15]) + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // blake + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + T1 = SPH_T64(T1 + 1); + + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // bmw + sph_u64 BMW_H[16]; + +#pragma unroll 16 + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 mv[16],q[32]; + sph_u64 tmp; + + mv[0] = SWAP8(hash->h8[0]); + mv[1] = SWAP8(hash->h8[1]); + mv[2] = SWAP8(hash->h8[2]); + mv[3] = SWAP8(hash->h8[3]); + mv[4] = SWAP8(hash->h8[4]); + mv[5] = SWAP8(hash->h8[5]); + mv[6] = SWAP8(hash->h8[6]); + mv[7] = SWAP8(hash->h8[7]); + mv[8] = 0x80; + mv[9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = SPH_C64(512); + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); + +#pragma unroll 16 + for(int i=0;i<16;i++) + { + mv[i] = BMW_H[i]; + BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i; + } + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); + + hash->h8[0] = SWAP8(BMW_H[8]); + hash->h8[1] = SWAP8(BMW_H[9]); + hash->h8[2] = SWAP8(BMW_H[10]); + hash->h8[3] = SWAP8(BMW_H[11]); + hash->h8[4] = SWAP8(BMW_H[12]); + hash->h8[5] = SWAP8(BMW_H[13]); + hash->h8[6] = SWAP8(BMW_H[14]); + hash->h8[7] = SWAP8(BMW_H[15]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T4_L[i] = T4[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #define T0 T0_L + #define T1 T1_L + #define T2 T2_L + #define T3 T3_L + #define T4 T4_L + #define T5 T5_L + #define T6 T6_L + #define T7 T7_L + + // groestl + sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; + + sph_u64 g[16], m[16]; + g[0] = m[0] = DEC64E(hash->h8[0]); + g[1] = m[1] = DEC64E(hash->h8[1]); + g[2] = m[2] = DEC64E(hash->h8[2]); + g[3] = m[3] = DEC64E(hash->h8[3]); + g[4] = m[4] = DEC64E(hash->h8[4]); + g[5] = m[5] = DEC64E(hash->h8[5]); + g[6] = m[6] = DEC64E(hash->h8[6]); + g[7] = m[7] = DEC64E(hash->h8[7]); + g[8] = m[8] = 0x80; + g[9] = m[9] = 0; + g[10] = m[10] = 0; + g[11] = m[11] = 0; + g[12] = m[12] = 0; + g[13] = m[13] = 0; + g[14] = m[14] = 0; + g[15] = 0x102000000000000; + m[15] = 0x100000000000000; + + PERM_BIG_P(g); + PERM_BIG_Q(m); + + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u] ^= g[u] ^ m[u]; + + PERM_BIG_P(xH); + + for (unsigned int u = 8; u < 16; u ++) + hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash->h8[0]); + m1 = SWAP8(hash->h8[1]); + m2 = SWAP8(hash->h8[2]); + m3 = SWAP8(hash->h8[3]); + m4 = SWAP8(hash->h8[4]); + m5 = SWAP8(hash->h8[5]); + m6 = SWAP8(hash->h8[6]); + m7 = SWAP8(hash->h8[7]); + + UBI_BIG(480, 64); + + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + + UBI_BIG(510, 8); + + hash->h8[0] = SWAP8(h0); + hash->h8[1] = SWAP8(h1); + hash->h8[2] = SWAP8(h2); + hash->h8[3] = SWAP8(h3); + hash->h8[4] = SWAP8(h4); + hash->h8[5] = SWAP8(h5); + hash->h8[6] = SWAP8(h6); + hash->h8[7] = SWAP8(h7); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) + { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } + else if(i == 1) + { + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search6(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // luffa + + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash->h4[1]; + M1 = hash->h4[0]; + M2 = hash->h4[3]; + M3 = hash->h4[2]; + M4 = hash->h4[5]; + M5 = hash->h4[4]; + M6 = hash->h4[7]; + M7 = hash->h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) + { + M0 = hash->h4[9]; + M1 = hash->h4[8]; + M2 = hash->h4[11]; + M3 = hash->h4[10]; + M4 = hash->h4[13]; + M5 = hash->h4[12]; + M6 = hash->h4[15]; + M7 = hash->h4[14]; + } + else if(i == 1) + { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } + else if(i == 2) + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + else if(i == 3) + { + hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search7(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // cubehash.h1 + + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash->h4[1]); + x1 ^= SWAP4(hash->h4[0]); + x2 ^= SWAP4(hash->h4[3]); + x3 ^= SWAP4(hash->h4[2]); + x4 ^= SWAP4(hash->h4[5]); + x5 ^= SWAP4(hash->h4[4]); + x6 ^= SWAP4(hash->h4[7]); + x7 ^= SWAP4(hash->h4[6]); + + for (int i = 0; i < 13; i ++) + { + SIXTEEN_ROUNDS; + + if (i == 0) + { + x0 ^= SWAP4(hash->h4[9]); + x1 ^= SWAP4(hash->h4[8]); + x2 ^= SWAP4(hash->h4[11]); + x3 ^= SWAP4(hash->h4[10]); + x4 ^= SWAP4(hash->h4[13]); + x5 ^= SWAP4(hash->h4[12]); + x6 ^= SWAP4(hash->h4[15]); + x7 ^= SWAP4(hash->h4[14]); + } + else if(i == 1) + x0 ^= 0x80; + else if (i == 2) + xv ^= SPH_C32(1); + } + + hash->h4[0] = x0; + hash->h4[1] = x1; + hash->h4[2] = x2; + hash->h4[3] = x3; + hash->h4[4] = x4; + hash->h4[5] = x5; + hash->h4[6] = x6; + hash->h4[7] = x7; + hash->h4[8] = x8; + hash->h4[9] = x9; + hash->h4[10] = xa; + hash->h4[11] = xb; + hash->h4[12] = xc; + hash->h4[13] = xd; + hash->h4[14] = xe; + hash->h4[15] = xf; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search8(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // shavite + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash->h4[0]; + rk01 = hash->h4[1]; + rk02 = hash->h4[2]; + rk03 = hash->h4[3]; + rk04 = hash->h4[4]; + rk05 = hash->h4[5]; + rk06 = hash->h4[6]; + rk07 = hash->h4[7]; + rk08 = hash->h4[8]; + rk09 = hash->h4[9]; + rk0A = hash->h4[10]; + rk0B = hash->h4[11]; + rk0C = hash->h4[12]; + rk0D = hash->h4[13]; + rk0E = hash->h4[14]; + rk0F = hash->h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash->h4[0] = h0; + hash->h4[1] = h1; + hash->h4[2] = h2; + hash->h4[3] = h3; + hash->h4[4] = h4; + hash->h4[5] = h5; + hash->h4[6] = h6; + hash->h4[7] = h7; + hash->h4[8] = h8; + hash->h4[9] = h9; + hash->h4[10] = hA; + hash->h4[11] = hB; + hash->h4[12] = hC; + hash->h4[13] = hD; + hash->h4[14] = hE; + hash->h4[15] = hF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search9(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // simd + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash->h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) + { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } + + A0 ^= hash->h4[0]; + A1 ^= hash->h4[1]; + A2 ^= hash->h4[2]; + A3 ^= hash->h4[3]; + A4 ^= hash->h4[4]; + A5 ^= hash->h4[5]; + A6 ^= hash->h4[6]; + A7 ^= hash->h4[7]; + B0 ^= hash->h4[8]; + B1 ^= hash->h4[9]; + B2 ^= hash->h4[10]; + B3 ^= hash->h4[11]; + B4 ^= hash->h4[12]; + B5 ^= hash->h4[13]; + B6 ^= hash->h4[14]; + B7 ^= hash->h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + + #undef q + + hash->h4[0] = A0; + hash->h4[1] = A1; + hash->h4[2] = A2; + hash->h4[3] = A3; + hash->h4[4] = A4; + hash->h4[5] = A5; + hash->h4[6] = A6; + hash->h4[7] = A7; + hash->h4[8] = B0; + hash->h4[9] = B1; + hash->h4[10] = B2; + hash->h4[11] = B3; + hash->h4[12] = B4; + hash->h4[13] = B5; + hash->h4[14] = B6; + hash->h4[15] = B7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search10(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + __global hash_t *hashp = &(hashes[gid-offset]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = 0; i < 8; i++) + hash.h8[i] = hashes[gid-offset].h8[i]; + + // echo + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) + BIG_ROUND; + + hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80; + hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81; + hashp->h8[2] = hash.h8[2] ^ Vb10 ^ W10 ^ W90; + hashp->h8[3] = hash.h8[3] ^ Vb11 ^ W11 ^ W91; + hashp->h8[4] = hash.h8[4] ^ Vb20 ^ W20 ^ WA0; + hashp->h8[5] = hash.h8[5] ^ Vb21 ^ W21 ^ WA1; + hashp->h8[6] = hash.h8[6] ^ Vb30 ^ W30 ^ WB0; + hashp->h8[7] = hash.h8[7] ^ Vb31 ^ W31 ^ WB1; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search11(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + + sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; + sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; + sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; + sph_u32 cC = HAMSI_IV512[12], cD = HAMSI_IV512[13], cE = HAMSI_IV512[14], cF = HAMSI_IV512[15]; + sph_u32 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; + sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; + + #define buf(u) hash->h1[i + u] + + for(int i = 0; i < 64; i += 8) + { + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + } + + #undef buf + #define buf(u) (u == 0 ? 0x80 : 0) + + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + + #undef buf + #define buf(u) (u == 6 ? 2 : 0) + + INPUT_BIG_LOCAL; + PF_BIG; + T_BIG; + + for (unsigned u = 0; u < 16; u ++) + hash->h4[u] = h[u]; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search12(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + __global hash_t *hash = &(hashes[gid-offset]); + + // mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + + // fugue + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + sph_u32 S30, S31, S32, S33, S34, S35; + + ulong fc_bit_count = (sph_u64) 64 << 3; + + S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; + S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); + S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1); + S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); + S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); + + FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2])); + FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5])); + FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8])); + FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB])); + FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE])); + FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); + + // apply round shift if necessary + int i; + + for (i = 0; i < 32; i ++) + { + ROR3; + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + } + + for (i = 0; i < 13; i ++) + { + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S28 ^= S00; + ROR8; + SMIX(S00, S01, S02, S03); + } + + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + + hash->h4[0] = SWAP4(S01); + hash->h4[1] = SWAP4(S02); + hash->h4[2] = SWAP4(S03); + hash->h4[3] = SWAP4(S04); + hash->h4[4] = SWAP4(S09); + hash->h4[5] = SWAP4(S10); + hash->h4[6] = SWAP4(S11); + hash->h4[7] = SWAP4(S12); + hash->h4[8] = SWAP4(S18); + hash->h4[9] = SWAP4(S19); + hash->h4[10] = SWAP4(S20); + hash->h4[11] = SWAP4(S21); + hash->h4[12] = SWAP4(S27); + hash->h4[13] = SWAP4(S28); + hash->h4[14] = SWAP4(S29); + hash->h4[15] = SWAP4(S30); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search13(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + __global hash_t *hash = &(hashes[gid-offset]); + + // shabal + sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], + A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; + sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], + B8 = B_init_512[8], B9 = B_init_512[9], BA = B_init_512[10], BB = B_init_512[11], BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15]; + sph_u32 C0 = C_init_512[0], C1 = C_init_512[1], C2 = C_init_512[2], C3 = C_init_512[3], C4 = C_init_512[4], C5 = C_init_512[5], C6 = C_init_512[6], C7 = C_init_512[7], + C8 = C_init_512[8], C9 = C_init_512[9], CA = C_init_512[10], CB = C_init_512[11], CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15]; + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; + sph_u32 Wlow = 1, Whigh = 0; + + M0 = hash->h4[0]; + M1 = hash->h4[1]; + M2 = hash->h4[2]; + M3 = hash->h4[3]; + M4 = hash->h4[4]; + M5 = hash->h4[5]; + M6 = hash->h4[6]; + M7 = hash->h4[7]; + M8 = hash->h4[8]; + M9 = hash->h4[9]; + MA = hash->h4[10]; + MB = hash->h4[11]; + MC = hash->h4[12]; + MD = hash->h4[13]; + ME = hash->h4[14]; + MF = hash->h4[15]; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + + M0 = 0x80; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + + for (unsigned i = 0; i < 3; i ++) + { + SWAP_BC; + XOR_W; + APPLY_P; + } + + hash->h4[0] = B0; + hash->h4[1] = B1; + hash->h4[2] = B2; + hash->h4[3] = B3; + hash->h4[4] = B4; + hash->h4[5] = B5; + hash->h4[6] = B6; + hash->h4[7] = B7; + hash->h4[8] = B8; + hash->h4[9] = B9; + hash->h4[10] = BA; + hash->h4[11] = BB; + hash->h4[12] = BC; + hash->h4[13] = BD; + hash->h4[14] = BE; + hash->h4[15] = BF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search14(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + __global hash_t *hash = &(hashes[gid-offset]); + + __local sph_u64 LT0[256], LT1[256], LT2[256], LT3[256], LT4[256], LT5[256], LT6[256], LT7[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + LT0[i] = plain_T0[i]; + LT1[i] = plain_T1[i]; + LT2[i] = plain_T2[i]; + LT3[i] = plain_T3[i]; + LT4[i] = plain_T4[i]; + LT5[i] = plain_T5[i]; + LT6[i] = plain_T6[i]; + LT7[i] = plain_T7[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + + // whirlpool + sph_u64 n0, n1, n2, n3, n4, n5, n6, n7; + sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; + sph_u64 state[8]; + + n0 = (hash->h8[0]); + n1 = (hash->h8[1]); + n2 = (hash->h8[2]); + n3 = (hash->h8[3]); + n4 = (hash->h8[4]); + n5 = (hash->h8[5]); + n6 = (hash->h8[6]); + n7 = (hash->h8[7]); + + h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; + + n0 ^= h0; + n1 ^= h1; + n2 ^= h2; + n3 ^= h3; + n4 ^= h4; + n5 ^= h5; + n6 ^= h6; + n7 ^= h7; + + #pragma unroll 10 + for (unsigned r = 0; r < 10; r ++) + { + sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); + TRANSFER(h, tmp); + ROUND_WENC(plain_T, n, h, tmp); + TRANSFER(n, tmp); + } + + state[0] = n0 ^ (hash->h8[0]); + state[1] = n1 ^ (hash->h8[1]); + state[2] = n2 ^ (hash->h8[2]); + state[3] = n3 ^ (hash->h8[3]); + state[4] = n4 ^ (hash->h8[4]); + state[5] = n5 ^ (hash->h8[5]); + state[6] = n6 ^ (hash->h8[6]); + state[7] = n7 ^ (hash->h8[7]); + + n0 = 0x80; + n1 = n2 = n3 = n4 = n5 = n6 = 0; + n7 = 0x2000000000000; + + h0 = state[0]; + h1 = state[1]; + h2 = state[2]; + h3 = state[3]; + h4 = state[4]; + h5 = state[5]; + h6 = state[6]; + h7 = state[7]; + + n0 ^= h0; + n1 ^= h1; + n2 ^= h2; + n3 ^= h3; + n4 ^= h4; + n5 ^= h5; + n6 ^= h6; + n7 ^= h7; + + #pragma unroll 10 + for (unsigned r = 0; r < 10; r ++) + { + sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + ROUND_KSCHED(LT, h, tmp, plain_RC[r]); + TRANSFER(h, tmp); + ROUND_WENC(plain_T, n, h, tmp); + TRANSFER(n, tmp); + } + + state[0] ^= n0 ^ 0x80; + state[1] ^= n1; + state[2] ^= n2; + state[3] ^= n3; + state[4] ^= n4; + state[5] ^= n5; + state[6] ^= n6; + state[7] ^= n7 ^ 0x2000000000000; + + for (unsigned i = 0; i < 8; i ++) + hash->h8[i] = state[i]; + + bool result = (hash->h8[3] <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +#endif // BITBLOCK_CL diff --git a/kernel/bitblockold.cl b/kernel/bitblockold.cl new file mode 100644 index 00000000..a18d250d --- /dev/null +++ b/kernel/bitblockold.cl @@ -0,0 +1,1166 @@ +/* + * X15 kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 Girino Vey + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef BITBLOCK_CL +#define BITBLOCK_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_KECCAK_UNROLL 1 +#ifndef SPH_HAMSI_EXPAND_BIG + #define SPH_HAMSI_EXPAND_BIG 4 +#endif + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "hamsi.cl" +#include "fugue.cl" +#include "shabal.cl" +#include "whirlpool.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + // blake + + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + // bmw + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash->h8[0]); + mv[ 1] = SWAP8(hash->h8[1]); + mv[ 2] = SWAP8(hash->h8[2]); + mv[ 3] = SWAP8(hash->h8[3]); + mv[ 4] = SWAP8(hash->h8[4]); + mv[ 5] = SWAP8(hash->h8[5]); + mv[ 6] = SWAP8(hash->h8[6]); + mv[ 7] = SWAP8(hash->h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; +#define M(x) (mv[x]) +#define H(x) (BMW_H[x]) +#define dH(x) (BMW_h2[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + +#define M(x) (BMW_h2[x]) +#define H(x) (final_b[x]) +#define dH(x) (BMW_h1[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + + hash->h8[0] = SWAP8(BMW_h1[8]); + hash->h8[1] = SWAP8(BMW_h1[9]); + hash->h8[2] = SWAP8(BMW_h1[10]); + hash->h8[3] = SWAP8(BMW_h1[11]); + hash->h8[4] = SWAP8(BMW_h1[12]); + hash->h8[5] = SWAP8(BMW_h1[13]); + hash->h8[6] = SWAP8(BMW_h1[14]); + hash->h8[7] = SWAP8(BMW_h1[15]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T4_L[i] = T4[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + +#define T0 T0_L +#define T1 T1_L +#define T2 T2_L +#define T3 T3_L +#define T4 T4_L +#define T5 T5_L +#define T6 T6_L +#define T7 T7_L + + // groestl + + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash->h8[0]); + m[1] = DEC64E(hash->h8[1]); + m[2] = DEC64E(hash->h8[2]); + m[3] = DEC64E(hash->h8[3]); + m[4] = DEC64E(hash->h8[4]); + m[5] = DEC64E(hash->h8[5]); + m[6] = DEC64E(hash->h8[6]); + m[7] = DEC64E(hash->h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash->h8[u] = DEC64E(H[u + 8]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash->h8[0]); + m1 = SWAP8(hash->h8[1]); + m2 = SWAP8(hash->h8[2]); + m3 = SWAP8(hash->h8[3]); + m4 = SWAP8(hash->h8[4]); + m5 = SWAP8(hash->h8[5]); + m6 = SWAP8(hash->h8[6]); + m7 = SWAP8(hash->h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash->h8[0] = SWAP8(h0); + hash->h8[1] = SWAP8(h1); + hash->h8[2] = SWAP8(h2); + hash->h8[3] = SWAP8(h3); + hash->h8[4] = SWAP8(h4); + hash->h8[5] = SWAP8(h5); + hash->h8[6] = SWAP8(h6); + hash->h8[7] = SWAP8(h7); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search6(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // luffa + + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash->h4[1]; + M1 = hash->h4[0]; + M2 = hash->h4[3]; + M3 = hash->h4[2]; + M4 = hash->h4[5]; + M5 = hash->h4[4]; + M6 = hash->h4[7]; + M7 = hash->h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = hash->h4[9]; + M1 = hash->h4[8]; + M2 = hash->h4[11]; + M3 = hash->h4[10]; + M4 = hash->h4[13]; + M5 = hash->h4[12]; + M6 = hash->h4[15]; + M7 = hash->h4[14]; + } else if(i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search7(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // cubehash.h1 + + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash->h4[1]); + x1 ^= SWAP4(hash->h4[0]); + x2 ^= SWAP4(hash->h4[3]); + x3 ^= SWAP4(hash->h4[2]); + x4 ^= SWAP4(hash->h4[5]); + x5 ^= SWAP4(hash->h4[4]); + x6 ^= SWAP4(hash->h4[7]); + x7 ^= SWAP4(hash->h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash->h4[9]); + x1 ^= SWAP4(hash->h4[8]); + x2 ^= SWAP4(hash->h4[11]); + x3 ^= SWAP4(hash->h4[10]); + x4 ^= SWAP4(hash->h4[13]); + x5 ^= SWAP4(hash->h4[12]); + x6 ^= SWAP4(hash->h4[15]); + x7 ^= SWAP4(hash->h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } + } + + hash->h4[0] = x0; + hash->h4[1] = x1; + hash->h4[2] = x2; + hash->h4[3] = x3; + hash->h4[4] = x4; + hash->h4[5] = x5; + hash->h4[6] = x6; + hash->h4[7] = x7; + hash->h4[8] = x8; + hash->h4[9] = x9; + hash->h4[10] = xa; + hash->h4[11] = xb; + hash->h4[12] = xc; + hash->h4[13] = xd; + hash->h4[14] = xe; + hash->h4[15] = xf; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search8(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // shavite + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash->h4[0]; + rk01 = hash->h4[1]; + rk02 = hash->h4[2]; + rk03 = hash->h4[3]; + rk04 = hash->h4[4]; + rk05 = hash->h4[5]; + rk06 = hash->h4[6]; + rk07 = hash->h4[7]; + rk08 = hash->h4[8]; + rk09 = hash->h4[9]; + rk0A = hash->h4[10]; + rk0B = hash->h4[11]; + rk0C = hash->h4[12]; + rk0D = hash->h4[13]; + rk0E = hash->h4[14]; + rk0F = hash->h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash->h4[0] = h0; + hash->h4[1] = h1; + hash->h4[2] = h2; + hash->h4[3] = h3; + hash->h4[4] = h4; + hash->h4[5] = h5; + hash->h4[6] = h6; + hash->h4[7] = h7; + hash->h4[8] = h8; + hash->h4[9] = h9; + hash->h4[10] = hA; + hash->h4[11] = hB; + hash->h4[12] = hC; + hash->h4[13] = hD; + hash->h4[14] = hE; + hash->h4[15] = hF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search9(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // simd + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash->h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } + + A0 ^= hash->h4[0]; + A1 ^= hash->h4[1]; + A2 ^= hash->h4[2]; + A3 ^= hash->h4[3]; + A4 ^= hash->h4[4]; + A5 ^= hash->h4[5]; + A6 ^= hash->h4[6]; + A7 ^= hash->h4[7]; + B0 ^= hash->h4[8]; + B1 ^= hash->h4[9]; + B2 ^= hash->h4[10]; + B3 ^= hash->h4[11]; + B4 ^= hash->h4[12]; + B5 ^= hash->h4[13]; + B6 ^= hash->h4[14]; + B7 ^= hash->h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash->h4[0] = A0; + hash->h4[1] = A1; + hash->h4[2] = A2; + hash->h4[3] = A3; + hash->h4[4] = A4; + hash->h4[5] = A5; + hash->h4[6] = A6; + hash->h4[7] = A7; + hash->h4[8] = B0; + hash->h4[9] = B1; + hash->h4[10] = B2; + hash->h4[11] = B3; + hash->h4[12] = B4; + hash->h4[13] = B5; + hash->h4[14] = B6; + hash->h4[15] = B7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search10(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + + // mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + + for (int i = 0; i < 8; i++) { + hash.h8[i] = hashes[gid-offset].h8[i]; + } + + // echo + + { + + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } + + hash.h8[0] ^= Vb00 ^ W00 ^ W80; + hash.h8[1] ^= Vb01 ^ W01 ^ W81; + hash.h8[2] ^= Vb10 ^ W10 ^ W90; + hash.h8[3] ^= Vb11 ^ W11 ^ W91; + hash.h8[4] ^= Vb20 ^ W20 ^ WA0; + hash.h8[5] ^= Vb21 ^ W21 ^ WA1; + hash.h8[6] ^= Vb30 ^ W30 ^ WB0; + hash.h8[7] ^= Vb31 ^ W31 ^ WB1; + + } + + // hamsi + + { + + sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; + sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; + sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; + sph_u32 cC = HAMSI_IV512[12], cD = HAMSI_IV512[13], cE = HAMSI_IV512[14], cF = HAMSI_IV512[15]; + sph_u32 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; + sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; + +#define buf(u) hash.h1[i + u] + for(int i = 0; i < 64; i += 8) { + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + } +#undef buf +#define buf(u) (u == 0 ? 0x80 : 0) + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; +#undef buf +#define buf(u) (u == 6 ? 2 : 0) + INPUT_BIG_LOCAL; + PF_BIG; + T_BIG; + + for (unsigned u = 0; u < 16; u ++) + hash.h4[u] = h[u]; + + } + + // fugue + + { + + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + sph_u32 S30, S31, S32, S33, S34, S35; + + ulong fc_bit_count = (sph_u64) 64 << 3; + + S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; + S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); + S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1); + S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); + S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); + + FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2])); + FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5])); + FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8])); + FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB])); + FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE])); + FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); + + // apply round shift if necessary + int i; + + for (i = 0; i < 32; i ++) { + ROR3; + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + } + for (i = 0; i < 13; i ++) { + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S28 ^= S00; + ROR8; + SMIX(S00, S01, S02, S03); + } + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + + hash.h4[0] = SWAP4(S01); + hash.h4[1] = SWAP4(S02); + hash.h4[2] = SWAP4(S03); + hash.h4[3] = SWAP4(S04); + hash.h4[4] = SWAP4(S09); + hash.h4[5] = SWAP4(S10); + hash.h4[6] = SWAP4(S11); + hash.h4[7] = SWAP4(S12); + hash.h4[8] = SWAP4(S18); + hash.h4[9] = SWAP4(S19); + hash.h4[10] = SWAP4(S20); + hash.h4[11] = SWAP4(S21); + hash.h4[12] = SWAP4(S27); + hash.h4[13] = SWAP4(S28); + hash.h4[14] = SWAP4(S29); + hash.h4[15] = SWAP4(S30); + + } + + //shabal + { + sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], + A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; + sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], + B8 = B_init_512[8], B9 = B_init_512[9], BA = B_init_512[10], BB = B_init_512[11], BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15]; + sph_u32 C0 = C_init_512[0], C1 = C_init_512[1], C2 = C_init_512[2], C3 = C_init_512[3], C4 = C_init_512[4], C5 = C_init_512[5], C6 = C_init_512[6], C7 = C_init_512[7], + C8 = C_init_512[8], C9 = C_init_512[9], CA = C_init_512[10], CB = C_init_512[11], CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15]; + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; + sph_u32 Wlow = 1, Whigh = 0; + + M0 = hash.h4[0]; + M1 = hash.h4[1]; + M2 = hash.h4[2]; + M3 = hash.h4[3]; + M4 = hash.h4[4]; + M5 = hash.h4[5]; + M6 = hash.h4[6]; + M7 = hash.h4[7]; + M8 = hash.h4[8]; + M9 = hash.h4[9]; + MA = hash.h4[10]; + MB = hash.h4[11]; + MC = hash.h4[12]; + MD = hash.h4[13]; + ME = hash.h4[14]; + MF = hash.h4[15]; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + + M0 = 0x80; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + for (unsigned i = 0; i < 3; i ++) { + SWAP_BC; + XOR_W; + APPLY_P; + } + + hash.h4[0] = B0; + hash.h4[1] = B1; + hash.h4[2] = B2; + hash.h4[3] = B3; + hash.h4[4] = B4; + hash.h4[5] = B5; + hash.h4[6] = B6; + hash.h4[7] = B7; + hash.h4[8] = B8; + hash.h4[9] = B9; + hash.h4[10] = BA; + hash.h4[11] = BB; + hash.h4[12] = BC; + hash.h4[13] = BD; + hash.h4[14] = BE; + hash.h4[15] = BF; + } + + //whirlpool + { + sph_u64 n0, n1, n2, n3, n4, n5, n6, n7; + sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; + sph_u64 state[8]; + + n0 = (hash.h8[0]); + n1 = (hash.h8[1]); + n2 = (hash.h8[2]); + n3 = (hash.h8[3]); + n4 = (hash.h8[4]); + n5 = (hash.h8[5]); + n6 = (hash.h8[6]); + n7 = (hash.h8[7]); + + h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; + + n0 ^= h0; + n1 ^= h1; + n2 ^= h2; + n3 ^= h3; + n4 ^= h4; + n5 ^= h5; + n6 ^= h6; + n7 ^= h7; + + for (unsigned r = 0; r < 10; r ++) { + sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); + TRANSFER(h, tmp); + ROUND_WENC(plain_T, n, h, tmp); + TRANSFER(n, tmp); + } + + state[0] = n0 ^ (hash.h8[0]); + state[1] = n1 ^ (hash.h8[1]); + state[2] = n2 ^ (hash.h8[2]); + state[3] = n3 ^ (hash.h8[3]); + state[4] = n4 ^ (hash.h8[4]); + state[5] = n5 ^ (hash.h8[5]); + state[6] = n6 ^ (hash.h8[6]); + state[7] = n7 ^ (hash.h8[7]); + + n0 = 0x80; + n1 = n2 = n3 = n4 = n5 = n6 = 0; + n7 = 0x2000000000000; + + h0 = state[0]; + h1 = state[1]; + h2 = state[2]; + h3 = state[3]; + h4 = state[4]; + h5 = state[5]; + h6 = state[6]; + h7 = state[7]; + + n0 ^= h0; + n1 ^= h1; + n2 ^= h2; + n3 ^= h3; + n4 ^= h4; + n5 ^= h5; + n6 ^= h6; + n7 ^= h7; + + for (unsigned r = 0; r < 10; r ++) { + sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + + ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); + TRANSFER(h, tmp); + ROUND_WENC(plain_T, n, h, tmp); + TRANSFER(n, tmp); + } + + state[0] ^= n0 ^ 0x80; + state[1] ^= n1; + state[2] ^= n2; + state[3] ^= n3; + state[4] ^= n4; + state[5] ^= n5; + state[6] ^= n6; + state[7] ^= n7 ^ 0x2000000000000; + + for (unsigned i = 0; i < 8; i ++) + hash.h8[i] = state[i]; + } + + bool result = (hash.h8[3] <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +#endif // BITBLOCK_CL \ No newline at end of file diff --git a/kernel/darkcoin-mod.cl b/kernel/darkcoin-mod.cl index 5b56fa21..caa10368 100644 --- a/kernel/darkcoin-mod.cl +++ b/kernel/darkcoin-mod.cl @@ -95,8 +95,6 @@ #include "shavite.cl" #include "simd.cl" #include "echo.cl" -#include "hamsi.cl" -#include "fugue.cl" #define SWAP4(x) as_uint(as_uchar4(x).wzyx) #define SWAP8(x) as_ulong(as_uchar8(x).s76543210) @@ -181,7 +179,7 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search1(__global hash_t* hashes) { - uint gid = get_global_id(0); + uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); // bmw @@ -456,93 +454,69 @@ __kernel void search2(__global hash_t* hashes) uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - #if !SPH_SMALL_FOOTPRINT_GROESTL - __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256]; - __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256]; - #else - __local sph_u64 T0_C[256], T4_C[256]; - #endif - + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + int init = get_local_id(0); int step = get_local_size(0); - + for (int i = init; i < 256; i += step) { - T0_C[i] = T0[i]; - T4_C[i] = T4[i]; - #if !SPH_SMALL_FOOTPRINT_GROESTL - T1_C[i] = T1[i]; - T2_C[i] = T2[i]; - T3_C[i] = T3[i]; - T5_C[i] = T5[i]; - T6_C[i] = T6[i]; - T7_C[i] = T7[i]; - #endif + T0_L[i] = T0[i]; + T4_L[i] = T4[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; } - - barrier(CLK_LOCAL_MEM_FENCE); // groestl - - #define T0 T0_C - #define T1 T1_C - #define T2 T2_C - #define T3 T3_C - #define T4 T4_C - #define T5 T5_C - #define T6 T6_C - #define T7 T7_C - - sph_u64 H[16]; - - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; - - #if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); - #else - H[15] = (sph_u64)512; - #endif + + barrier(CLK_LOCAL_MEM_FENCE); + + #define T0 T0_L + #define T1 T1_L + #define T2 T2_L + #define T3 T3_L + #define T4 T4_L + #define T5 T5_L + #define T6 T6_L + #define T7 T7_L + + // groestl + sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; sph_u64 g[16], m[16]; - m[0] = DEC64E(hash->h8[0]); - m[1] = DEC64E(hash->h8[1]); - m[2] = DEC64E(hash->h8[2]); - m[3] = DEC64E(hash->h8[3]); - m[4] = DEC64E(hash->h8[4]); - m[5] = DEC64E(hash->h8[5]); - m[6] = DEC64E(hash->h8[6]); - m[7] = DEC64E(hash->h8[7]); - - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + g[0] = m[0] = DEC64E(hash->h8[0]); + g[1] = m[1] = DEC64E(hash->h8[1]); + g[2] = m[2] = DEC64E(hash->h8[2]); + g[3] = m[3] = DEC64E(hash->h8[3]); + g[4] = m[4] = DEC64E(hash->h8[4]); + g[5] = m[5] = DEC64E(hash->h8[5]); + g[6] = m[6] = DEC64E(hash->h8[6]); + g[7] = m[7] = DEC64E(hash->h8[7]); + g[8] = m[8] = 0x80; + g[9] = m[9] = 0; + g[10] = m[10] = 0; + g[11] = m[11] = 0; + g[12] = m[12] = 0; + g[13] = m[13] = 0; + g[14] = m[14] = 0; + g[15] = 0x102000000000000; + m[15] = 0x100000000000000; PERM_BIG_P(g); PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; + xH[u] = H[u] ^= g[u] ^ m[u]; + PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - - for (unsigned int u = 0; u < 8; u ++) - hash->h8[u] = DEC64E(H[u + 8]); - - barrier(CLK_GLOBAL_MEM_FENCE); + for (unsigned int u = 8; u < 16; u ++) + hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); + + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -863,7 +837,7 @@ __kernel void search8(__global hash_t* hashes) sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; rk00 = hash->h4[0]; rk01 = hash->h4[1]; diff --git a/kernel/fugue.cl b/kernel/fugue.cl index fca6d441..7f500278 100644 --- a/kernel/fugue.cl +++ b/kernel/fugue.cl @@ -30,26 +30,26 @@ * @author Thomas Pornin */ -__constant static const sph_u32 IV224[] = { +__constant const sph_u32 IV224[] = { SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c), SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215), SPH_C32(0xbd8d679a) }; -__constant static const sph_u32 IV256[] = { +__constant const sph_u32 IV256[] = { SPH_C32(0xe952bdde), SPH_C32(0x6671135f), SPH_C32(0xe0d4f668), SPH_C32(0xd2b0b594), SPH_C32(0xf96c621d), SPH_C32(0xfbf929de), SPH_C32(0x9149e899), SPH_C32(0x34f8c248) }; -__constant static const sph_u32 IV384[] = { +__constant const sph_u32 IV384[] = { SPH_C32(0xaa61ec0d), SPH_C32(0x31252e1f), SPH_C32(0xa01db4c7), SPH_C32(0x00600985), SPH_C32(0x215ef44a), SPH_C32(0x741b5e9c), SPH_C32(0xfa693e9a), SPH_C32(0x473eb040), SPH_C32(0xe502ae8a), SPH_C32(0xa99c25e0), SPH_C32(0xbc95517c), SPH_C32(0x5c1095a1) }; -__constant static const sph_u32 IV512[] = { +__constant const sph_u32 IV512[] = { SPH_C32(0x8807a57e), SPH_C32(0xe616af75), SPH_C32(0xc5d3e4db), SPH_C32(0xac9ab027), SPH_C32(0xd915f117), SPH_C32(0xb6eecc54), SPH_C32(0x06e8020b), SPH_C32(0x4a92efd1), SPH_C32(0xaac6e2c9), @@ -58,7 +58,7 @@ __constant static const sph_u32 IV512[] = { SPH_C32(0xe13e3567) }; -__constant static const sph_u32 mixtab0[] = { +__constant const sph_u32 mixtab0_c[] = { SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7), SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7), SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0), @@ -147,7 +147,7 @@ __constant static const sph_u32 mixtab0[] = { SPH_C32(0x16166258) }; -__constant static const sph_u32 mixtab1[] = { +__constant const sph_u32 mixtab1_c[] = { SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e), SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a), SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090), @@ -236,7 +236,7 @@ __constant static const sph_u32 mixtab1[] = { SPH_C32(0x58161662) }; -__constant static const sph_u32 mixtab2[] = { +__constant const sph_u32 mixtab2_c[] = { SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777), SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b), SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030), @@ -325,7 +325,7 @@ __constant static const sph_u32 mixtab2[] = { SPH_C32(0x62581616) }; -__constant static const sph_u32 mixtab3[] = { +__constant const sph_u32 mixtab3_c[] = { SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777), SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b), SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030), diff --git a/kernel/fuguecoin.cl b/kernel/fuguecoin.cl index 87396f59..bdb4665d 100644 --- a/kernel/fuguecoin.cl +++ b/kernel/fuguecoin.cl @@ -79,6 +79,19 @@ __kernel void search(__global unsigned char* input, volatile __global uint* outp { uint gid = get_global_id(0); + //mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + barrier(CLK_GLOBAL_MEM_FENCE); + sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \ sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \ sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7]; diff --git a/kernel/hamsi.cl b/kernel/hamsi.cl index 0fb445be..ec9d4a40 100644 --- a/kernel/hamsi.cl +++ b/kernel/hamsi.cl @@ -88,22 +88,31 @@ */ #if !defined SPH_HAMSI_EXPAND_SMALL -#if SPH_SMALL_FOOTPRINT_HAMSI -#define SPH_HAMSI_EXPAND_SMALL 4 -#else -#define SPH_HAMSI_EXPAND_SMALL 8 -#endif + #if SPH_SMALL_FOOTPRINT_HAMSI + #define SPH_HAMSI_EXPAND_SMALL 4 + #else + #define SPH_HAMSI_EXPAND_SMALL 8 + #endif #endif #if !defined SPH_HAMSI_EXPAND_BIG -#define SPH_HAMSI_EXPAND_BIG 8 + #define SPH_HAMSI_EXPAND_BIG 8 #endif #ifdef _MSC_VER #pragma warning (disable: 4146) #endif -#include "hamsi_helper.cl" +//temp fix for shortened implementation of X15 +#ifdef SPH_HAMSI_SHORT + #if SPH_HAMSI_SHORT == 1 && SPH_HAMSI_EXPAND_BIG == 1 + #include "hamsi_helper_big.cl" + #else + #include "hamsi_helper.cl" + #endif +#else + #include "hamsi_helper.cl" +#endif __constant static const sph_u32 HAMSI_IV224[] = { SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3), diff --git a/kernel/hamsi_helper_big.cl b/kernel/hamsi_helper_big.cl new file mode 100644 index 00000000..da279ae5 --- /dev/null +++ b/kernel/hamsi_helper_big.cl @@ -0,0 +1,515 @@ +/* $Id: hamsi_helper.c 202 2010-05-31 15:46:48Z tp $ */ +/* + * Helper code for Hamsi (input block expansion). This code is + * automatically generated and includes precomputed tables for + * expansion code which handles 2 to 8 bits at a time. + * + * This file is included from hamsi.c, and is not meant to be compiled + * independently. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#if SPH_HAMSI_EXPAND_BIG == 1 + +/* Note: this table lists bits within each byte from least + siginificant to most significant. */ +__constant const sph_u32 T512[64][16] = { + { SPH_C32(0xef0b0270), SPH_C32(0x3afd0000), SPH_C32(0x5dae0000), + SPH_C32(0x69490000), SPH_C32(0x9b0f3c06), SPH_C32(0x4405b5f9), + SPH_C32(0x66140a51), SPH_C32(0x924f5d0a), SPH_C32(0xc96b0030), + SPH_C32(0xe7250000), SPH_C32(0x2f840000), SPH_C32(0x264f0000), + SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), SPH_C32(0x509f6984), + SPH_C32(0x9e69af68) }, + { SPH_C32(0xc96b0030), SPH_C32(0xe7250000), SPH_C32(0x2f840000), + SPH_C32(0x264f0000), SPH_C32(0x08695bf9), SPH_C32(0x6dfcf137), + SPH_C32(0x509f6984), SPH_C32(0x9e69af68), SPH_C32(0x26600240), + SPH_C32(0xddd80000), SPH_C32(0x722a0000), SPH_C32(0x4f060000), + SPH_C32(0x936667ff), SPH_C32(0x29f944ce), SPH_C32(0x368b63d5), + SPH_C32(0x0c26f262) }, + { SPH_C32(0x145a3c00), SPH_C32(0xb9e90000), SPH_C32(0x61270000), + SPH_C32(0xf1610000), SPH_C32(0xce613d6c), SPH_C32(0xb0493d78), + SPH_C32(0x47a96720), SPH_C32(0xe18e24c5), SPH_C32(0x23671400), + SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), SPH_C32(0xfb750000), + SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), SPH_C32(0x02c40a3f), + SPH_C32(0xdc24e61f) }, + { SPH_C32(0x23671400), SPH_C32(0xc8b90000), SPH_C32(0xf4c70000), + SPH_C32(0xfb750000), SPH_C32(0x73cd2465), SPH_C32(0xf8a6a549), + SPH_C32(0x02c40a3f), SPH_C32(0xdc24e61f), SPH_C32(0x373d2800), + SPH_C32(0x71500000), SPH_C32(0x95e00000), SPH_C32(0x0a140000), + SPH_C32(0xbdac1909), SPH_C32(0x48ef9831), SPH_C32(0x456d6d1f), + SPH_C32(0x3daac2da) }, + { SPH_C32(0x54285c00), SPH_C32(0xeaed0000), SPH_C32(0xc5d60000), + SPH_C32(0xa1c50000), SPH_C32(0xb3a26770), SPH_C32(0x94a5c4e1), + SPH_C32(0x6bb0419d), SPH_C32(0x551b3782), SPH_C32(0x9cbb1800), + SPH_C32(0xb0d30000), SPH_C32(0x92510000), SPH_C32(0xed930000), + SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), SPH_C32(0x430633da), + SPH_C32(0x78cace29) }, + { SPH_C32(0x9cbb1800), SPH_C32(0xb0d30000), SPH_C32(0x92510000), + SPH_C32(0xed930000), SPH_C32(0x593a4345), SPH_C32(0xe114d5f4), + SPH_C32(0x430633da), SPH_C32(0x78cace29), SPH_C32(0xc8934400), + SPH_C32(0x5a3e0000), SPH_C32(0x57870000), SPH_C32(0x4c560000), + SPH_C32(0xea982435), SPH_C32(0x75b11115), SPH_C32(0x28b67247), + SPH_C32(0x2dd1f9ab) }, + { SPH_C32(0x29449c00), SPH_C32(0x64e70000), SPH_C32(0xf24b0000), + SPH_C32(0xc2f30000), SPH_C32(0x0ede4e8f), SPH_C32(0x56c23745), + SPH_C32(0xf3e04259), SPH_C32(0x8d0d9ec4), SPH_C32(0x466d0c00), + SPH_C32(0x08620000), SPH_C32(0xdd5d0000), SPH_C32(0xbadd0000), + SPH_C32(0x6a927942), SPH_C32(0x441f2b93), SPH_C32(0x218ace6f), + SPH_C32(0xbf2c0be2) }, + { SPH_C32(0x466d0c00), SPH_C32(0x08620000), SPH_C32(0xdd5d0000), + SPH_C32(0xbadd0000), SPH_C32(0x6a927942), SPH_C32(0x441f2b93), + SPH_C32(0x218ace6f), SPH_C32(0xbf2c0be2), SPH_C32(0x6f299000), + SPH_C32(0x6c850000), SPH_C32(0x2f160000), SPH_C32(0x782e0000), + SPH_C32(0x644c37cd), SPH_C32(0x12dd1cd6), SPH_C32(0xd26a8c36), + SPH_C32(0x32219526) }, + { SPH_C32(0xf6800005), SPH_C32(0x3443c000), SPH_C32(0x24070000), + SPH_C32(0x8f3d0000), SPH_C32(0x21373bfb), SPH_C32(0x0ab8d5ae), + SPH_C32(0xcdc58b19), SPH_C32(0xd795ba31), SPH_C32(0xa67f0001), + SPH_C32(0x71378000), SPH_C32(0x19fc0000), SPH_C32(0x96db0000), + SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), SPH_C32(0x2c6d478f), + SPH_C32(0xac8e6c88) }, + { SPH_C32(0xa67f0001), SPH_C32(0x71378000), SPH_C32(0x19fc0000), + SPH_C32(0x96db0000), SPH_C32(0x3a8b6dfd), SPH_C32(0xebcaaef3), + SPH_C32(0x2c6d478f), SPH_C32(0xac8e6c88), SPH_C32(0x50ff0004), + SPH_C32(0x45744000), SPH_C32(0x3dfb0000), SPH_C32(0x19e60000), + SPH_C32(0x1bbc5606), SPH_C32(0xe1727b5d), SPH_C32(0xe1a8cc96), + SPH_C32(0x7b1bd6b9) }, + { SPH_C32(0xf7750009), SPH_C32(0xcf3cc000), SPH_C32(0xc3d60000), + SPH_C32(0x04920000), SPH_C32(0x029519a9), SPH_C32(0xf8e836ba), + SPH_C32(0x7a87f14e), SPH_C32(0x9e16981a), SPH_C32(0xd46a0000), + SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), SPH_C32(0x4a290000), + SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), SPH_C32(0x98369604), + SPH_C32(0xf746c320) }, + { SPH_C32(0xd46a0000), SPH_C32(0x8dc8c000), SPH_C32(0xa5af0000), + SPH_C32(0x4a290000), SPH_C32(0xfc4e427a), SPH_C32(0xc9b4866c), + SPH_C32(0x98369604), SPH_C32(0xf746c320), SPH_C32(0x231f0009), + SPH_C32(0x42f40000), SPH_C32(0x66790000), SPH_C32(0x4ebb0000), + SPH_C32(0xfedb5bd3), SPH_C32(0x315cb0d6), SPH_C32(0xe2b1674a), + SPH_C32(0x69505b3a) }, + { SPH_C32(0x774400f0), SPH_C32(0xf15a0000), SPH_C32(0xf5b20000), + SPH_C32(0x34140000), SPH_C32(0x89377e8c), SPH_C32(0x5a8bec25), + SPH_C32(0x0bc3cd1e), SPH_C32(0xcf3775cb), SPH_C32(0xf46c0050), + SPH_C32(0x96180000), SPH_C32(0x14a50000), SPH_C32(0x031f0000), + SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), SPH_C32(0x9ca470d2), + SPH_C32(0x8a341574) }, + { SPH_C32(0xf46c0050), SPH_C32(0x96180000), SPH_C32(0x14a50000), + SPH_C32(0x031f0000), SPH_C32(0x42947eb8), SPH_C32(0x66bf7e19), + SPH_C32(0x9ca470d2), SPH_C32(0x8a341574), SPH_C32(0x832800a0), + SPH_C32(0x67420000), SPH_C32(0xe1170000), SPH_C32(0x370b0000), + SPH_C32(0xcba30034), SPH_C32(0x3c34923c), SPH_C32(0x9767bdcc), + SPH_C32(0x450360bf) }, + { SPH_C32(0xe8870170), SPH_C32(0x9d720000), SPH_C32(0x12db0000), + SPH_C32(0xd4220000), SPH_C32(0xf2886b27), SPH_C32(0xa921e543), + SPH_C32(0x4ef8b518), SPH_C32(0x618813b1), SPH_C32(0xb4370060), + SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), SPH_C32(0x5cae0000), + SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), SPH_C32(0x1b365f3d), + SPH_C32(0xf3d45758) }, + { SPH_C32(0xb4370060), SPH_C32(0x0c4c0000), SPH_C32(0x56c20000), + SPH_C32(0x5cae0000), SPH_C32(0x94541f3f), SPH_C32(0x3b3ef825), + SPH_C32(0x1b365f3d), SPH_C32(0xf3d45758), SPH_C32(0x5cb00110), + SPH_C32(0x913e0000), SPH_C32(0x44190000), SPH_C32(0x888c0000), + SPH_C32(0x66dc7418), SPH_C32(0x921f1d66), SPH_C32(0x55ceea25), + SPH_C32(0x925c44e9) }, + { SPH_C32(0x0c720000), SPH_C32(0x49e50f00), SPH_C32(0x42790000), + SPH_C32(0x5cea0000), SPH_C32(0x33aa301a), SPH_C32(0x15822514), + SPH_C32(0x95a34b7b), SPH_C32(0xb44b0090), SPH_C32(0xfe220000), + SPH_C32(0xa7580500), SPH_C32(0x25d10000), SPH_C32(0xf7600000), + SPH_C32(0x893178da), SPH_C32(0x1fd4f860), SPH_C32(0x4ed0a315), + SPH_C32(0xa123ff9f) }, + { SPH_C32(0xfe220000), SPH_C32(0xa7580500), SPH_C32(0x25d10000), + SPH_C32(0xf7600000), SPH_C32(0x893178da), SPH_C32(0x1fd4f860), + SPH_C32(0x4ed0a315), SPH_C32(0xa123ff9f), SPH_C32(0xf2500000), + SPH_C32(0xeebd0a00), SPH_C32(0x67a80000), SPH_C32(0xab8a0000), + SPH_C32(0xba9b48c0), SPH_C32(0x0a56dd74), SPH_C32(0xdb73e86e), + SPH_C32(0x1568ff0f) }, + { SPH_C32(0x45180000), SPH_C32(0xa5b51700), SPH_C32(0xf96a0000), + SPH_C32(0x3b480000), SPH_C32(0x1ecc142c), SPH_C32(0x231395d6), + SPH_C32(0x16bca6b0), SPH_C32(0xdf33f4df), SPH_C32(0xb83d0000), + SPH_C32(0x16710600), SPH_C32(0x379a0000), SPH_C32(0xf5b10000), + SPH_C32(0x228161ac), SPH_C32(0xae48f145), SPH_C32(0x66241616), + SPH_C32(0xc5c1eb3e) }, + { SPH_C32(0xb83d0000), SPH_C32(0x16710600), SPH_C32(0x379a0000), + SPH_C32(0xf5b10000), SPH_C32(0x228161ac), SPH_C32(0xae48f145), + SPH_C32(0x66241616), SPH_C32(0xc5c1eb3e), SPH_C32(0xfd250000), + SPH_C32(0xb3c41100), SPH_C32(0xcef00000), SPH_C32(0xcef90000), + SPH_C32(0x3c4d7580), SPH_C32(0x8d5b6493), SPH_C32(0x7098b0a6), + SPH_C32(0x1af21fe1) }, + { SPH_C32(0x75a40000), SPH_C32(0xc28b2700), SPH_C32(0x94a40000), + SPH_C32(0x90f50000), SPH_C32(0xfb7857e0), SPH_C32(0x49ce0bae), + SPH_C32(0x1767c483), SPH_C32(0xaedf667e), SPH_C32(0xd1660000), + SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), SPH_C32(0xf6940000), + SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), SPH_C32(0xb4431b17), + SPH_C32(0x857f3c2b) }, + { SPH_C32(0xd1660000), SPH_C32(0x1bbc0300), SPH_C32(0x9eec0000), + SPH_C32(0xf6940000), SPH_C32(0x03024527), SPH_C32(0xcf70fcf2), + SPH_C32(0xb4431b17), SPH_C32(0x857f3c2b), SPH_C32(0xa4c20000), + SPH_C32(0xd9372400), SPH_C32(0x0a480000), SPH_C32(0x66610000), + SPH_C32(0xf87a12c7), SPH_C32(0x86bef75c), SPH_C32(0xa324df94), + SPH_C32(0x2ba05a55) }, + { SPH_C32(0x75c90003), SPH_C32(0x0e10c000), SPH_C32(0xd1200000), + SPH_C32(0xbaea0000), SPH_C32(0x8bc42f3e), SPH_C32(0x8758b757), + SPH_C32(0xbb28761d), SPH_C32(0x00b72e2b), SPH_C32(0xeecf0001), + SPH_C32(0x6f564000), SPH_C32(0xf33e0000), SPH_C32(0xa79e0000), + SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), SPH_C32(0x4a3b40ba), + SPH_C32(0xfeabf254) }, + { SPH_C32(0xeecf0001), SPH_C32(0x6f564000), SPH_C32(0xf33e0000), + SPH_C32(0xa79e0000), SPH_C32(0xbdb57219), SPH_C32(0xb711ebc5), + SPH_C32(0x4a3b40ba), SPH_C32(0xfeabf254), SPH_C32(0x9b060002), + SPH_C32(0x61468000), SPH_C32(0x221e0000), SPH_C32(0x1d740000), + SPH_C32(0x36715d27), SPH_C32(0x30495c92), SPH_C32(0xf11336a7), + SPH_C32(0xfe1cdc7f) }, + { SPH_C32(0x86790000), SPH_C32(0x3f390002), SPH_C32(0xe19ae000), + SPH_C32(0x98560000), SPH_C32(0x9565670e), SPH_C32(0x4e88c8ea), + SPH_C32(0xd3dd4944), SPH_C32(0x161ddab9), SPH_C32(0x30b70000), + SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), SPH_C32(0x42c40000), + SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), SPH_C32(0x21afa1ea), + SPH_C32(0xb0a51834) }, + { SPH_C32(0x30b70000), SPH_C32(0xe5d00000), SPH_C32(0xf4f46000), + SPH_C32(0x42c40000), SPH_C32(0x63b83d6a), SPH_C32(0x78ba9460), + SPH_C32(0x21afa1ea), SPH_C32(0xb0a51834), SPH_C32(0xb6ce0000), + SPH_C32(0xdae90002), SPH_C32(0x156e8000), SPH_C32(0xda920000), + SPH_C32(0xf6dd5a64), SPH_C32(0x36325c8a), SPH_C32(0xf272e8ae), + SPH_C32(0xa6b8c28d) }, + { SPH_C32(0x14190000), SPH_C32(0x23ca003c), SPH_C32(0x50df0000), + SPH_C32(0x44b60000), SPH_C32(0x1b6c67b0), SPH_C32(0x3cf3ac75), + SPH_C32(0x61e610b0), SPH_C32(0xdbcadb80), SPH_C32(0xe3430000), + SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), SPH_C32(0xaa4e0000), + SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), SPH_C32(0x123db156), + SPH_C32(0x3a4e99d7) }, + { SPH_C32(0xe3430000), SPH_C32(0x3a4e0014), SPH_C32(0xf2c60000), + SPH_C32(0xaa4e0000), SPH_C32(0xdb1e42a6), SPH_C32(0x256bbe15), + SPH_C32(0x123db156), SPH_C32(0x3a4e99d7), SPH_C32(0xf75a0000), + SPH_C32(0x19840028), SPH_C32(0xa2190000), SPH_C32(0xeef80000), + SPH_C32(0xc0722516), SPH_C32(0x19981260), SPH_C32(0x73dba1e6), + SPH_C32(0xe1844257) }, + { SPH_C32(0x54500000), SPH_C32(0x0671005c), SPH_C32(0x25ae0000), + SPH_C32(0x6a1e0000), SPH_C32(0x2ea54edf), SPH_C32(0x664e8512), + SPH_C32(0xbfba18c3), SPH_C32(0x7e715d17), SPH_C32(0xbc8d0000), + SPH_C32(0xfc3b0018), SPH_C32(0x19830000), SPH_C32(0xd10b0000), + SPH_C32(0xae1878c4), SPH_C32(0x42a69856), SPH_C32(0x0012da37), + SPH_C32(0x2c3b504e) }, + { SPH_C32(0xbc8d0000), SPH_C32(0xfc3b0018), SPH_C32(0x19830000), + SPH_C32(0xd10b0000), SPH_C32(0xae1878c4), SPH_C32(0x42a69856), + SPH_C32(0x0012da37), SPH_C32(0x2c3b504e), SPH_C32(0xe8dd0000), + SPH_C32(0xfa4a0044), SPH_C32(0x3c2d0000), SPH_C32(0xbb150000), + SPH_C32(0x80bd361b), SPH_C32(0x24e81d44), SPH_C32(0xbfa8c2f4), + SPH_C32(0x524a0d59) }, + { SPH_C32(0x69510000), SPH_C32(0xd4e1009c), SPH_C32(0xc3230000), + SPH_C32(0xac2f0000), SPH_C32(0xe4950bae), SPH_C32(0xcea415dc), + SPH_C32(0x87ec287c), SPH_C32(0xbce1a3ce), SPH_C32(0xc6730000), + SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), SPH_C32(0x218d0000), + SPH_C32(0x23111587), SPH_C32(0x7913512f), SPH_C32(0x1d28ac88), + SPH_C32(0x378dd173) }, + { SPH_C32(0xc6730000), SPH_C32(0xaf8d000c), SPH_C32(0xa4c10000), + SPH_C32(0x218d0000), SPH_C32(0x23111587), SPH_C32(0x7913512f), + SPH_C32(0x1d28ac88), SPH_C32(0x378dd173), SPH_C32(0xaf220000), + SPH_C32(0x7b6c0090), SPH_C32(0x67e20000), SPH_C32(0x8da20000), + SPH_C32(0xc7841e29), SPH_C32(0xb7b744f3), SPH_C32(0x9ac484f4), + SPH_C32(0x8b6c72bd) }, + { SPH_C32(0xcc140000), SPH_C32(0xa5630000), SPH_C32(0x5ab90780), + SPH_C32(0x3b500000), SPH_C32(0x4bd013ff), SPH_C32(0x879b3418), + SPH_C32(0x694348c1), SPH_C32(0xca5a87fe), SPH_C32(0x819e0000), + SPH_C32(0xec570000), SPH_C32(0x66320280), SPH_C32(0x95f30000), + SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), SPH_C32(0xe65aa22d), + SPH_C32(0x8e67b7fa) }, + { SPH_C32(0x819e0000), SPH_C32(0xec570000), SPH_C32(0x66320280), + SPH_C32(0x95f30000), SPH_C32(0x5da92802), SPH_C32(0x48f43cbc), + SPH_C32(0xe65aa22d), SPH_C32(0x8e67b7fa), SPH_C32(0x4d8a0000), + SPH_C32(0x49340000), SPH_C32(0x3c8b0500), SPH_C32(0xaea30000), + SPH_C32(0x16793bfd), SPH_C32(0xcf6f08a4), SPH_C32(0x8f19eaec), + SPH_C32(0x443d3004) }, + { SPH_C32(0x78230000), SPH_C32(0x12fc0000), SPH_C32(0xa93a0b80), + SPH_C32(0x90a50000), SPH_C32(0x713e2879), SPH_C32(0x7ee98924), + SPH_C32(0xf08ca062), SPH_C32(0x636f8bab), SPH_C32(0x02af0000), + SPH_C32(0xb7280000), SPH_C32(0xba1c0300), SPH_C32(0x56980000), + SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), SPH_C32(0xa95c149a), + SPH_C32(0xf4f6ea7b) }, + { SPH_C32(0x02af0000), SPH_C32(0xb7280000), SPH_C32(0xba1c0300), + SPH_C32(0x56980000), SPH_C32(0xba8d45d3), SPH_C32(0x8048c667), + SPH_C32(0xa95c149a), SPH_C32(0xf4f6ea7b), SPH_C32(0x7a8c0000), + SPH_C32(0xa5d40000), SPH_C32(0x13260880), SPH_C32(0xc63d0000), + SPH_C32(0xcbb36daa), SPH_C32(0xfea14f43), SPH_C32(0x59d0b4f8), + SPH_C32(0x979961d0) }, + { SPH_C32(0xac480000), SPH_C32(0x1ba60000), SPH_C32(0x45fb1380), + SPH_C32(0x03430000), SPH_C32(0x5a85316a), SPH_C32(0x1fb250b6), + SPH_C32(0xfe72c7fe), SPH_C32(0x91e478f6), SPH_C32(0x1e4e0000), + SPH_C32(0xdecf0000), SPH_C32(0x6df80180), SPH_C32(0x77240000), + SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), SPH_C32(0xcda31812), + SPH_C32(0x98aa496e) }, + { SPH_C32(0x1e4e0000), SPH_C32(0xdecf0000), SPH_C32(0x6df80180), + SPH_C32(0x77240000), SPH_C32(0xec47079e), SPH_C32(0xf4a0694e), + SPH_C32(0xcda31812), SPH_C32(0x98aa496e), SPH_C32(0xb2060000), + SPH_C32(0xc5690000), SPH_C32(0x28031200), SPH_C32(0x74670000), + SPH_C32(0xb6c236f4), SPH_C32(0xeb1239f8), SPH_C32(0x33d1dfec), + SPH_C32(0x094e3198) }, + { SPH_C32(0xaec30000), SPH_C32(0x9c4f0001), SPH_C32(0x79d1e000), + SPH_C32(0x2c150000), SPH_C32(0x45cc75b3), SPH_C32(0x6650b736), + SPH_C32(0xab92f78f), SPH_C32(0xa312567b), SPH_C32(0xdb250000), + SPH_C32(0x09290000), SPH_C32(0x49aac000), SPH_C32(0x81e10000), + SPH_C32(0xcafe6b59), SPH_C32(0x42793431), SPH_C32(0x43566b76), + SPH_C32(0xe86cba2e) }, + { SPH_C32(0xdb250000), SPH_C32(0x09290000), SPH_C32(0x49aac000), + SPH_C32(0x81e10000), SPH_C32(0xcafe6b59), SPH_C32(0x42793431), + SPH_C32(0x43566b76), SPH_C32(0xe86cba2e), SPH_C32(0x75e60000), + SPH_C32(0x95660001), SPH_C32(0x307b2000), SPH_C32(0xadf40000), + SPH_C32(0x8f321eea), SPH_C32(0x24298307), SPH_C32(0xe8c49cf9), + SPH_C32(0x4b7eec55) }, + { SPH_C32(0x58430000), SPH_C32(0x807e0000), SPH_C32(0x78330001), + SPH_C32(0xc66b3800), SPH_C32(0xe7375cdc), SPH_C32(0x79ad3fdd), + SPH_C32(0xac73fe6f), SPH_C32(0x3a4479b1), SPH_C32(0x1d5a0000), + SPH_C32(0x2b720000), SPH_C32(0x488d0000), SPH_C32(0xaf611800), + SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), SPH_C32(0x81a20429), + SPH_C32(0x1e7536a6) }, + { SPH_C32(0x1d5a0000), SPH_C32(0x2b720000), SPH_C32(0x488d0000), + SPH_C32(0xaf611800), SPH_C32(0x25cb2ec5), SPH_C32(0xc879bfd0), + SPH_C32(0x81a20429), SPH_C32(0x1e7536a6), SPH_C32(0x45190000), + SPH_C32(0xab0c0000), SPH_C32(0x30be0001), SPH_C32(0x690a2000), + SPH_C32(0xc2fc7219), SPH_C32(0xb1d4800d), SPH_C32(0x2dd1fa46), + SPH_C32(0x24314f17) }, + { SPH_C32(0xa53b0000), SPH_C32(0x14260000), SPH_C32(0x4e30001e), + SPH_C32(0x7cae0000), SPH_C32(0x8f9e0dd5), SPH_C32(0x78dfaa3d), + SPH_C32(0xf73168d8), SPH_C32(0x0b1b4946), SPH_C32(0x07ed0000), + SPH_C32(0xb2500000), SPH_C32(0x8774000a), SPH_C32(0x970d0000), + SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), SPH_C32(0xf4786222), + SPH_C32(0x9075b1ce) }, + { SPH_C32(0x07ed0000), SPH_C32(0xb2500000), SPH_C32(0x8774000a), + SPH_C32(0x970d0000), SPH_C32(0x437223ae), SPH_C32(0x48c76ea4), + SPH_C32(0xf4786222), SPH_C32(0x9075b1ce), SPH_C32(0xa2d60000), + SPH_C32(0xa6760000), SPH_C32(0xc9440014), SPH_C32(0xeba30000), + SPH_C32(0xccec2e7b), SPH_C32(0x3018c499), SPH_C32(0x03490afa), + SPH_C32(0x9b6ef888) }, + { SPH_C32(0x88980000), SPH_C32(0x1f940000), SPH_C32(0x7fcf002e), + SPH_C32(0xfb4e0000), SPH_C32(0xf158079a), SPH_C32(0x61ae9167), + SPH_C32(0xa895706c), SPH_C32(0xe6107494), SPH_C32(0x0bc20000), + SPH_C32(0xdb630000), SPH_C32(0x7e88000c), SPH_C32(0x15860000), + SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), SPH_C32(0xf460449e), + SPH_C32(0xd8b61463) }, + { SPH_C32(0x0bc20000), SPH_C32(0xdb630000), SPH_C32(0x7e88000c), + SPH_C32(0x15860000), SPH_C32(0x91fd48f3), SPH_C32(0x7581bb43), + SPH_C32(0xf460449e), SPH_C32(0xd8b61463), SPH_C32(0x835a0000), + SPH_C32(0xc4f70000), SPH_C32(0x01470022), SPH_C32(0xeec80000), + SPH_C32(0x60a54f69), SPH_C32(0x142f2a24), SPH_C32(0x5cf534f2), + SPH_C32(0x3ea660f7) }, + { SPH_C32(0x52500000), SPH_C32(0x29540000), SPH_C32(0x6a61004e), + SPH_C32(0xf0ff0000), SPH_C32(0x9a317eec), SPH_C32(0x452341ce), + SPH_C32(0xcf568fe5), SPH_C32(0x5303130f), SPH_C32(0x538d0000), + SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), SPH_C32(0x56ff0000), + SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), SPH_C32(0xa9444018), + SPH_C32(0x7f975691) }, + { SPH_C32(0x538d0000), SPH_C32(0xa9fc0000), SPH_C32(0x9ef70006), + SPH_C32(0x56ff0000), SPH_C32(0x0ae4004e), SPH_C32(0x92c5cdf9), + SPH_C32(0xa9444018), SPH_C32(0x7f975691), SPH_C32(0x01dd0000), + SPH_C32(0x80a80000), SPH_C32(0xf4960048), SPH_C32(0xa6000000), + SPH_C32(0x90d57ea2), SPH_C32(0xd7e68c37), SPH_C32(0x6612cffd), + SPH_C32(0x2c94459e) }, + { SPH_C32(0xe6280000), SPH_C32(0x4c4b0000), SPH_C32(0xa8550000), + SPH_C32(0xd3d002e0), SPH_C32(0xd86130b8), SPH_C32(0x98a7b0da), + SPH_C32(0x289506b4), SPH_C32(0xd75a4897), SPH_C32(0xf0c50000), + SPH_C32(0x59230000), SPH_C32(0x45820000), SPH_C32(0xe18d00c0), + SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), SPH_C32(0xcbe0fe1c), + SPH_C32(0x56a7b19f) }, + { SPH_C32(0xf0c50000), SPH_C32(0x59230000), SPH_C32(0x45820000), + SPH_C32(0xe18d00c0), SPH_C32(0x3b6d0631), SPH_C32(0xc2ed5699), + SPH_C32(0xcbe0fe1c), SPH_C32(0x56a7b19f), SPH_C32(0x16ed0000), + SPH_C32(0x15680000), SPH_C32(0xedd70000), SPH_C32(0x325d0220), + SPH_C32(0xe30c3689), SPH_C32(0x5a4ae643), SPH_C32(0xe375f8a8), + SPH_C32(0x81fdf908) }, + { SPH_C32(0xb4310000), SPH_C32(0x77330000), SPH_C32(0xb15d0000), + SPH_C32(0x7fd004e0), SPH_C32(0x78a26138), SPH_C32(0xd116c35d), + SPH_C32(0xd256d489), SPH_C32(0x4e6f74de), SPH_C32(0xe3060000), + SPH_C32(0xbdc10000), SPH_C32(0x87130000), SPH_C32(0xbff20060), + SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), SPH_C32(0x73c5ab06), + SPH_C32(0x5bd61539) }, + { SPH_C32(0xe3060000), SPH_C32(0xbdc10000), SPH_C32(0x87130000), + SPH_C32(0xbff20060), SPH_C32(0x2eba0a1a), SPH_C32(0x8db53751), + SPH_C32(0x73c5ab06), SPH_C32(0x5bd61539), SPH_C32(0x57370000), + SPH_C32(0xcaf20000), SPH_C32(0x364e0000), SPH_C32(0xc0220480), + SPH_C32(0x56186b22), SPH_C32(0x5ca3f40c), SPH_C32(0xa1937f8f), + SPH_C32(0x15b961e7) }, + { SPH_C32(0x02f20000), SPH_C32(0xa2810000), SPH_C32(0x873f0000), + SPH_C32(0xe36c7800), SPH_C32(0x1e1d74ef), SPH_C32(0x073d2bd6), + SPH_C32(0xc4c23237), SPH_C32(0x7f32259e), SPH_C32(0xbadd0000), + SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), SPH_C32(0xf7282800), + SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), SPH_C32(0xea5a8d14), + SPH_C32(0x2a2c18f0) }, + { SPH_C32(0xbadd0000), SPH_C32(0x13ad0000), SPH_C32(0xb7e70000), + SPH_C32(0xf7282800), SPH_C32(0xdf45144d), SPH_C32(0x361ac33a), + SPH_C32(0xea5a8d14), SPH_C32(0x2a2c18f0), SPH_C32(0xb82f0000), + SPH_C32(0xb12c0000), SPH_C32(0x30d80000), SPH_C32(0x14445000), + SPH_C32(0xc15860a2), SPH_C32(0x3127e8ec), SPH_C32(0x2e98bf23), + SPH_C32(0x551e3d6e) }, + { SPH_C32(0x1e6c0000), SPH_C32(0xc4420000), SPH_C32(0x8a2e0000), + SPH_C32(0xbcb6b800), SPH_C32(0x2c4413b6), SPH_C32(0x8bfdd3da), + SPH_C32(0x6a0c1bc8), SPH_C32(0xb99dc2eb), SPH_C32(0x92560000), + SPH_C32(0x1eda0000), SPH_C32(0xea510000), SPH_C32(0xe8b13000), + SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), SPH_C32(0xb15c2254), + SPH_C32(0x33c5244f) }, + { SPH_C32(0x92560000), SPH_C32(0x1eda0000), SPH_C32(0xea510000), + SPH_C32(0xe8b13000), SPH_C32(0xa93556a5), SPH_C32(0xebfb6199), + SPH_C32(0xb15c2254), SPH_C32(0x33c5244f), SPH_C32(0x8c3a0000), + SPH_C32(0xda980000), SPH_C32(0x607f0000), SPH_C32(0x54078800), + SPH_C32(0x85714513), SPH_C32(0x6006b243), SPH_C32(0xdb50399c), + SPH_C32(0x8a58e6a4) }, + { SPH_C32(0x033d0000), SPH_C32(0x08b30000), SPH_C32(0xf33a0000), + SPH_C32(0x3ac20007), SPH_C32(0x51298a50), SPH_C32(0x6b6e661f), + SPH_C32(0x0ea5cfe3), SPH_C32(0xe6da7ffe), SPH_C32(0xa8da0000), + SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), SPH_C32(0x07da0002), + SPH_C32(0x7d669583), SPH_C32(0x1f98708a), SPH_C32(0xbb668808), + SPH_C32(0xda878000) }, + { SPH_C32(0xa8da0000), SPH_C32(0x96be0000), SPH_C32(0x5c1d0000), + SPH_C32(0x07da0002), SPH_C32(0x7d669583), SPH_C32(0x1f98708a), + SPH_C32(0xbb668808), SPH_C32(0xda878000), SPH_C32(0xabe70000), + SPH_C32(0x9e0d0000), SPH_C32(0xaf270000), SPH_C32(0x3d180005), + SPH_C32(0x2c4f1fd3), SPH_C32(0x74f61695), SPH_C32(0xb5c347eb), + SPH_C32(0x3c5dfffe) }, + { SPH_C32(0x01930000), SPH_C32(0xe7820000), SPH_C32(0xedfb0000), + SPH_C32(0xcf0c000b), SPH_C32(0x8dd08d58), SPH_C32(0xbca3b42e), + SPH_C32(0x063661e1), SPH_C32(0x536f9e7b), SPH_C32(0x92280000), + SPH_C32(0xdc850000), SPH_C32(0x57fa0000), SPH_C32(0x56dc0003), + SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), SPH_C32(0x90cef752), + SPH_C32(0x7b1675d7) }, + { SPH_C32(0x92280000), SPH_C32(0xdc850000), SPH_C32(0x57fa0000), + SPH_C32(0x56dc0003), SPH_C32(0xbae92316), SPH_C32(0x5aefa30c), + SPH_C32(0x90cef752), SPH_C32(0x7b1675d7), SPH_C32(0x93bb0000), + SPH_C32(0x3b070000), SPH_C32(0xba010000), SPH_C32(0x99d00008), + SPH_C32(0x3739ae4e), SPH_C32(0xe64c1722), SPH_C32(0x96f896b3), + SPH_C32(0x2879ebac) }, + { SPH_C32(0x5fa80000), SPH_C32(0x56030000), SPH_C32(0x43ae0000), + SPH_C32(0x64f30013), SPH_C32(0x257e86bf), SPH_C32(0x1311944e), + SPH_C32(0x541e95bf), SPH_C32(0x8ea4db69), SPH_C32(0x00440000), + SPH_C32(0x7f480000), SPH_C32(0xda7c0000), SPH_C32(0x2a230001), + SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), SPH_C32(0x030a9e60), + SPH_C32(0xbe0a679e) }, + { SPH_C32(0x00440000), SPH_C32(0x7f480000), SPH_C32(0xda7c0000), + SPH_C32(0x2a230001), SPH_C32(0x3badc9cc), SPH_C32(0xa9b69c87), + SPH_C32(0x030a9e60), SPH_C32(0xbe0a679e), SPH_C32(0x5fec0000), + SPH_C32(0x294b0000), SPH_C32(0x99d20000), SPH_C32(0x4ed00012), + SPH_C32(0x1ed34f73), SPH_C32(0xbaa708c9), SPH_C32(0x57140bdf), + SPH_C32(0x30aebcf7) }, + { SPH_C32(0xee930000), SPH_C32(0xd6070000), SPH_C32(0x92c10000), + SPH_C32(0x2b9801e0), SPH_C32(0x9451287c), SPH_C32(0x3b6cfb57), + SPH_C32(0x45312374), SPH_C32(0x201f6a64), SPH_C32(0x7b280000), + SPH_C32(0x57420000), SPH_C32(0xa9e50000), SPH_C32(0x634300a0), + SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), SPH_C32(0x27f83b03), + SPH_C32(0xc7ff60f0) }, + { SPH_C32(0x7b280000), SPH_C32(0x57420000), SPH_C32(0xa9e50000), + SPH_C32(0x634300a0), SPH_C32(0x9edb442f), SPH_C32(0x6d9995bb), + SPH_C32(0x27f83b03), SPH_C32(0xc7ff60f0), SPH_C32(0x95bb0000), + SPH_C32(0x81450000), SPH_C32(0x3b240000), SPH_C32(0x48db0140), + SPH_C32(0x0a8a6c53), SPH_C32(0x56f56eec), SPH_C32(0x62c91877), + SPH_C32(0xe7e00a94) } +}; + +#define INPUT_BIG do { \ + __constant const sph_u32 *tp = &T512[0][0]; \ + unsigned u, v; \ + m0 = 0; \ + m1 = 0; \ + m2 = 0; \ + m3 = 0; \ + m4 = 0; \ + m5 = 0; \ + m6 = 0; \ + m7 = 0; \ + m8 = 0; \ + m9 = 0; \ + mA = 0; \ + mB = 0; \ + mC = 0; \ + mD = 0; \ + mE = 0; \ + mF = 0; \ + for (u = 0; u < 8; u ++) { \ + unsigned db = buf(u); \ + for (v = 0; v < 8; v ++, db >>= 1) { \ + sph_u32 dm = SPH_T32(-(sph_u32)(db & 1)); \ + m0 ^= dm & *tp ++; \ + m1 ^= dm & *tp ++; \ + m2 ^= dm & *tp ++; \ + m3 ^= dm & *tp ++; \ + m4 ^= dm & *tp ++; \ + m5 ^= dm & *tp ++; \ + m6 ^= dm & *tp ++; \ + m7 ^= dm & *tp ++; \ + m8 ^= dm & *tp ++; \ + m9 ^= dm & *tp ++; \ + mA ^= dm & *tp ++; \ + mB ^= dm & *tp ++; \ + mC ^= dm & *tp ++; \ + mD ^= dm & *tp ++; \ + mE ^= dm & *tp ++; \ + mF ^= dm & *tp ++; \ + } \ + } \ + } while (0) + +#define INPUT_BIG_LOCAL do { \ + __local sph_u32 *tp = &(T512_L[0]); \ + unsigned u, v; \ + m0 = 0; \ + m1 = 0; \ + m2 = 0; \ + m3 = 0; \ + m4 = 0; \ + m5 = 0; \ + m6 = 0; \ + m7 = 0; \ + m8 = 0; \ + m9 = 0; \ + mA = 0; \ + mB = 0; \ + mC = 0; \ + mD = 0; \ + mE = 0; \ + mF = 0; \ + for (u = 0; u < 8; u ++) { \ + unsigned db = buf(u); \ + for (v = 0; v < 8; v ++, db >>= 1) { \ + sph_u32 dm = SPH_T32(-(sph_u32)(db & 1)); \ + m0 ^= dm & *tp ++; \ + m1 ^= dm & *tp ++; \ + m2 ^= dm & *tp ++; \ + m3 ^= dm & *tp ++; \ + m4 ^= dm & *tp ++; \ + m5 ^= dm & *tp ++; \ + m6 ^= dm & *tp ++; \ + m7 ^= dm & *tp ++; \ + m8 ^= dm & *tp ++; \ + m9 ^= dm & *tp ++; \ + mA ^= dm & *tp ++; \ + mB ^= dm & *tp ++; \ + mC ^= dm & *tp ++; \ + mD ^= dm & *tp ++; \ + mE ^= dm & *tp ++; \ + mF ^= dm & *tp ++; \ + } \ + } \ + } while (0) + +#endif diff --git a/kernel/marucoin-mod.cl b/kernel/marucoin-mod.cl index 36339d4b..7b41c594 100644 --- a/kernel/marucoin-mod.cl +++ b/kernel/marucoin-mod.cl @@ -33,6 +33,8 @@ #ifndef X13MOD_CL #define X13MOD_CL +#define DEBUG(x) + #if __ENDIAN_LITTLE__ #define SPH_LITTLE_ENDIAN 1 #else @@ -54,15 +56,15 @@ typedef int sph_s32; #define SPH_64 1 #define SPH_64_TRUE 1 -#define SPH_C32(x) ((sph_u32)(x ## U)) -#define SPH_T32(x) ((as_uint(x)) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) -#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) -#define SPH_C64(x) ((sph_u64)(x ## UL)) -#define SPH_T64(x) ((as_ulong(x)) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) -#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_ECHO_64 1 #define SPH_KECCAK_64 1 @@ -74,9 +76,9 @@ typedef int sph_s32; #define SPH_SMALL_FOOTPRINT_GROESTL 0 #define SPH_GROESTL_BIG_ENDIAN 0 #define SPH_CUBEHASH_UNROLL 0 -#define SPH_KECCAK_UNROLL 0 +#define SPH_KECCAK_UNROLL 1 #if !defined SPH_HAMSI_EXPAND_BIG - #define SPH_HAMSI_EXPAND_BIG 4 + #define SPH_HAMSI_EXPAND_BIG 1 #endif #include "blake.cl" @@ -121,10 +123,10 @@ typedef union { __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global unsigned char* block, __global hash_t* hashes) { - uint gid = get_global_id(0); - __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - // blake + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + // blake sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); @@ -134,11 +136,12 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) if ((T0 = SPH_T64(T0 + 1024)) < 1024) T1 = SPH_T64(T1 + 1); - + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); M1 = DEC64BE(block + 8); M2 = DEC64BE(block + 16); @@ -169,25 +172,25 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) hash->h8[6] = H6; hash->h8[7] = H7; - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search1(__global hash_t* hashes) { - uint gid = get_global_id(0); + uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); // bmw sph_u64 BMW_H[16]; - - #pragma unroll 16 + +#pragma unroll 16 for(unsigned u = 0; u < 16; u++) BMW_H[u] = BMW_IV512[u]; sph_u64 mv[16],q[32]; - sph_u64 tmp; - + sph_u64 tmp; + mv[0] = SWAP8(hash->h8[0]); mv[1] = SWAP8(hash->h8[1]); mv[2] = SWAP8(hash->h8[2]); @@ -206,7 +209,7 @@ __kernel void search1(__global hash_t* hashes) mv[15] = SPH_C64(512); tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); - q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); @@ -216,7 +219,7 @@ __kernel void search1(__global hash_t* hashes) tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); - q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); @@ -226,7 +229,7 @@ __kernel void search1(__global hash_t* hashes) tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); - q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); @@ -237,63 +240,63 @@ __kernel void search1(__global hash_t* hashes) q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; - - #pragma unroll 2 + +#pragma unroll 2 for(int i=0;i<2;i++) { q[i+16] = - (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + - (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + - (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + - (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + - (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + - (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + - (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + - (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + - (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + - (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + - (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + - (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + - (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + - (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + - (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + - (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); } - - #pragma unroll 4 + +#pragma unroll 4 for(int i=2;i<6;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); } - #pragma unroll 3 +#pragma unroll 3 for(int i=6;i<9;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); } - - #pragma unroll 4 + +#pragma unroll 4 for(int i=9;i<13;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); } - - #pragma unroll 3 + +#pragma unroll 3 for(int i=13;i<16;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); } - + sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; @@ -315,7 +318,7 @@ __kernel void search1(__global hash_t* hashes) BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); - #pragma unroll 16 +#pragma unroll 16 for(int i=0;i<16;i++) { mv[i] = BMW_H[i]; @@ -323,7 +326,7 @@ __kernel void search1(__global hash_t* hashes) } tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); - q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); @@ -333,7 +336,7 @@ __kernel void search1(__global hash_t* hashes) tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); - q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); @@ -343,7 +346,7 @@ __kernel void search1(__global hash_t* hashes) tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); - q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); @@ -354,65 +357,66 @@ __kernel void search1(__global hash_t* hashes) q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; - - #pragma unroll 2 + +#pragma unroll 2 for(int i=0;i<2;i++) { q[i+16] = - (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + - (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + - (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + - (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + - (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + - (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + - (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + - (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + - (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + - (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + - (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + - (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + - (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + - (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + - (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + - (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); } - - #pragma unroll 4 + +#pragma unroll 4 for(int i=2;i<6;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); } - #pragma unroll 3 +#pragma unroll 3 for(int i=6;i<9;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); } - #pragma unroll 4 +#pragma unroll 4 for(int i=9;i<13;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); } - #pragma unroll 3 +#pragma unroll 3 for(int i=13;i<16;i++) { q[i+16] = CONST_EXP2 + - (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + - SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); } - + XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; - XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); @@ -439,8 +443,8 @@ __kernel void search1(__global hash_t* hashes) hash->h8[5] = SWAP8(BMW_H[13]); hash->h8[6] = SWAP8(BMW_H[14]); hash->h8[7] = SWAP8(BMW_H[15]); - - barrier(CLK_GLOBAL_MEM_FENCE); + + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -449,94 +453,67 @@ __kernel void search2(__global hash_t* hashes) uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - #if !SPH_SMALL_FOOTPRINT_GROESTL - __local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256]; - __local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256]; - #else - __local sph_u64 T0_C[256], T4_C[256]; - #endif + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; int init = get_local_id(0); int step = get_local_size(0); for (int i = init; i < 256; i += step) { - T0_C[i] = T0[i]; - T4_C[i] = T4[i]; - #if !SPH_SMALL_FOOTPRINT_GROESTL - T1_C[i] = T1[i]; - T2_C[i] = T2[i]; - T3_C[i] = T3[i]; - T5_C[i] = T5[i]; - T6_C[i] = T6[i]; - T7_C[i] = T7[i]; - #endif + T0_L[i] = T0[i]; + T4_L[i] = T4[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; } - barrier(CLK_LOCAL_MEM_FENCE); // groestl - - #define T0 T0_C - #define T1 T1_C - #define T2 T2_C - #define T3 T3_C - #define T4 T4_C - #define T5 T5_C - #define T6 T6_C - #define T7 T7_C + barrier(CLK_LOCAL_MEM_FENCE); + + #define T0 T0_L + #define T1 T1_L + #define T2 T2_L + #define T3 T3_L + #define T4 T4_L + #define T5 T5_L + #define T6 T6_L + #define T7 T7_L // groestl - - sph_u64 H[16]; - - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; - - #if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); - #else - H[15] = (sph_u64)512; - #endif + sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; sph_u64 g[16], m[16]; - m[0] = DEC64E(hash->h8[0]); - m[1] = DEC64E(hash->h8[1]); - m[2] = DEC64E(hash->h8[2]); - m[3] = DEC64E(hash->h8[3]); - m[4] = DEC64E(hash->h8[4]); - m[5] = DEC64E(hash->h8[5]); - m[6] = DEC64E(hash->h8[6]); - m[7] = DEC64E(hash->h8[7]); - - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + g[0] = m[0] = DEC64E(hash->h8[0]); + g[1] = m[1] = DEC64E(hash->h8[1]); + g[2] = m[2] = DEC64E(hash->h8[2]); + g[3] = m[3] = DEC64E(hash->h8[3]); + g[4] = m[4] = DEC64E(hash->h8[4]); + g[5] = m[5] = DEC64E(hash->h8[5]); + g[6] = m[6] = DEC64E(hash->h8[6]); + g[7] = m[7] = DEC64E(hash->h8[7]); + g[8] = m[8] = 0x80; + g[9] = m[9] = 0; + g[10] = m[10] = 0; + g[11] = m[11] = 0; + g[12] = m[12] = 0; + g[13] = m[13] = 0; + g[14] = m[14] = 0; + g[15] = 0x102000000000000; + m[15] = 0x100000000000000; PERM_BIG_P(g); PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - + xH[u] = H[u] ^= g[u] ^ m[u]; + PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - - for (unsigned int u = 0; u < 8; u ++) - hash->h8[u] = DEC64E(H[u + 8]); + for (unsigned int u = 8; u < 16; u ++) + hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); barrier(CLK_GLOBAL_MEM_FENCE); } @@ -561,10 +538,14 @@ __kernel void search3(__global hash_t* hashes) m5 = SWAP8(hash->h8[5]); m6 = SWAP8(hash->h8[6]); m7 = SWAP8(hash->h8[7]); + UBI_BIG(480, 64); + bcount = 0; m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash->h8[0] = SWAP8(h0); hash->h8[1] = SWAP8(h1); hash->h8[2] = SWAP8(h2); @@ -574,7 +555,7 @@ __kernel void search3(__global hash_t* hashes) hash->h8[6] = SWAP8(h6); hash->h8[7] = SWAP8(h7); - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -583,7 +564,7 @@ __kernel void search4(__global hash_t* hashes) uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); - // jh + // jh sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); @@ -612,7 +593,7 @@ __kernel void search4(__global hash_t* hashes) h6l ^= DEC64E(hash->h8[5]); h7h ^= DEC64E(hash->h8[6]); h7l ^= DEC64E(hash->h8[7]); - + h0h ^= 0x80; h3l ^= 0x2000000000000; } @@ -630,7 +611,7 @@ __kernel void search4(__global hash_t* hashes) hash->h8[6] = DEC64E(h7h); hash->h8[7] = DEC64E(h7l); - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -642,7 +623,7 @@ __kernel void search5(__global hash_t* hashes) // keccak sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; @@ -664,6 +645,7 @@ __kernel void search5(__global hash_t* hashes) a21 ^= SWAP8(hash->h8[7]); a31 ^= 0x8000000000000001; KECCAK_F_1600; + // Finalize the "lane complement" a10 = ~a10; a20 = ~a20; @@ -677,7 +659,7 @@ __kernel void search5(__global hash_t* hashes) hash->h8[6] = SWAP8(a11); hash->h8[7] = SWAP8(a21); - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -740,7 +722,7 @@ __kernel void search6(__global hash_t* hashes) hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; } } - + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; @@ -750,7 +732,7 @@ __kernel void search6(__global hash_t* hashes) hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -794,7 +776,7 @@ __kernel void search7(__global hash_t* hashes) x6 ^= SWAP4(hash->h4[15]); x7 ^= SWAP4(hash->h4[14]); } - else if(i == 1) + else if(i == 1) x0 ^= 0x80; else if (i == 2) xv ^= SPH_C32(1); @@ -817,7 +799,7 @@ __kernel void search7(__global hash_t* hashes) hash->h4[14] = xe; hash->h4[15] = xf; - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -825,11 +807,12 @@ __kernel void search8(__global hash_t* hashes) { uint gid = get_global_id(0); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; - + int init = get_local_id(0); int step = get_local_size(0); - + for (int i = init; i < 256; i += step) { AES0[i] = AES0_C[i]; @@ -837,7 +820,7 @@ __kernel void search8(__global hash_t* hashes) AES2[i] = AES2_C[i]; AES3[i] = AES3_C[i]; } - + barrier(CLK_LOCAL_MEM_FENCE); // shavite @@ -853,7 +836,7 @@ __kernel void search8(__global hash_t* hashes) sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; rk00 = hash->h4[0]; rk01 = hash->h4[1]; @@ -896,7 +879,7 @@ __kernel void search8(__global hash_t* hashes) hash->h4[14] = hE; hash->h4[15] = hF; - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -908,10 +891,8 @@ __kernel void search9(__global hash_t* hashes) // simd s32 q[256]; unsigned char x[128]; - for(unsigned int i = 0; i < 64; i++) x[i] = hash->h1[i]; - for(unsigned int i = 64; i < 128; i++) x[i] = 0; @@ -921,14 +902,15 @@ __kernel void search9(__global hash_t* hashes) u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); FFT256(0, 1, 0, ll1); - for (int i = 0; i < 256; i ++) { - s32 tq; - - tq = q[i] + yoff_b_n[i]; - tq = REDS2(tq); - tq = REDS1(tq); - tq = REDS1(tq); - q[i] = (tq <= 128 ? tq : tq - 257); + for (int i = 0; i < 256; i ++) + { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); } A0 ^= hash->h4[0]; @@ -954,21 +936,24 @@ __kernel void search9(__global hash_t* hashes) ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); STEP_BIG( - C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), - C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), - IF, 4, 13, PP8_4_); + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( - C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), - C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), - IF, 13, 10, PP8_5_); + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( - C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), - C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), - IF, 10, 25, PP8_6_); + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( - C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), - C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), - IF, 25, 4, PP8_0_); + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; @@ -983,22 +968,27 @@ __kernel void search9(__global hash_t* hashes) ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( - COPY_A0, COPY_A1, COPY_A2, COPY_A3, - COPY_A4, COPY_A5, COPY_A6, COPY_A7, - IF, 4, 13, PP8_4_); + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( - COPY_B0, COPY_B1, COPY_B2, COPY_B3, - COPY_B4, COPY_B5, COPY_B6, COPY_B7, - IF, 13, 10, PP8_5_); + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( - COPY_C0, COPY_C1, COPY_C2, COPY_C3, - COPY_C4, COPY_C5, COPY_C6, COPY_C7, - IF, 10, 25, PP8_6_); + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( - COPY_D0, COPY_D1, COPY_D2, COPY_D3, - COPY_D4, COPY_D5, COPY_D6, COPY_D7, - IF, 25, 4, PP8_0_); + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q hash->h4[0] = A0; @@ -1018,7 +1008,7 @@ __kernel void search9(__global hash_t* hashes) hash->h4[14] = B6; hash->h4[15] = B7; - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE); } __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) @@ -1026,8 +1016,7 @@ __kernel void search10(__global hash_t* hashes) { uint gid = get_global_id(0); uint offset = get_global_offset(0); - hash_t hash; - __global hash_t *hashp = &(hashes[gid-offset]); + __global hash_t *hash = &(hashes[gid-offset]); __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; @@ -1045,7 +1034,7 @@ __kernel void search10(__global hash_t* hashes) barrier(CLK_LOCAL_MEM_FENCE); for (int i = 0; i < 8; i++) - hash.h8[i] = hashes[gid-offset].h8[i]; + hash->h8[i] = hashes[gid-offset].h8[i]; // echo sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; @@ -1074,14 +1063,14 @@ __kernel void search10(__global hash_t* hashes) W61 = Vb61; W70 = Vb70; W71 = Vb71; - W80 = hash.h8[0]; - W81 = hash.h8[1]; - W90 = hash.h8[2]; - W91 = hash.h8[3]; - WA0 = hash.h8[4]; - WA1 = hash.h8[5]; - WB0 = hash.h8[6]; - WB1 = hash.h8[7]; + W80 = hash->h8[0]; + W81 = hash->h8[1]; + W90 = hash->h8[2]; + W91 = hash->h8[3]; + WA0 = hash->h8[4]; + WA1 = hash->h8[5]; + WB0 = hash->h8[6]; + WB1 = hash->h8[7]; WC0 = 0x80; WC1 = 0; WD0 = 0; @@ -1094,14 +1083,14 @@ __kernel void search10(__global hash_t* hashes) for (unsigned u = 0; u < 10; u ++) BIG_ROUND; - hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80; - hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81; - hashp->h8[2] = hash.h8[2] ^ Vb10 ^ W10 ^ W90; - hashp->h8[3] = hash.h8[3] ^ Vb11 ^ W11 ^ W91; - hashp->h8[4] = hash.h8[4] ^ Vb20 ^ W20 ^ WA0; - hashp->h8[5] = hash.h8[5] ^ Vb21 ^ W21 ^ WA1; - hashp->h8[6] = hash.h8[6] ^ Vb30 ^ W30 ^ WB0; - hashp->h8[7] = hash.h8[7] ^ Vb31 ^ W31 ^ WB1; + hash->h8[0] ^= Vb00 ^ W00 ^ W80; + hash->h8[1] ^= Vb01 ^ W01 ^ W81; + hash->h8[2] ^= Vb10 ^ W10 ^ W90; + hash->h8[3] ^= Vb11 ^ W11 ^ W91; + hash->h8[4] ^= Vb20 ^ W20 ^ WA0; + hash->h8[5] ^= Vb21 ^ W21 ^ WA1; + hash->h8[6] ^= Vb30 ^ W30 ^ WB0; + hash->h8[7] ^= Vb31 ^ W31 ^ WB1; barrier(CLK_GLOBAL_MEM_FENCE); } @@ -1110,9 +1099,22 @@ __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search11(__global hash_t* hashes) { uint gid = get_global_id(0); - uint offset = get_global_offset(0); - __global hash_t *hash = &(hashes[gid-offset]); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; @@ -1122,28 +1124,31 @@ __kernel void search11(__global hash_t* hashes) sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; #define buf(u) hash->h1[i + u] + for(int i = 0; i < 64; i += 8) { - INPUT_BIG; + INPUT_BIG_LOCAL; P_BIG; T_BIG; } - #undef buf + #undef buf #define buf(u) (u == 0 ? 0x80 : 0) - INPUT_BIG; + + INPUT_BIG_LOCAL; P_BIG; T_BIG; - #undef buf + #undef buf #define buf(u) (u == 6 ? 2 : 0) - INPUT_BIG; + + INPUT_BIG_LOCAL; PF_BIG; T_BIG; - for(unsigned u = 0; u < 16; u ++) - hash->h4[u] = h[u]; - + for (unsigned u = 0; u < 16; u ++) + hash->h4[u] = h[u]; + barrier(CLK_GLOBAL_MEM_FENCE); } @@ -1153,14 +1158,27 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo uint gid = get_global_id(0); uint offset = get_global_offset(0); __global hash_t *hash = &(hashes[gid-offset]); - + + //mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + barrier(CLK_GLOBAL_MEM_FENCE); + // fugue sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; sph_u32 S30, S31, S32, S33, S34, S35; - ulong fc_bit_count = (sph_u64) 64 << 3; + ulong fc_bit_count = (sph_u64) 0x200; S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); diff --git a/kernel/marucoin-modold.cl b/kernel/marucoin-modold.cl index 83d6e64b..197d8676 100644 --- a/kernel/marucoin-modold.cl +++ b/kernel/marucoin-modold.cl @@ -30,8 +30,8 @@ * @author phm */ -#ifndef X13MOD_CL -#define X13MOD_CL +#ifndef MARUCOIN_MOD_CL +#define MARUCOIN_MOD_CL #if __ENDIAN_LITTLE__ #define SPH_LITTLE_ENDIAN 1 @@ -55,12 +55,12 @@ typedef long sph_s64; #define SPH_64_TRUE 1 #define SPH_C32(x) ((sph_u32)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_C64(x) ((sph_u64)(x ## UL)) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_T64(x) (as_ulong(x)) #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) @@ -74,9 +74,9 @@ typedef long sph_s64; #define SPH_SMALL_FOOTPRINT_GROESTL 0 #define SPH_GROESTL_BIG_ENDIAN 0 #define SPH_CUBEHASH_UNROLL 0 -#define SPH_KECCAK_UNROLL 0 -#if !defined SPH_HAMSI_EXPAND_BIG -#define SPH_HAMSI_EXPAND_BIG 4 +#define SPH_KECCAK_UNROLL 1 +#ifndef SPH_HAMSI_EXPAND_BIG + #define SPH_HAMSI_EXPAND_BIG 4 #endif #include "blake.cl" @@ -794,6 +794,31 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo } barrier(CLK_LOCAL_MEM_FENCE); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + + + // mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int i = 0; i < 8; i++) { hash.h8[i] = hashes[gid-offset].h8[i]; @@ -875,18 +900,18 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo #define buf(u) hash.h1[i + u] for(int i = 0; i < 64; i += 8) { - INPUT_BIG; + INPUT_BIG_LOCAL; P_BIG; T_BIG; } #undef buf #define buf(u) (u == 0 ? 0x80 : 0) - INPUT_BIG; + INPUT_BIG_LOCAL; P_BIG; T_BIG; #undef buf #define buf(u) (u == 6 ? 2 : 0) - INPUT_BIG; + INPUT_BIG_LOCAL; PF_BIG; T_BIG; @@ -976,7 +1001,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo hash.h4[15] = SWAP4(S30); } - + bool result = (hash.h8[3] <= target); if (result) output[atomic_inc(output+0xFF)] = SWAP4(gid); @@ -984,4 +1009,4 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo barrier(CLK_GLOBAL_MEM_FENCE); } -#endif // X13MOD_CL +#endif // MARUCOIN_MOD_CL \ No newline at end of file diff --git a/kernel/shabal.cl b/kernel/shabal.cl new file mode 100644 index 00000000..2d01456e --- /dev/null +++ b/kernel/shabal.cl @@ -0,0 +1,350 @@ +/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ +/* + * Shabal implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * Part of this code was automatically generated (the part between + * the "BEGIN" and "END" markers). + */ + +#define sM 16 + +#define C32 SPH_C32 +#define T32 SPH_T32 + +#define O1 13 +#define O2 9 +#define O3 6 + +/* + * We copy the state into local variables, so that the compiler knows + * that it can optimize them at will. + */ + +/* BEGIN -- automatically generated code. */ + +#define INPUT_BLOCK_ADD do { \ + B0 = T32(B0 + M0); \ + B1 = T32(B1 + M1); \ + B2 = T32(B2 + M2); \ + B3 = T32(B3 + M3); \ + B4 = T32(B4 + M4); \ + B5 = T32(B5 + M5); \ + B6 = T32(B6 + M6); \ + B7 = T32(B7 + M7); \ + B8 = T32(B8 + M8); \ + B9 = T32(B9 + M9); \ + BA = T32(BA + MA); \ + BB = T32(BB + MB); \ + BC = T32(BC + MC); \ + BD = T32(BD + MD); \ + BE = T32(BE + ME); \ + BF = T32(BF + MF); \ + } while (0) + +#define INPUT_BLOCK_SUB do { \ + C0 = T32(C0 - M0); \ + C1 = T32(C1 - M1); \ + C2 = T32(C2 - M2); \ + C3 = T32(C3 - M3); \ + C4 = T32(C4 - M4); \ + C5 = T32(C5 - M5); \ + C6 = T32(C6 - M6); \ + C7 = T32(C7 - M7); \ + C8 = T32(C8 - M8); \ + C9 = T32(C9 - M9); \ + CA = T32(CA - MA); \ + CB = T32(CB - MB); \ + CC = T32(CC - MC); \ + CD = T32(CD - MD); \ + CE = T32(CE - ME); \ + CF = T32(CF - MF); \ + } while (0) + +#define XOR_W do { \ + A00 ^= Wlow; \ + A01 ^= Whigh; \ + } while (0) + +#define SWAP(v1, v2) do { \ + sph_u32 tmp = (v1); \ + (v1) = (v2); \ + (v2) = tmp; \ + } while (0) + +#define SWAP_BC do { \ + SWAP(B0, C0); \ + SWAP(B1, C1); \ + SWAP(B2, C2); \ + SWAP(B3, C3); \ + SWAP(B4, C4); \ + SWAP(B5, C5); \ + SWAP(B6, C6); \ + SWAP(B7, C7); \ + SWAP(B8, C8); \ + SWAP(B9, C9); \ + SWAP(BA, CA); \ + SWAP(BB, CB); \ + SWAP(BC, CC); \ + SWAP(BD, CD); \ + SWAP(BE, CE); \ + SWAP(BF, CF); \ + } while (0) + +#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \ + xa0 = T32((xa0 \ + ^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ + ^ xc) * 3U) \ + ^ xb1 ^ (xb2 & ~xb3) ^ xm; \ + xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ + } while (0) + +#define PERM_STEP_0 do { \ + PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_1 do { \ + PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_2 do { \ + PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define APPLY_P do { \ + B0 = T32(B0 << 17) | (B0 >> 15); \ + B1 = T32(B1 << 17) | (B1 >> 15); \ + B2 = T32(B2 << 17) | (B2 >> 15); \ + B3 = T32(B3 << 17) | (B3 >> 15); \ + B4 = T32(B4 << 17) | (B4 >> 15); \ + B5 = T32(B5 << 17) | (B5 >> 15); \ + B6 = T32(B6 << 17) | (B6 >> 15); \ + B7 = T32(B7 << 17) | (B7 >> 15); \ + B8 = T32(B8 << 17) | (B8 >> 15); \ + B9 = T32(B9 << 17) | (B9 >> 15); \ + BA = T32(BA << 17) | (BA >> 15); \ + BB = T32(BB << 17) | (BB >> 15); \ + BC = T32(BC << 17) | (BC >> 15); \ + BD = T32(BD << 17) | (BD >> 15); \ + BE = T32(BE << 17) | (BE >> 15); \ + BF = T32(BF << 17) | (BF >> 15); \ + PERM_STEP_0; \ + PERM_STEP_1; \ + PERM_STEP_2; \ + A0B = T32(A0B + C6); \ + A0A = T32(A0A + C5); \ + A09 = T32(A09 + C4); \ + A08 = T32(A08 + C3); \ + A07 = T32(A07 + C2); \ + A06 = T32(A06 + C1); \ + A05 = T32(A05 + C0); \ + A04 = T32(A04 + CF); \ + A03 = T32(A03 + CE); \ + A02 = T32(A02 + CD); \ + A01 = T32(A01 + CC); \ + A00 = T32(A00 + CB); \ + A0B = T32(A0B + CA); \ + A0A = T32(A0A + C9); \ + A09 = T32(A09 + C8); \ + A08 = T32(A08 + C7); \ + A07 = T32(A07 + C6); \ + A06 = T32(A06 + C5); \ + A05 = T32(A05 + C4); \ + A04 = T32(A04 + C3); \ + A03 = T32(A03 + C2); \ + A02 = T32(A02 + C1); \ + A01 = T32(A01 + C0); \ + A00 = T32(A00 + CF); \ + A0B = T32(A0B + CE); \ + A0A = T32(A0A + CD); \ + A09 = T32(A09 + CC); \ + A08 = T32(A08 + CB); \ + A07 = T32(A07 + CA); \ + A06 = T32(A06 + C9); \ + A05 = T32(A05 + C8); \ + A04 = T32(A04 + C7); \ + A03 = T32(A03 + C6); \ + A02 = T32(A02 + C5); \ + A01 = T32(A01 + C4); \ + A00 = T32(A00 + C3); \ + } while (0) + +#define INCR_W do { \ + if ((Wlow = T32(Wlow + 1)) == 0) \ + Whigh = T32(Whigh + 1); \ + } while (0) + +__constant static const sph_u32 A_init_192[] = { + C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E), + C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465), + C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9) +}; + +__constant static const sph_u32 B_init_192[] = { + C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824), + C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7), + C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319), + C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C) +}; + +__constant static const sph_u32 C_init_192[] = { + C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B), + C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640), + C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3), + C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669) +}; + +__constant static const sph_u32 A_init_224[] = { + C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B), + C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F), + C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061) +}; + +__constant static const sph_u32 B_init_224[] = { + C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498), + C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5), + C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0), + C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C) +}; + +__constant static const sph_u32 C_init_224[] = { + C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD), + C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18), + C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2), + C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83) +}; + +__constant static const sph_u32 A_init_256[] = { + C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191), + C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C), + C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A) +}; + +__constant static const sph_u32 B_init_256[] = { + C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F), + C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002), + C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890), + C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5) +}; + +__constant static const sph_u32 C_init_256[] = { + C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55), + C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433), + C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F), + C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60) +}; + +__constant static const sph_u32 A_init_384[] = { + C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83), + C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF), + C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D) +}; + +__constant static const sph_u32 B_init_384[] = { + C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F), + C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641), + C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8), + C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36) +}; + +__constant static const sph_u32 C_init_384[] = { + C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399), + C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261), + C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C), + C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70) +}; + +__constant static const sph_u32 A_init_512[] = { + C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), + C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), + C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) +}; + +__constant static const sph_u32 B_init_512[] = { + C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), + C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), + C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), + C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) +}; + +__constant static const sph_u32 C_init_512[] = { + C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), + C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), + C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), + C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) +}; + +/* END -- automatically generated code. */ diff --git a/kernel/whirlpool.cl b/kernel/whirlpool.cl new file mode 100644 index 00000000..812e8f42 --- /dev/null +++ b/kernel/whirlpool.cl @@ -0,0 +1,1167 @@ +/* $Id: whirlpool.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * WHIRLPOOL implementation. + * + * Internally, we use little-endian convention, on the assumption that + * architectures which favour big-endian encoding are: + * 1. rarer + * 2. in decreasing numbers + * 3. able to decode little-endian data efficiently anyway + * + * The most common big-endian architecture is Sparc, and Ultrasparc CPU + * include special opcodes to perform little-endian accesses, which we use + * (see sph_types.h). Most modern CPU designs can work with both endianness + * and architecture designer now favour little-endian (basically, x86 has + * won the endianness war). + * + * TODO: implement a 32-bit version. Not only such a version would be handy + * for non-64-bit-able architectures, but it may also use smaller tables, + * at the expense of more lookups and XORs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#if SPH_64 + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL (current version). + */ + +__constant static const sph_u64 plain_T0[256] = { + SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), + SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), + SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), + SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F), + SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6), + SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5), + SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F), + SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252), + SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC), + SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E), + SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C), + SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535), + SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0), + SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2), + SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B), + SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757), + SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777), + SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5), + SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0), + SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA), + SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9), + SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A), + SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0), + SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585), + SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D), + SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4), + SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E), + SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767), + SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727), + SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B), + SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D), + SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8), + SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE), + SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666), + SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717), + SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E), + SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D), + SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707), + SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A), + SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333), + SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202), + SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171), + SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919), + SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9), + SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3), + SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888), + SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626), + SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0), + SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F), + SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080), + SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD), + SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848), + SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A), + SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F), + SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868), + SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE), + SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454), + SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222), + SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1), + SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212), + SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808), + SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC), + SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1), + SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D), + SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000), + SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B), + SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282), + SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B), + SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF), + SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050), + SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3), + SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF), + SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555), + SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA), + SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA), + SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0), + SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C), + SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D), + SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575), + SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A), + SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6), + SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F), + SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4), + SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696), + SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5), + SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959), + SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272), + SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C), + SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878), + SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C), + SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5), + SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161), + SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121), + SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E), + SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7), + SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404), + SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999), + SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D), + SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF), + SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424), + SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB), + SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111), + SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E), + SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB), + SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181), + SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7), + SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313), + SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3), + SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E), + SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303), + SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444), + SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9), + SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB), + SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353), + SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B), + SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C), + SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474), + SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646), + SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989), + SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1), + SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A), + SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909), + SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6), + SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED), + SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242), + SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4), + SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C), + SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +__constant static const sph_u64 plain_T1[256] = { + SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), + SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), + SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), + SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D), + SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF), + SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E), + SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30), + SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8), + SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35), + SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A), + SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C), + SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580), + SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3), + SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C), + SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29), + SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5), + SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8), + SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E), + SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023), + SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44), + SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF), + SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A), + SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9), + SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9), + SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F), + SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407), + SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3), + SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778), + SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702), + SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7), + SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2), + SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856), + SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD), + SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671), + SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF), + SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A), + SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58), + SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F), + SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0), + SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6), + SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212), + SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE), + SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1), + SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F), + SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8), + SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC), + SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B), + SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059), + SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77), + SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4), + SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB), + SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832), + SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D), + SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D), + SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F), + SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7), + SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE), + SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F), + SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A), + SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282), + SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848), + SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF), + SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0), + SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8), + SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000), + SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E), + SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6), + SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3), + SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE), + SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA), + SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338), + SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4), + SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7), + SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9), + SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03), + SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E), + SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC), + SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F), + SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA), + SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE), + SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685), + SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7), + SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A), + SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652), + SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3), + SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB), + SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5), + SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16), + SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F), + SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98), + SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4), + SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E), + SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134), + SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE), + SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1), + SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424), + SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925), + SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65), + SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69), + SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419), + SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A), + SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199), + SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04), + SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0), + SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD), + SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C), + SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B), + SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305), + SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39), + SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B), + SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E), + SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988), + SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A), + SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1), + SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53), + SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B), + SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3), + SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C), + SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5), + SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA), + SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7), + SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941), + SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F), + SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6), + SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268), + SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED), + SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86), + SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) +}; + +__constant static const sph_u64 plain_T2[256] = { + SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), + SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), + SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), + SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E), + SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51), + SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7), + SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE), + SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4), + SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565), + SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01), + SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18), + SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A), + SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD), + SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99), + SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996), + SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE), + SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE), + SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7), + SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD), + SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9), + SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F), + SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14), + SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D), + SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917), + SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA), + SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5), + SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C), + SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE), + SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E), + SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B), + SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA), + SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD), + SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1), + SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC), + SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E), + SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21), + SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A), + SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E), + SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4), + SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666), + SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204), + SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2), + SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132), + SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF), + SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB), + SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D), + SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C), + SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D), + SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E), + SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D), + SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87), + SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290), + SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4), + SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE), + SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0), + SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741), + SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8), + SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44), + SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF), + SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224), + SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810), + SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5), + SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F), + SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A), + SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000), + SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56), + SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619), + SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336), + SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43), + SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0), + SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB), + SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3), + SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA), + SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9), + SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369), + SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D), + SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38), + SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A), + SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA), + SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09), + SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1), + SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E), + SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5), + SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231), + SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397), + SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2), + SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4), + SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698), + SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0), + SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805), + SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457), + SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2), + SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442), + SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C), + SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193), + SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408), + SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F), + SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A), + SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3), + SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948), + SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B), + SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922), + SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C), + SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB), + SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F), + SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3), + SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26), + SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB), + SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC), + SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06), + SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88), + SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F), + SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B), + SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6), + SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316), + SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8), + SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8), + SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C), + SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F), + SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF), + SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774), + SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112), + SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71), + SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7), + SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884), + SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55), + SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8), + SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) +}; + +__constant static const sph_u64 plain_T3[256] = { + SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), + SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), + SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), + SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E), + SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104), + SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706), + SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE), + SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407), + SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576), + SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C), + SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C), + SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1), + SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47), + SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED), + SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A), + SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16), + SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6), + SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756), + SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17), + SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995), + SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA), + SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422), + SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A), + SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB), + SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34), + SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503), + SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6), + SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6), + SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB), + SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D), + SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94), + SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F), + SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171), + SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3), + SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B), + SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC), + SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99), + SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B), + SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F), + SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF), + SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A), + SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8), + SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D), + SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A), + SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48), + SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92), + SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE), + SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A), + SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33), + SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA), + SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE), + SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075), + SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F), + SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E), + SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5), + SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C), + SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819), + SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA), + SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12), + SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A), + SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028), + SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B), + SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F), + SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9), + SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000), + SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687), + SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0), + SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677), + SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329), + SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D), + SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18), + SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374), + SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C), + SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965), + SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968), + SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7), + SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C), + SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64), + SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC), + SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998), + SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159), + SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63), + SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3), + SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4), + SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6), + SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220), + SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7), + SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861), + SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085), + SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586), + SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B), + SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8), + SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5), + SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66), + SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC), + SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814), + SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7), + SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39), + SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384), + SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4), + SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D), + SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255), + SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B), + SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60), + SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF), + SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C), + SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F), + SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8), + SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB), + SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F), + SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849), + SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37), + SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D), + SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602), + SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627), + SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1), + SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9), + SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43), + SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97), + SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42), + SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2), + SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D), + SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154), + SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E), + SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457), + SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E), + SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831), + SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) +}; + +__constant static const sph_u64 plain_T4[256] = { + SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), + SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), + SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), + SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42), + SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459), + SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB), + SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F), + SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA), + SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689), + SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04), + SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60), + SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5), + SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753), + SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E), + SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62), + SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682), + SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F), + SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B), + SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3), + SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E), + SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06), + SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250), + SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69), + SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C), + SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2), + SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3), + SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED), + SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F), + SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25), + SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C), + SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF), + SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E), + SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123), + SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317), + SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8), + SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84), + SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975), + SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38), + SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA), + SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85), + SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10), + SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF), + SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8), + SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86), + SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B), + SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234), + SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D), + SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9), + SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378), + SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74), + SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26), + SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A), + SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7), + SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2), + SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567), + SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19), + SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A), + SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D), + SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB), + SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90), + SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840), + SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33), + SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61), + SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5), + SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000), + SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745), + SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064), + SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8), + SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911), + SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA), + SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB), + SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B), + SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92), + SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503), + SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9), + SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E), + SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0), + SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452), + SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F), + SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824), + SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963), + SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8), + SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE), + SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4), + SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666), + SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2), + SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7), + SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A), + SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7), + SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614), + SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41), + SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F), + SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515), + SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0), + SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76), + SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420), + SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC), + SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968), + SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6), + SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D), + SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31), + SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588), + SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A), + SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B), + SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C), + SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB), + SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98), + SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6), + SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57), + SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18), + SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A), + SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721), + SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1), + SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2), + SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758), + SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147), + SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987), + SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A), + SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C), + SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B), + SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD), + SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48), + SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9), + SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B), + SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A), + SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49), + SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA), + SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) +}; + +__constant static const sph_u64 plain_T5[256] = { + SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), + SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), + SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), + SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F), + SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6), + SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5), + SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F), + SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52), + SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC), + SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E), + SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C), + SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535), + SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0), + SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2), + SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B), + SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257), + SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77), + SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5), + SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0), + SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA), + SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9), + SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A), + SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0), + SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85), + SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D), + SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4), + SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E), + SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67), + SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527), + SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B), + SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D), + SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8), + SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE), + SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766), + SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817), + SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E), + SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D), + SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807), + SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A), + SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533), + SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002), + SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71), + SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819), + SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9), + SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3), + SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488), + SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26), + SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0), + SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F), + SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480), + SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD), + SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48), + SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A), + SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F), + SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768), + SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE), + SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54), + SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22), + SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1), + SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012), + SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008), + SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC), + SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1), + SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D), + SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000), + SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B), + SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482), + SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B), + SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF), + SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50), + SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3), + SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF), + SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255), + SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA), + SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA), + SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0), + SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C), + SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D), + SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75), + SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A), + SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6), + SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F), + SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4), + SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496), + SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5), + SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259), + SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772), + SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C), + SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778), + SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C), + SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5), + SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61), + SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521), + SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E), + SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7), + SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004), + SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99), + SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D), + SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF), + SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24), + SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB), + SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811), + SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E), + SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB), + SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81), + SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7), + SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813), + SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3), + SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E), + SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803), + SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44), + SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9), + SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB), + SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253), + SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B), + SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C), + SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774), + SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46), + SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89), + SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1), + SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A), + SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809), + SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6), + SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED), + SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42), + SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4), + SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C), + SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) +}; + +__constant static const sph_u64 plain_T6[256] = { + SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), + SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), + SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), + SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21), + SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2), + SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3), + SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1), + SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255), + SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA), + SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02), + SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30), + SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4), + SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7), + SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F), + SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31), + SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741), + SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1), + SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3), + SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7), + SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F), + SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903), + SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28), + SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA), + SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E), + SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69), + SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7), + SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8), + SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781), + SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C), + SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16), + SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9), + SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847), + SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F), + SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685), + SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C), + SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42), + SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4), + SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C), + SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75), + SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC), + SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208), + SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9), + SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964), + SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943), + SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB), + SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A), + SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698), + SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA), + SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C), + SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A), + SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13), + SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D), + SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5), + SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61), + SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD), + SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82), + SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D), + SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288), + SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3), + SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248), + SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820), + SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97), + SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE), + SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4), + SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC), + SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232), + SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C), + SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86), + SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D), + SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB), + SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B), + SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549), + SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F), + SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2), + SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027), + SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70), + SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29), + SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9), + SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12), + SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF), + SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C), + SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477), + SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662), + SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533), + SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979), + SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5), + SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D), + SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD), + SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A), + SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE), + SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199), + SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184), + SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78), + SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B), + SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410), + SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E), + SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34), + SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B), + SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490), + SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96), + SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144), + SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25), + SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B), + SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E), + SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB), + SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C), + SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B), + SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5), + SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C), + SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D), + SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E), + SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6), + SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351), + SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C), + SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD), + SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD), + SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605), + SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E), + SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3), + SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8), + SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924), + SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2), + SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93), + SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215), + SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA), + SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D), + SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) +}; + +__constant static const sph_u64 plain_T7[256] = { + SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), + SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), + SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), + SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F), + SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6), + SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5), + SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F), + SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552), + SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC), + SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E), + SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C), + SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435), + SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0), + SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2), + SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B), + SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157), + SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177), + SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5), + SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0), + SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA), + SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9), + SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A), + SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0), + SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85), + SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D), + SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4), + SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E), + SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167), + SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27), + SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B), + SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D), + SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8), + SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE), + SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566), + SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17), + SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E), + SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D), + SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07), + SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A), + SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33), + SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802), + SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971), + SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419), + SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9), + SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3), + SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88), + SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826), + SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0), + SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F), + SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80), + SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD), + SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48), + SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A), + SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F), + SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68), + SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE), + SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54), + SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822), + SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1), + SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812), + SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008), + SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC), + SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1), + SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D), + SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000), + SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B), + SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282), + SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B), + SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF), + SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50), + SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3), + SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF), + SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955), + SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA), + SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA), + SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0), + SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C), + SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D), + SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975), + SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A), + SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6), + SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F), + SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4), + SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296), + SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5), + SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959), + SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572), + SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C), + SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78), + SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C), + SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5), + SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961), + SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421), + SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E), + SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7), + SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004), + SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99), + SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D), + SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF), + SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024), + SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB), + SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411), + SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E), + SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB), + SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81), + SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7), + SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13), + SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3), + SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E), + SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03), + SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44), + SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9), + SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB), + SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153), + SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B), + SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C), + SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74), + SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546), + SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89), + SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1), + SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A), + SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409), + SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6), + SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED), + SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542), + SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4), + SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C), + SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) +}; + +#endif + +/* + * Round constants. + */ +__constant static const sph_u64 plain_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ + +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) + +#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ + (table ## 0[BYTE(in ## i0, 0)] \ + ^ table ## 1[BYTE(in ## i1, 1)] \ + ^ table ## 2[BYTE(in ## i2, 2)] \ + ^ table ## 3[BYTE(in ## i3, 3)] \ + ^ table ## 4[BYTE(in ## i4, 4)] \ + ^ table ## 5[BYTE(in ## i5, 5)] \ + ^ table ## 6[BYTE(in ## i6, 6)] \ + ^ table ## 7[BYTE(in ## i7, 7)]) + +#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) do { \ + out ## 0 = ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1) ^ c0; \ + out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2) ^ c1; \ + out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3) ^ c2; \ + out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4) ^ c3; \ + out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5) ^ c4; \ + out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6) ^ c5; \ + out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7) ^ c6; \ + out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0) ^ c7; \ + } while (0) + +#define ROUND_KSCHED(table, in, out, c) \ + ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) + +#define ROUND_WENC(table, in, key, out) \ + ROUND(table, in, out, key ## 0, key ## 1, key ## 2, \ + key ## 3, key ## 4, key ## 5, key ## 6, key ## 7) + +#define TRANSFER(dst, src) do { \ + dst ## 0 = src ## 0; \ + dst ## 1 = src ## 1; \ + dst ## 2 = src ## 2; \ + dst ## 3 = src ## 3; \ + dst ## 4 = src ## 4; \ + dst ## 5 = src ## 5; \ + dst ## 6 = src ## 6; \ + dst ## 7 = src ## 7; \ + } while (0) + +#endif diff --git a/kernel/x14.cl b/kernel/x14.cl new file mode 100644 index 00000000..038f9d92 --- /dev/null +++ b/kernel/x14.cl @@ -0,0 +1,1341 @@ +/* + * X14 kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 Girino Vey + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef X14_CL +#define X14_CL + +#define DEBUG(x) + +#if __ENDIAN_LITTLE__ + #define SPH_LITTLE_ENDIAN 1 +#else + #define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ + typedef unsigned long long sph_u64; + typedef long long sph_s64; +#else + typedef unsigned long sph_u64; + typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_KECCAK_UNROLL 1 +#ifndef SPH_HAMSI_EXPAND_BIG + #define SPH_HAMSI_EXPAND_BIG 1 +#endif + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "hamsi.cl" +#include "fugue.cl" +#include "shabal.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); + #define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); + #define DEC32LE(x) (*(const __global sph_u32 *) (x)); +#endif + +#define SHL(x, n) ((x) << (n)) +#define SHR(x, n) ((x) >> (n)) + +#define CONST_EXP2 q[i+0] + SPH_ROTL64(q[i+1], 5) + q[i+2] + SPH_ROTL64(q[i+3], 11) + \ + q[i+4] + SPH_ROTL64(q[i+5], 27) + q[i+6] + SPH_ROTL64(q[i+7], 32) + \ + q[i+8] + SPH_ROTL64(q[i+9], 37) + q[i+10] + SPH_ROTL64(q[i+11], 43) + \ + q[i+12] + SPH_ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15]) + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // blake + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + T1 = SPH_T64(T1 + 1); + + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // bmw + sph_u64 BMW_H[16]; + +#pragma unroll 16 + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 mv[16],q[32]; + sph_u64 tmp; + + mv[0] = SWAP8(hash->h8[0]); + mv[1] = SWAP8(hash->h8[1]); + mv[2] = SWAP8(hash->h8[2]); + mv[3] = SWAP8(hash->h8[3]); + mv[4] = SWAP8(hash->h8[4]); + mv[5] = SWAP8(hash->h8[5]); + mv[6] = SWAP8(hash->h8[6]); + mv[7] = SWAP8(hash->h8[7]); + mv[8] = 0x80; + mv[9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = SPH_C64(512); + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); + +#pragma unroll 16 + for(int i=0;i<16;i++) + { + mv[i] = BMW_H[i]; + BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i; + } + + tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); + q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; + tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); + q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; + tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); + q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); + q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); + q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; + tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); + q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; + tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); + q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; + tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); + q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; + tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); + q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); + q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; + tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); + q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; + tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); + q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; + tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); + q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; + tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); + q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; + tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); + q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; + +#pragma unroll 2 + for(int i=0;i<2;i++) + { + q[i+16] = + (SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + + (SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + + (SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + + (SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + + (SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + + (SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + + (SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + + (SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + + (SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + + (SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + + (SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + + (SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + + (SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + + (SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + + (SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + + (SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=2;i<6;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); + } + +#pragma unroll 3 + for(int i=6;i<9;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); + } + +#pragma unroll 4 + for(int i=9;i<13;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + +#pragma unroll 3 + for(int i=13;i<16;i++) + { + q[i+16] = CONST_EXP2 + + (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + + SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); + } + + XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; + XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; + + BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); + BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); + BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); + BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); + BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); + BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); + BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); + BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); + + BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); + BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); + BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); + BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); + BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); + BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); + BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); + BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); + + hash->h8[0] = SWAP8(BMW_H[8]); + hash->h8[1] = SWAP8(BMW_H[9]); + hash->h8[2] = SWAP8(BMW_H[10]); + hash->h8[3] = SWAP8(BMW_H[11]); + hash->h8[4] = SWAP8(BMW_H[12]); + hash->h8[5] = SWAP8(BMW_H[13]); + hash->h8[6] = SWAP8(BMW_H[14]); + hash->h8[7] = SWAP8(BMW_H[15]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T4_L[i] = T4[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #define T0 T0_L + #define T1 T1_L + #define T2 T2_L + #define T3 T3_L + #define T4 T4_L + #define T5 T5_L + #define T6 T6_L + #define T7 T7_L + + // groestl + sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; + + sph_u64 g[16], m[16]; + g[0] = m[0] = DEC64E(hash->h8[0]); + g[1] = m[1] = DEC64E(hash->h8[1]); + g[2] = m[2] = DEC64E(hash->h8[2]); + g[3] = m[3] = DEC64E(hash->h8[3]); + g[4] = m[4] = DEC64E(hash->h8[4]); + g[5] = m[5] = DEC64E(hash->h8[5]); + g[6] = m[6] = DEC64E(hash->h8[6]); + g[7] = m[7] = DEC64E(hash->h8[7]); + g[8] = m[8] = 0x80; + g[9] = m[9] = 0; + g[10] = m[10] = 0; + g[11] = m[11] = 0; + g[12] = m[12] = 0; + g[13] = m[13] = 0; + g[14] = m[14] = 0; + g[15] = 0x102000000000000; + m[15] = 0x100000000000000; + + PERM_BIG_P(g); + PERM_BIG_Q(m); + + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u] ^= g[u] ^ m[u]; + + PERM_BIG_P(xH); + + for (unsigned int u = 8; u < 16; u ++) + hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash->h8[0]); + m1 = SWAP8(hash->h8[1]); + m2 = SWAP8(hash->h8[2]); + m3 = SWAP8(hash->h8[3]); + m4 = SWAP8(hash->h8[4]); + m5 = SWAP8(hash->h8[5]); + m6 = SWAP8(hash->h8[6]); + m7 = SWAP8(hash->h8[7]); + + UBI_BIG(480, 64); + + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + + UBI_BIG(510, 8); + + hash->h8[0] = SWAP8(h0); + hash->h8[1] = SWAP8(h1); + hash->h8[2] = SWAP8(h2); + hash->h8[3] = SWAP8(h3); + hash->h8[4] = SWAP8(h4); + hash->h8[5] = SWAP8(h5); + hash->h8[6] = SWAP8(h6); + hash->h8[7] = SWAP8(h7); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) + { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } + else if(i == 1) + { + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search6(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // luffa + + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash->h4[1]; + M1 = hash->h4[0]; + M2 = hash->h4[3]; + M3 = hash->h4[2]; + M4 = hash->h4[5]; + M5 = hash->h4[4]; + M6 = hash->h4[7]; + M7 = hash->h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) + { + M0 = hash->h4[9]; + M1 = hash->h4[8]; + M2 = hash->h4[11]; + M3 = hash->h4[10]; + M4 = hash->h4[13]; + M5 = hash->h4[12]; + M6 = hash->h4[15]; + M7 = hash->h4[14]; + } + else if(i == 1) + { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } + else if(i == 2) + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + else if(i == 3) + { + hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search7(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // cubehash.h1 + + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash->h4[1]); + x1 ^= SWAP4(hash->h4[0]); + x2 ^= SWAP4(hash->h4[3]); + x3 ^= SWAP4(hash->h4[2]); + x4 ^= SWAP4(hash->h4[5]); + x5 ^= SWAP4(hash->h4[4]); + x6 ^= SWAP4(hash->h4[7]); + x7 ^= SWAP4(hash->h4[6]); + + for (int i = 0; i < 13; i ++) + { + SIXTEEN_ROUNDS; + + if (i == 0) + { + x0 ^= SWAP4(hash->h4[9]); + x1 ^= SWAP4(hash->h4[8]); + x2 ^= SWAP4(hash->h4[11]); + x3 ^= SWAP4(hash->h4[10]); + x4 ^= SWAP4(hash->h4[13]); + x5 ^= SWAP4(hash->h4[12]); + x6 ^= SWAP4(hash->h4[15]); + x7 ^= SWAP4(hash->h4[14]); + } + else if(i == 1) + x0 ^= 0x80; + else if (i == 2) + xv ^= SPH_C32(1); + } + + hash->h4[0] = x0; + hash->h4[1] = x1; + hash->h4[2] = x2; + hash->h4[3] = x3; + hash->h4[4] = x4; + hash->h4[5] = x5; + hash->h4[6] = x6; + hash->h4[7] = x7; + hash->h4[8] = x8; + hash->h4[9] = x9; + hash->h4[10] = xa; + hash->h4[11] = xb; + hash->h4[12] = xc; + hash->h4[13] = xd; + hash->h4[14] = xe; + hash->h4[15] = xf; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search8(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // shavite + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash->h4[0]; + rk01 = hash->h4[1]; + rk02 = hash->h4[2]; + rk03 = hash->h4[3]; + rk04 = hash->h4[4]; + rk05 = hash->h4[5]; + rk06 = hash->h4[6]; + rk07 = hash->h4[7]; + rk08 = hash->h4[8]; + rk09 = hash->h4[9]; + rk0A = hash->h4[10]; + rk0B = hash->h4[11]; + rk0C = hash->h4[12]; + rk0D = hash->h4[13]; + rk0E = hash->h4[14]; + rk0F = hash->h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash->h4[0] = h0; + hash->h4[1] = h1; + hash->h4[2] = h2; + hash->h4[3] = h3; + hash->h4[4] = h4; + hash->h4[5] = h5; + hash->h4[6] = h6; + hash->h4[7] = h7; + hash->h4[8] = h8; + hash->h4[9] = h9; + hash->h4[10] = hA; + hash->h4[11] = hB; + hash->h4[12] = hC; + hash->h4[13] = hD; + hash->h4[14] = hE; + hash->h4[15] = hF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search9(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // simd + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash->h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) + { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } + + A0 ^= hash->h4[0]; + A1 ^= hash->h4[1]; + A2 ^= hash->h4[2]; + A3 ^= hash->h4[3]; + A4 ^= hash->h4[4]; + A5 ^= hash->h4[5]; + A6 ^= hash->h4[6]; + A7 ^= hash->h4[7]; + B0 ^= hash->h4[8]; + B1 ^= hash->h4[9]; + B2 ^= hash->h4[10]; + B3 ^= hash->h4[11]; + B4 ^= hash->h4[12]; + B5 ^= hash->h4[13]; + B6 ^= hash->h4[14]; + B7 ^= hash->h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + + #undef q + + hash->h4[0] = A0; + hash->h4[1] = A1; + hash->h4[2] = A2; + hash->h4[3] = A3; + hash->h4[4] = A4; + hash->h4[5] = A5; + hash->h4[6] = A6; + hash->h4[7] = A7; + hash->h4[8] = B0; + hash->h4[9] = B1; + hash->h4[10] = B2; + hash->h4[11] = B3; + hash->h4[12] = B4; + hash->h4[13] = B5; + hash->h4[14] = B6; + hash->h4[15] = B7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search10(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + __global hash_t *hashp = &(hashes[gid-offset]); + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = 0; i < 8; i++) + hash.h8[i] = hashes[gid-offset].h8[i]; + + // echo + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) + BIG_ROUND; + + hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80; + hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81; + hashp->h8[2] = hash.h8[2] ^ Vb10 ^ W10 ^ W90; + hashp->h8[3] = hash.h8[3] ^ Vb11 ^ W11 ^ W91; + hashp->h8[4] = hash.h8[4] ^ Vb20 ^ W20 ^ WA0; + hashp->h8[5] = hash.h8[5] ^ Vb21 ^ W21 ^ WA1; + hashp->h8[6] = hash.h8[6] ^ Vb30 ^ W30 ^ WB0; + hashp->h8[7] = hash.h8[7] ^ Vb31 ^ W31 ^ WB1; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search11(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + + sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; + sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; + sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; + sph_u32 cC = HAMSI_IV512[12], cD = HAMSI_IV512[13], cE = HAMSI_IV512[14], cF = HAMSI_IV512[15]; + sph_u32 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; + sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; + + #define buf(u) hash->h1[i + u] + + for(int i = 0; i < 64; i += 8) + { + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + } + + #undef buf + #define buf(u) (u == 0 ? 0x80 : 0) + + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + + #undef buf + #define buf(u) (u == 6 ? 2 : 0) + + INPUT_BIG_LOCAL; + PF_BIG; + T_BIG; + + for (unsigned u = 0; u < 16; u ++) + hash->h4[u] = h[u]; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search12(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + __global hash_t *hash = &(hashes[gid-offset]); + + // mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + + // fugue + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + sph_u32 S30, S31, S32, S33, S34, S35; + + ulong fc_bit_count = (sph_u64) 64 << 3; + + S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; + S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); + S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1); + S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); + S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); + + FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2])); + FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5])); + FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8])); + FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB])); + FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE])); + FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); + + // apply round shift if necessary + int i; + + for (i = 0; i < 32; i ++) + { + ROR3; + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + } + + for (i = 0; i < 13; i ++) + { + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S28 ^= S00; + ROR8; + SMIX(S00, S01, S02, S03); + } + + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + + hash->h4[0] = SWAP4(S01); + hash->h4[1] = SWAP4(S02); + hash->h4[2] = SWAP4(S03); + hash->h4[3] = SWAP4(S04); + hash->h4[4] = SWAP4(S09); + hash->h4[5] = SWAP4(S10); + hash->h4[6] = SWAP4(S11); + hash->h4[7] = SWAP4(S12); + hash->h4[8] = SWAP4(S18); + hash->h4[9] = SWAP4(S19); + hash->h4[10] = SWAP4(S20); + hash->h4[11] = SWAP4(S21); + hash->h4[12] = SWAP4(S27); + hash->h4[13] = SWAP4(S28); + hash->h4[14] = SWAP4(S29); + hash->h4[15] = SWAP4(S30); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search13(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + __global hash_t *hash = &(hashes[gid-offset]); + + // shabal + sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], + A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; + sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], + B8 = B_init_512[8], B9 = B_init_512[9], BA = B_init_512[10], BB = B_init_512[11], BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15]; + sph_u32 C0 = C_init_512[0], C1 = C_init_512[1], C2 = C_init_512[2], C3 = C_init_512[3], C4 = C_init_512[4], C5 = C_init_512[5], C6 = C_init_512[6], C7 = C_init_512[7], + C8 = C_init_512[8], C9 = C_init_512[9], CA = C_init_512[10], CB = C_init_512[11], CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15]; + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; + sph_u32 Wlow = 1, Whigh = 0; + + M0 = hash->h4[0]; + M1 = hash->h4[1]; + M2 = hash->h4[2]; + M3 = hash->h4[3]; + M4 = hash->h4[4]; + M5 = hash->h4[5]; + M6 = hash->h4[6]; + M7 = hash->h4[7]; + M8 = hash->h4[8]; + M9 = hash->h4[9]; + MA = hash->h4[10]; + MB = hash->h4[11]; + MC = hash->h4[12]; + MD = hash->h4[13]; + ME = hash->h4[14]; + MF = hash->h4[15]; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + + M0 = 0x80; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + + for (unsigned i = 0; i < 3; i ++) + { + SWAP_BC; + XOR_W; + APPLY_P; + } + + hash->h4[0] = B0; + hash->h4[1] = B1; + hash->h4[2] = B2; + hash->h4[3] = B3; + hash->h4[4] = B4; + hash->h4[5] = B5; + hash->h4[6] = B6; + hash->h4[7] = B7; + hash->h4[8] = B8; + hash->h4[9] = B9; + hash->h4[10] = BA; + hash->h4[11] = BB; + hash->h4[12] = BC; + hash->h4[13] = BD; + hash->h4[14] = BE; + hash->h4[15] = BF; + + bool result = (hash->h8[3] <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +#endif // X14_CL \ No newline at end of file diff --git a/kernel/x14old.cl b/kernel/x14old.cl new file mode 100644 index 00000000..30a317ff --- /dev/null +++ b/kernel/x14old.cl @@ -0,0 +1,1078 @@ +/* + * X14 kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * Copyright (c) 2014 Girino Vey + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef X14_CL +#define X14_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) (as_uint(x)) +#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) (as_ulong(x)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_KECCAK_UNROLL 1 +#ifndef SPH_HAMSI_EXPAND_BIG + #define SPH_HAMSI_EXPAND_BIG 4 +#endif + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" +#include "hamsi.cl" +#include "fugue.cl" +#include "shabal.cl" + + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + +typedef union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; +} hash_t; + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, __global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + // blake + + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash->h8[0] = H0; + hash->h8[1] = H1; + hash->h8[2] = H2; + hash->h8[3] = H3; + hash->h8[4] = H4; + hash->h8[5] = H5; + hash->h8[6] = H6; + hash->h8[7] = H7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search1(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + // bmw + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash->h8[0]); + mv[ 1] = SWAP8(hash->h8[1]); + mv[ 2] = SWAP8(hash->h8[2]); + mv[ 3] = SWAP8(hash->h8[3]); + mv[ 4] = SWAP8(hash->h8[4]); + mv[ 5] = SWAP8(hash->h8[5]); + mv[ 6] = SWAP8(hash->h8[6]); + mv[ 7] = SWAP8(hash->h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; +#define M(x) (mv[x]) +#define H(x) (BMW_H[x]) +#define dH(x) (BMW_h2[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + +#define M(x) (BMW_h2[x]) +#define H(x) (final_b[x]) +#define dH(x) (BMW_h1[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + + hash->h8[0] = SWAP8(BMW_h1[8]); + hash->h8[1] = SWAP8(BMW_h1[9]); + hash->h8[2] = SWAP8(BMW_h1[10]); + hash->h8[3] = SWAP8(BMW_h1[11]); + hash->h8[4] = SWAP8(BMW_h1[12]); + hash->h8[5] = SWAP8(BMW_h1[13]); + hash->h8[6] = SWAP8(BMW_h1[14]); + hash->h8[7] = SWAP8(BMW_h1[15]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search2(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + __local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + T0_L[i] = T0[i]; + T1_L[i] = T1[i]; + T2_L[i] = T2[i]; + T3_L[i] = T3[i]; + T4_L[i] = T4[i]; + T5_L[i] = T5[i]; + T6_L[i] = T6[i]; + T7_L[i] = T7[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + +#define T0 T0_L +#define T1 T1_L +#define T2 T2_L +#define T3 T3_L +#define T4 T4_L +#define T5 T5_L +#define T6 T6_L +#define T7 T7_L + + // groestl + + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash->h8[0]); + m[1] = DEC64E(hash->h8[1]); + m[2] = DEC64E(hash->h8[2]); + m[3] = DEC64E(hash->h8[3]); + m[4] = DEC64E(hash->h8[4]); + m[5] = DEC64E(hash->h8[5]); + m[6] = DEC64E(hash->h8[6]); + m[7] = DEC64E(hash->h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash->h8[u] = DEC64E(H[u + 8]); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search3(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash->h8[0]); + m1 = SWAP8(hash->h8[1]); + m2 = SWAP8(hash->h8[2]); + m3 = SWAP8(hash->h8[3]); + m4 = SWAP8(hash->h8[4]); + m5 = SWAP8(hash->h8[5]); + m6 = SWAP8(hash->h8[6]); + m7 = SWAP8(hash->h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash->h8[0] = SWAP8(h0); + hash->h8[1] = SWAP8(h1); + hash->h8[2] = SWAP8(h2); + hash->h8[3] = SWAP8(h3); + hash->h8[4] = SWAP8(h4); + hash->h8[5] = SWAP8(h5); + hash->h8[6] = SWAP8(h6); + hash->h8[7] = SWAP8(h7); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search4(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash->h8[0]); + h0l ^= DEC64E(hash->h8[1]); + h1h ^= DEC64E(hash->h8[2]); + h1l ^= DEC64E(hash->h8[3]); + h2h ^= DEC64E(hash->h8[4]); + h2l ^= DEC64E(hash->h8[5]); + h3h ^= DEC64E(hash->h8[6]); + h3l ^= DEC64E(hash->h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash->h8[0]); + h4l ^= DEC64E(hash->h8[1]); + h5h ^= DEC64E(hash->h8[2]); + h5l ^= DEC64E(hash->h8[3]); + h6h ^= DEC64E(hash->h8[4]); + h6l ^= DEC64E(hash->h8[5]); + h7h ^= DEC64E(hash->h8[6]); + h7l ^= DEC64E(hash->h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash->h8[0] = DEC64E(h4h); + hash->h8[1] = DEC64E(h4l); + hash->h8[2] = DEC64E(h5h); + hash->h8[3] = DEC64E(h5l); + hash->h8[4] = DEC64E(h6h); + hash->h8[5] = DEC64E(h6l); + hash->h8[6] = DEC64E(h7h); + hash->h8[7] = DEC64E(h7l); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search5(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash->h8[0]); + a10 ^= SWAP8(hash->h8[1]); + a20 ^= SWAP8(hash->h8[2]); + a30 ^= SWAP8(hash->h8[3]); + a40 ^= SWAP8(hash->h8[4]); + a01 ^= SWAP8(hash->h8[5]); + a11 ^= SWAP8(hash->h8[6]); + a21 ^= SWAP8(hash->h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash->h8[0] = SWAP8(a00); + hash->h8[1] = SWAP8(a10); + hash->h8[2] = SWAP8(a20); + hash->h8[3] = SWAP8(a30); + hash->h8[4] = SWAP8(a40); + hash->h8[5] = SWAP8(a01); + hash->h8[6] = SWAP8(a11); + hash->h8[7] = SWAP8(a21); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search6(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // luffa + + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash->h4[1]; + M1 = hash->h4[0]; + M2 = hash->h4[3]; + M3 = hash->h4[2]; + M4 = hash->h4[5]; + M5 = hash->h4[4]; + M6 = hash->h4[7]; + M7 = hash->h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = hash->h4[9]; + M1 = hash->h4[8]; + M2 = hash->h4[11]; + M3 = hash->h4[10]; + M4 = hash->h4[13]; + M5 = hash->h4[12]; + M6 = hash->h4[15]; + M7 = hash->h4[14]; + } else if(i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + + hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search7(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // cubehash.h1 + + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash->h4[1]); + x1 ^= SWAP4(hash->h4[0]); + x2 ^= SWAP4(hash->h4[3]); + x3 ^= SWAP4(hash->h4[2]); + x4 ^= SWAP4(hash->h4[5]); + x5 ^= SWAP4(hash->h4[4]); + x6 ^= SWAP4(hash->h4[7]); + x7 ^= SWAP4(hash->h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash->h4[9]); + x1 ^= SWAP4(hash->h4[8]); + x2 ^= SWAP4(hash->h4[11]); + x3 ^= SWAP4(hash->h4[10]); + x4 ^= SWAP4(hash->h4[13]); + x5 ^= SWAP4(hash->h4[12]); + x6 ^= SWAP4(hash->h4[15]); + x7 ^= SWAP4(hash->h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } + } + + hash->h4[0] = x0; + hash->h4[1] = x1; + hash->h4[2] = x2; + hash->h4[3] = x3; + hash->h4[4] = x4; + hash->h4[5] = x5; + hash->h4[6] = x6; + hash->h4[7] = x7; + hash->h4[8] = x8; + hash->h4[9] = x9; + hash->h4[10] = xa; + hash->h4[11] = xb; + hash->h4[12] = xc; + hash->h4[13] = xd; + hash->h4[14] = xe; + hash->h4[15] = xf; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search8(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // shavite + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash->h4[0]; + rk01 = hash->h4[1]; + rk02 = hash->h4[2]; + rk03 = hash->h4[3]; + rk04 = hash->h4[4]; + rk05 = hash->h4[5]; + rk06 = hash->h4[6]; + rk07 = hash->h4[7]; + rk08 = hash->h4[8]; + rk09 = hash->h4[9]; + rk0A = hash->h4[10]; + rk0B = hash->h4[11]; + rk0C = hash->h4[12]; + rk0D = hash->h4[13]; + rk0E = hash->h4[14]; + rk0F = hash->h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash->h4[0] = h0; + hash->h4[1] = h1; + hash->h4[2] = h2; + hash->h4[3] = h3; + hash->h4[4] = h4; + hash->h4[5] = h5; + hash->h4[6] = h6; + hash->h4[7] = h7; + hash->h4[8] = h8; + hash->h4[9] = h9; + hash->h4[10] = hA; + hash->h4[11] = hB; + hash->h4[12] = hC; + hash->h4[13] = hD; + hash->h4[14] = hE; + hash->h4[15] = hF; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search9(__global hash_t* hashes) +{ + uint gid = get_global_id(0); + __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); + + // simd + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash->h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } + + A0 ^= hash->h4[0]; + A1 ^= hash->h4[1]; + A2 ^= hash->h4[2]; + A3 ^= hash->h4[3]; + A4 ^= hash->h4[4]; + A5 ^= hash->h4[5]; + A6 ^= hash->h4[6]; + A7 ^= hash->h4[7]; + B0 ^= hash->h4[8]; + B1 ^= hash->h4[9]; + B2 ^= hash->h4[10]; + B3 ^= hash->h4[11]; + B4 ^= hash->h4[12]; + B5 ^= hash->h4[13]; + B6 ^= hash->h4[14]; + B7 ^= hash->h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash->h4[0] = A0; + hash->h4[1] = A1; + hash->h4[2] = A2; + hash->h4[3] = A3; + hash->h4[4] = A4; + hash->h4[5] = A5; + hash->h4[6] = A6; + hash->h4[7] = A7; + hash->h4[8] = B0; + hash->h4[9] = B1; + hash->h4[10] = B2; + hash->h4[11] = B3; + hash->h4[12] = B4; + hash->h4[13] = B5; + hash->h4[14] = B6; + hash->h4[15] = B7; + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search10(__global hash_t* hashes, __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + uint offset = get_global_offset(0); + hash_t hash; + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + + int init = get_local_id(0); + int step = get_local_size(0); + + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + #ifdef INPUT_BIG_LOCAL + __local sph_u32 T512_L[1024]; + __constant const sph_u32 *T512_C = &T512[0][0]; + + for (int i = init; i < 1024; i += step) + T512_L[i] = T512_C[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + #else + #define INPUT_BIG_LOCAL INPUT_BIG + #endif + + // mixtab + __local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; + for (int i = init; i < 256; i += step) + { + mixtab0[i] = mixtab0_c[i]; + mixtab1[i] = mixtab1_c[i]; + mixtab2[i] = mixtab2_c[i]; + mixtab3[i] = mixtab3_c[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + + for (int i = 0; i < 8; i++) { + hash.h8[i] = hashes[gid-offset].h8[i]; + } + + // echo + + { + + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } + + hash.h8[0] ^= Vb00 ^ W00 ^ W80; + hash.h8[1] ^= Vb01 ^ W01 ^ W81; + hash.h8[2] ^= Vb10 ^ W10 ^ W90; + hash.h8[3] ^= Vb11 ^ W11 ^ W91; + hash.h8[4] ^= Vb20 ^ W20 ^ WA0; + hash.h8[5] ^= Vb21 ^ W21 ^ WA1; + hash.h8[6] ^= Vb30 ^ W30 ^ WB0; + hash.h8[7] ^= Vb31 ^ W31 ^ WB1; + + } + + // hamsi + + { + + sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; + sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; + sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; + sph_u32 cC = HAMSI_IV512[12], cD = HAMSI_IV512[13], cE = HAMSI_IV512[14], cF = HAMSI_IV512[15]; + sph_u32 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; + sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; + +#define buf(u) hash.h1[i + u] + for(int i = 0; i < 64; i += 8) { + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; + } +#undef buf +#define buf(u) (u == 0 ? 0x80 : 0) + INPUT_BIG_LOCAL; + P_BIG; + T_BIG; +#undef buf +#define buf(u) (u == 6 ? 2 : 0) + INPUT_BIG_LOCAL; + PF_BIG; + T_BIG; + + for (unsigned u = 0; u < 16; u ++) + hash.h4[u] = h[u]; + + } + + // fugue + + { + + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + sph_u32 S30, S31, S32, S33, S34, S35; + + ulong fc_bit_count = (sph_u64) 64 << 3; + + S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; + S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); + S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1); + S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); + S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); + + FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2])); + FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5])); + FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8])); + FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB])); + FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE])); + FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); + + // apply round shift if necessary + int i; + + for (i = 0; i < 32; i ++) { + ROR3; + CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); + SMIX(S00, S01, S02, S03); + } + for (i = 0; i < 13; i ++) { + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S18 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S27 ^= S00; + ROR9; + SMIX(S00, S01, S02, S03); + S04 ^= S00; + S10 ^= S00; + S19 ^= S00; + S28 ^= S00; + ROR8; + SMIX(S00, S01, S02, S03); + } + S04 ^= S00; + S09 ^= S00; + S18 ^= S00; + S27 ^= S00; + + hash.h4[0] = SWAP4(S01); + hash.h4[1] = SWAP4(S02); + hash.h4[2] = SWAP4(S03); + hash.h4[3] = SWAP4(S04); + hash.h4[4] = SWAP4(S09); + hash.h4[5] = SWAP4(S10); + hash.h4[6] = SWAP4(S11); + hash.h4[7] = SWAP4(S12); + hash.h4[8] = SWAP4(S18); + hash.h4[9] = SWAP4(S19); + hash.h4[10] = SWAP4(S20); + hash.h4[11] = SWAP4(S21); + hash.h4[12] = SWAP4(S27); + hash.h4[13] = SWAP4(S28); + hash.h4[14] = SWAP4(S29); + hash.h4[15] = SWAP4(S30); + + } + + //shabal + { + sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], + A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; + sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], + B8 = B_init_512[8], B9 = B_init_512[9], BA = B_init_512[10], BB = B_init_512[11], BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15]; + sph_u32 C0 = C_init_512[0], C1 = C_init_512[1], C2 = C_init_512[2], C3 = C_init_512[3], C4 = C_init_512[4], C5 = C_init_512[5], C6 = C_init_512[6], C7 = C_init_512[7], + C8 = C_init_512[8], C9 = C_init_512[9], CA = C_init_512[10], CB = C_init_512[11], CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15]; + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; + sph_u32 Wlow = 1, Whigh = 0; + + M0 = hash.h4[0]; + M1 = hash.h4[1]; + M2 = hash.h4[2]; + M3 = hash.h4[3]; + M4 = hash.h4[4]; + M5 = hash.h4[5]; + M6 = hash.h4[6]; + M7 = hash.h4[7]; + M8 = hash.h4[8]; + M9 = hash.h4[9]; + MA = hash.h4[10]; + MB = hash.h4[11]; + MC = hash.h4[12]; + MD = hash.h4[13]; + ME = hash.h4[14]; + MF = hash.h4[15]; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + + M0 = 0x80; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + for (unsigned i = 0; i < 3; i ++) { + SWAP_BC; + XOR_W; + APPLY_P; + } + + hash.h4[0] = B0; + hash.h4[1] = B1; + hash.h4[2] = B2; + hash.h4[3] = B3; + hash.h4[4] = B4; + hash.h4[5] = B5; + hash.h4[6] = B6; + hash.h4[7] = B7; + hash.h4[8] = B8; + hash.h4[9] = B9; + hash.h4[10] = BA; + hash.h4[11] = BB; + hash.h4[12] = BC; + hash.h4[13] = BD; + hash.h4[14] = BE; + hash.h4[15] = BF; + } + + bool result = (hash.h8[3] <= target); + if (result) + output[atomic_inc(output+0xFF)] = SWAP4(gid); + + barrier(CLK_GLOBAL_MEM_FENCE); +} + +#endif // X14_CL \ No newline at end of file diff --git a/miner.h b/miner.h index fa980c4c..cac35f2a 100644 --- a/miner.h +++ b/miner.h @@ -1033,6 +1033,7 @@ extern int swork_id; extern int opt_tcp_keepalive; extern bool opt_incognito; extern int opt_hamsi_expand_big; +extern bool opt_hamsi_short; #if LOCK_TRACKING extern pthread_mutex_t lockstat_lock; diff --git a/sgminer.c b/sgminer.c index 62bd8ef1..200fcb8b 100644 --- a/sgminer.c +++ b/sgminer.c @@ -192,6 +192,7 @@ int nDevs; int opt_dynamic_interval = 7; int opt_g_threads = -1; int opt_hamsi_expand_big = 4; +bool opt_hamsi_short = false; bool opt_restart = true; struct list_head scan_devices; @@ -1459,7 +1460,10 @@ struct opt_table opt_config_table[] = { "Set GPU lookup gap for scrypt mining, comma separated"), OPT_WITH_ARG("--hamsi-expand-big", set_int_1_to_10, opt_show_intval, &opt_hamsi_expand_big, - "Set SPH_HAMSI_EXPAND_BIG for X13 algorithms (1 or 4 are common)"), + "Set SPH_HAMSI_EXPAND_BIG for X13 derived algorithms (1 or 4 are common)"), + OPT_WITHOUT_ARG("--hamsi-short", + opt_set_bool, &opt_hamsi_short, + "Set SPH_HAMSI_SHORT for X13 derived algorithms (Can give better hashrate for some GPUs)"), #ifdef HAVE_CURSES OPT_WITHOUT_ARG("--incognito", opt_set_bool, &opt_incognito, diff --git a/sph/Makefile.am b/sph/Makefile.am index 5b44b9b9..d80e438a 100644 --- a/sph/Makefile.am +++ b/sph/Makefile.am @@ -1,3 +1,3 @@ noinst_LIBRARIES = libsph.a -libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c +libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c shabal.c whirlpool.c diff --git a/sph/shabal.c b/sph/shabal.c new file mode 100644 index 00000000..c5e35d84 --- /dev/null +++ b/sph/shabal.c @@ -0,0 +1,806 @@ +/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ +/* + * Shabal implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_shabal.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +/* + * Part of this code was automatically generated (the part between + * the "BEGIN" and "END" markers). + */ + +#define sM 16 + +#define C32 SPH_C32 +#define T32 SPH_T32 + +#define O1 13 +#define O2 9 +#define O3 6 + +/* + * We copy the state into local variables, so that the compiler knows + * that it can optimize them at will. + */ + +/* BEGIN -- automatically generated code. */ + +#define DECL_STATE \ + sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \ + A08, A09, A0A, A0B; \ + sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \ + B8, B9, BA, BB, BC, BD, BE, BF; \ + sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \ + C8, C9, CA, CB, CC, CD, CE, CF; \ + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \ + M8, M9, MA, MB, MC, MD, ME, MF; \ + sph_u32 Wlow, Whigh; + +#define READ_STATE(state) do { \ + A00 = (state)->A[0]; \ + A01 = (state)->A[1]; \ + A02 = (state)->A[2]; \ + A03 = (state)->A[3]; \ + A04 = (state)->A[4]; \ + A05 = (state)->A[5]; \ + A06 = (state)->A[6]; \ + A07 = (state)->A[7]; \ + A08 = (state)->A[8]; \ + A09 = (state)->A[9]; \ + A0A = (state)->A[10]; \ + A0B = (state)->A[11]; \ + B0 = (state)->B[0]; \ + B1 = (state)->B[1]; \ + B2 = (state)->B[2]; \ + B3 = (state)->B[3]; \ + B4 = (state)->B[4]; \ + B5 = (state)->B[5]; \ + B6 = (state)->B[6]; \ + B7 = (state)->B[7]; \ + B8 = (state)->B[8]; \ + B9 = (state)->B[9]; \ + BA = (state)->B[10]; \ + BB = (state)->B[11]; \ + BC = (state)->B[12]; \ + BD = (state)->B[13]; \ + BE = (state)->B[14]; \ + BF = (state)->B[15]; \ + C0 = (state)->C[0]; \ + C1 = (state)->C[1]; \ + C2 = (state)->C[2]; \ + C3 = (state)->C[3]; \ + C4 = (state)->C[4]; \ + C5 = (state)->C[5]; \ + C6 = (state)->C[6]; \ + C7 = (state)->C[7]; \ + C8 = (state)->C[8]; \ + C9 = (state)->C[9]; \ + CA = (state)->C[10]; \ + CB = (state)->C[11]; \ + CC = (state)->C[12]; \ + CD = (state)->C[13]; \ + CE = (state)->C[14]; \ + CF = (state)->C[15]; \ + Wlow = (state)->Wlow; \ + Whigh = (state)->Whigh; \ + } while (0) + +#define WRITE_STATE(state) do { \ + (state)->A[0] = A00; \ + (state)->A[1] = A01; \ + (state)->A[2] = A02; \ + (state)->A[3] = A03; \ + (state)->A[4] = A04; \ + (state)->A[5] = A05; \ + (state)->A[6] = A06; \ + (state)->A[7] = A07; \ + (state)->A[8] = A08; \ + (state)->A[9] = A09; \ + (state)->A[10] = A0A; \ + (state)->A[11] = A0B; \ + (state)->B[0] = B0; \ + (state)->B[1] = B1; \ + (state)->B[2] = B2; \ + (state)->B[3] = B3; \ + (state)->B[4] = B4; \ + (state)->B[5] = B5; \ + (state)->B[6] = B6; \ + (state)->B[7] = B7; \ + (state)->B[8] = B8; \ + (state)->B[9] = B9; \ + (state)->B[10] = BA; \ + (state)->B[11] = BB; \ + (state)->B[12] = BC; \ + (state)->B[13] = BD; \ + (state)->B[14] = BE; \ + (state)->B[15] = BF; \ + (state)->C[0] = C0; \ + (state)->C[1] = C1; \ + (state)->C[2] = C2; \ + (state)->C[3] = C3; \ + (state)->C[4] = C4; \ + (state)->C[5] = C5; \ + (state)->C[6] = C6; \ + (state)->C[7] = C7; \ + (state)->C[8] = C8; \ + (state)->C[9] = C9; \ + (state)->C[10] = CA; \ + (state)->C[11] = CB; \ + (state)->C[12] = CC; \ + (state)->C[13] = CD; \ + (state)->C[14] = CE; \ + (state)->C[15] = CF; \ + (state)->Wlow = Wlow; \ + (state)->Whigh = Whigh; \ + } while (0) + +#define DECODE_BLOCK do { \ + M0 = sph_dec32le_aligned(buf + 0); \ + M1 = sph_dec32le_aligned(buf + 4); \ + M2 = sph_dec32le_aligned(buf + 8); \ + M3 = sph_dec32le_aligned(buf + 12); \ + M4 = sph_dec32le_aligned(buf + 16); \ + M5 = sph_dec32le_aligned(buf + 20); \ + M6 = sph_dec32le_aligned(buf + 24); \ + M7 = sph_dec32le_aligned(buf + 28); \ + M8 = sph_dec32le_aligned(buf + 32); \ + M9 = sph_dec32le_aligned(buf + 36); \ + MA = sph_dec32le_aligned(buf + 40); \ + MB = sph_dec32le_aligned(buf + 44); \ + MC = sph_dec32le_aligned(buf + 48); \ + MD = sph_dec32le_aligned(buf + 52); \ + ME = sph_dec32le_aligned(buf + 56); \ + MF = sph_dec32le_aligned(buf + 60); \ + } while (0) + +#define INPUT_BLOCK_ADD do { \ + B0 = T32(B0 + M0); \ + B1 = T32(B1 + M1); \ + B2 = T32(B2 + M2); \ + B3 = T32(B3 + M3); \ + B4 = T32(B4 + M4); \ + B5 = T32(B5 + M5); \ + B6 = T32(B6 + M6); \ + B7 = T32(B7 + M7); \ + B8 = T32(B8 + M8); \ + B9 = T32(B9 + M9); \ + BA = T32(BA + MA); \ + BB = T32(BB + MB); \ + BC = T32(BC + MC); \ + BD = T32(BD + MD); \ + BE = T32(BE + ME); \ + BF = T32(BF + MF); \ + } while (0) + +#define INPUT_BLOCK_SUB do { \ + C0 = T32(C0 - M0); \ + C1 = T32(C1 - M1); \ + C2 = T32(C2 - M2); \ + C3 = T32(C3 - M3); \ + C4 = T32(C4 - M4); \ + C5 = T32(C5 - M5); \ + C6 = T32(C6 - M6); \ + C7 = T32(C7 - M7); \ + C8 = T32(C8 - M8); \ + C9 = T32(C9 - M9); \ + CA = T32(CA - MA); \ + CB = T32(CB - MB); \ + CC = T32(CC - MC); \ + CD = T32(CD - MD); \ + CE = T32(CE - ME); \ + CF = T32(CF - MF); \ + } while (0) + +#define XOR_W do { \ + A00 ^= Wlow; \ + A01 ^= Whigh; \ + } while (0) + +#define SWAP(v1, v2) do { \ + sph_u32 tmp = (v1); \ + (v1) = (v2); \ + (v2) = tmp; \ + } while (0) + +#define SWAP_BC do { \ + SWAP(B0, C0); \ + SWAP(B1, C1); \ + SWAP(B2, C2); \ + SWAP(B3, C3); \ + SWAP(B4, C4); \ + SWAP(B5, C5); \ + SWAP(B6, C6); \ + SWAP(B7, C7); \ + SWAP(B8, C8); \ + SWAP(B9, C9); \ + SWAP(BA, CA); \ + SWAP(BB, CB); \ + SWAP(BC, CC); \ + SWAP(BD, CD); \ + SWAP(BE, CE); \ + SWAP(BF, CF); \ + } while (0) + +#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \ + xa0 = T32((xa0 \ + ^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ + ^ xc) * 3U) \ + ^ xb1 ^ (xb2 & ~xb3) ^ xm; \ + xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ + } while (0) + +#define PERM_STEP_0 do { \ + PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_1 do { \ + PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_2 do { \ + PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define APPLY_P do { \ + B0 = T32(B0 << 17) | (B0 >> 15); \ + B1 = T32(B1 << 17) | (B1 >> 15); \ + B2 = T32(B2 << 17) | (B2 >> 15); \ + B3 = T32(B3 << 17) | (B3 >> 15); \ + B4 = T32(B4 << 17) | (B4 >> 15); \ + B5 = T32(B5 << 17) | (B5 >> 15); \ + B6 = T32(B6 << 17) | (B6 >> 15); \ + B7 = T32(B7 << 17) | (B7 >> 15); \ + B8 = T32(B8 << 17) | (B8 >> 15); \ + B9 = T32(B9 << 17) | (B9 >> 15); \ + BA = T32(BA << 17) | (BA >> 15); \ + BB = T32(BB << 17) | (BB >> 15); \ + BC = T32(BC << 17) | (BC >> 15); \ + BD = T32(BD << 17) | (BD >> 15); \ + BE = T32(BE << 17) | (BE >> 15); \ + BF = T32(BF << 17) | (BF >> 15); \ + PERM_STEP_0; \ + PERM_STEP_1; \ + PERM_STEP_2; \ + A0B = T32(A0B + C6); \ + A0A = T32(A0A + C5); \ + A09 = T32(A09 + C4); \ + A08 = T32(A08 + C3); \ + A07 = T32(A07 + C2); \ + A06 = T32(A06 + C1); \ + A05 = T32(A05 + C0); \ + A04 = T32(A04 + CF); \ + A03 = T32(A03 + CE); \ + A02 = T32(A02 + CD); \ + A01 = T32(A01 + CC); \ + A00 = T32(A00 + CB); \ + A0B = T32(A0B + CA); \ + A0A = T32(A0A + C9); \ + A09 = T32(A09 + C8); \ + A08 = T32(A08 + C7); \ + A07 = T32(A07 + C6); \ + A06 = T32(A06 + C5); \ + A05 = T32(A05 + C4); \ + A04 = T32(A04 + C3); \ + A03 = T32(A03 + C2); \ + A02 = T32(A02 + C1); \ + A01 = T32(A01 + C0); \ + A00 = T32(A00 + CF); \ + A0B = T32(A0B + CE); \ + A0A = T32(A0A + CD); \ + A09 = T32(A09 + CC); \ + A08 = T32(A08 + CB); \ + A07 = T32(A07 + CA); \ + A06 = T32(A06 + C9); \ + A05 = T32(A05 + C8); \ + A04 = T32(A04 + C7); \ + A03 = T32(A03 + C6); \ + A02 = T32(A02 + C5); \ + A01 = T32(A01 + C4); \ + A00 = T32(A00 + C3); \ + } while (0) + +#define INCR_W do { \ + if ((Wlow = T32(Wlow + 1)) == 0) \ + Whigh = T32(Whigh + 1); \ + } while (0) + +static const sph_u32 A_init_192[] = { + C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E), + C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465), + C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9) +}; + +static const sph_u32 B_init_192[] = { + C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824), + C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7), + C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319), + C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C) +}; + +static const sph_u32 C_init_192[] = { + C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B), + C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640), + C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3), + C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669) +}; + +static const sph_u32 A_init_224[] = { + C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B), + C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F), + C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061) +}; + +static const sph_u32 B_init_224[] = { + C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498), + C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5), + C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0), + C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C) +}; + +static const sph_u32 C_init_224[] = { + C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD), + C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18), + C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2), + C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83) +}; + +static const sph_u32 A_init_256[] = { + C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191), + C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C), + C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A) +}; + +static const sph_u32 B_init_256[] = { + C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F), + C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002), + C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890), + C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5) +}; + +static const sph_u32 C_init_256[] = { + C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55), + C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433), + C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F), + C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60) +}; + +static const sph_u32 A_init_384[] = { + C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83), + C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF), + C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D) +}; + +static const sph_u32 B_init_384[] = { + C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F), + C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641), + C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8), + C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36) +}; + +static const sph_u32 C_init_384[] = { + C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399), + C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261), + C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C), + C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70) +}; + +static const sph_u32 A_init_512[] = { + C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), + C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), + C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) +}; + +static const sph_u32 B_init_512[] = { + C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), + C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), + C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), + C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) +}; + +static const sph_u32 C_init_512[] = { + C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), + C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), + C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), + C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) +}; + +/* END -- automatically generated code. */ + +static void +shabal_init(void *cc, unsigned size) +{ + /* + * We have precomputed initial states for all the supported + * output bit lengths. + */ + const sph_u32 *A_init, *B_init, *C_init; + sph_shabal_context *sc; + + switch (size) { + case 192: + A_init = A_init_192; + B_init = B_init_192; + C_init = C_init_192; + break; + case 224: + A_init = A_init_224; + B_init = B_init_224; + C_init = C_init_224; + break; + case 256: + A_init = A_init_256; + B_init = B_init_256; + C_init = C_init_256; + break; + case 384: + A_init = A_init_384; + B_init = B_init_384; + C_init = C_init_384; + break; + case 512: + A_init = A_init_512; + B_init = B_init_512; + C_init = C_init_512; + break; + default: + return; + } + sc = (sph_shabal_context *)cc; + memcpy(sc->A, A_init, sizeof sc->A); + memcpy(sc->B, B_init, sizeof sc->B); + memcpy(sc->C, C_init, sizeof sc->C); + sc->Wlow = 1; + sc->Whigh = 0; + sc->ptr = 0; +} + +static void +shabal_core(void *cc, const unsigned char *data, size_t len) +{ + sph_shabal_context *sc; + unsigned char *buf; + size_t ptr; + DECL_STATE + + sc = (sph_shabal_context *)cc; + buf = sc->buf; + ptr = sc->ptr; + + /* + * We do not want to copy the state to local variables if the + * amount of data is less than what is needed to complete the + * current block. Note that it is anyway suboptimal to call + * this method many times for small chunks of data. + */ + if (len < (sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; + } + + READ_STATE(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data += clen; + len -= clen; + if (ptr == sizeof sc->buf) { + DECODE_BLOCK; + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + ptr = 0; + } + } + WRITE_STATE(sc); + sc->ptr = ptr; +} + +static void +shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words) +{ + sph_shabal_context *sc; + unsigned char *buf; + size_t ptr; + int i; + unsigned z; + union { + unsigned char tmp_out[64]; + sph_u32 dummy; + } u; + size_t out_len; + DECL_STATE + + sc = (sph_shabal_context *)cc; + buf = sc->buf; + ptr = sc->ptr; + z = 0x80 >> n; + buf[ptr] = ((ub & -z) | z) & 0xFF; + memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1)); + READ_STATE(sc); + DECODE_BLOCK; + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + for (i = 0; i < 3; i ++) { + SWAP_BC; + XOR_W; + APPLY_P; + } + + /* + * We just use our local variables; no need to go through + * the state structure. In order to share some code, we + * emit the relevant words into a temporary buffer, which + * we finally copy into the destination array. + */ + switch (size_words) { + case 16: + sph_enc32le_aligned(u.tmp_out + 0, B0); + sph_enc32le_aligned(u.tmp_out + 4, B1); + sph_enc32le_aligned(u.tmp_out + 8, B2); + sph_enc32le_aligned(u.tmp_out + 12, B3); + /* fall through */ + case 12: + sph_enc32le_aligned(u.tmp_out + 16, B4); + sph_enc32le_aligned(u.tmp_out + 20, B5); + sph_enc32le_aligned(u.tmp_out + 24, B6); + sph_enc32le_aligned(u.tmp_out + 28, B7); + /* fall through */ + case 8: + sph_enc32le_aligned(u.tmp_out + 32, B8); + /* fall through */ + case 7: + sph_enc32le_aligned(u.tmp_out + 36, B9); + /* fall through */ + case 6: + sph_enc32le_aligned(u.tmp_out + 40, BA); + sph_enc32le_aligned(u.tmp_out + 44, BB); + sph_enc32le_aligned(u.tmp_out + 48, BC); + sph_enc32le_aligned(u.tmp_out + 52, BD); + sph_enc32le_aligned(u.tmp_out + 56, BE); + sph_enc32le_aligned(u.tmp_out + 60, BF); + break; + default: + return; + } + out_len = size_words << 2; + memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len); + shabal_init(sc, size_words << 5); +} + +/* see sph_shabal.h */ +void +sph_shabal192_init(void *cc) +{ + shabal_init(cc, 192); +} + +/* see sph_shabal.h */ +void +sph_shabal192(void *cc, const void *data, size_t len) +{ + shabal_core(cc, (const unsigned char*)data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal192_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 6); +} + +/* see sph_shabal.h */ +void +sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 6); +} + +/* see sph_shabal.h */ +void +sph_shabal224_init(void *cc) +{ + shabal_init(cc, 224); +} + +/* see sph_shabal.h */ +void +sph_shabal224(void *cc, const void *data, size_t len) +{ + shabal_core(cc, (const unsigned char*)data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal224_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 7); +} + +/* see sph_shabal.h */ +void +sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 7); +} + +/* see sph_shabal.h */ +void +sph_shabal256_init(void *cc) +{ + shabal_init(cc, 256); +} + +/* see sph_shabal.h */ +void +sph_shabal256(void *cc, const void *data, size_t len) +{ + shabal_core(cc, (const unsigned char*)data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal256_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 8); +} + +/* see sph_shabal.h */ +void +sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 8); +} + +/* see sph_shabal.h */ +void +sph_shabal384_init(void *cc) +{ + shabal_init(cc, 384); +} + +/* see sph_shabal.h */ +void +sph_shabal384(void *cc, const void *data, size_t len) +{ + shabal_core(cc, (const unsigned char*)data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal384_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 12); +} + +/* see sph_shabal.h */ +void +sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 12); +} + +/* see sph_shabal.h */ +void +sph_shabal512_init(void *cc) +{ + shabal_init(cc, 512); +} + +/* see sph_shabal.h */ +void +sph_shabal512(void *cc, const void *data, size_t len) +{ + shabal_core(cc, (const unsigned char*)data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal512_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 16); +} + +/* see sph_shabal.h */ +void +sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 16); +} +#ifdef __cplusplus +} +#endif diff --git a/sph/sph_shabal.h b/sph/sph_shabal.h new file mode 100644 index 00000000..9ee6c81d --- /dev/null +++ b/sph/sph_shabal.h @@ -0,0 +1,344 @@ +/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */ +/** + * Shabal interface. Shabal is a family of functions which differ by + * their output size; this implementation defines Shabal for output + * sizes 192, 224, 256, 384 and 512 bits. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_shabal.h + * @author Thomas Pornin + */ + +#ifndef SPH_SHABAL_H__ +#define SPH_SHABAL_H__ + +#include +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/** + * Output size (in bits) for Shabal-192. + */ +#define SPH_SIZE_shabal192 192 + +/** + * Output size (in bits) for Shabal-224. + */ +#define SPH_SIZE_shabal224 224 + +/** + * Output size (in bits) for Shabal-256. + */ +#define SPH_SIZE_shabal256 256 + +/** + * Output size (in bits) for Shabal-384. + */ +#define SPH_SIZE_shabal384 384 + +/** + * Output size (in bits) for Shabal-512. + */ +#define SPH_SIZE_shabal512 512 + +/** + * This structure is a context for Shabal computations: it contains the + * intermediate values and some data from the last entered block. Once + * a Shabal computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running Shabal computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + size_t ptr; + sph_u32 A[12], B[16], C[16]; + sph_u32 Whigh, Wlow; +#endif +} sph_shabal_context; + +/** + * Type for a Shabal-192 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal192_context; + +/** + * Type for a Shabal-224 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal224_context; + +/** + * Type for a Shabal-256 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal256_context; + +/** + * Type for a Shabal-384 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal384_context; + +/** + * Type for a Shabal-512 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal512_context; + +/** + * Initialize a Shabal-192 context. This process performs no memory allocation. + * + * @param cc the Shabal-192 context (pointer to a + * sph_shabal192_context) + */ +void sph_shabal192_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-192 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal192(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-192 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (24 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-192 context + * @param dst the destination buffer + */ +void sph_shabal192_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (24 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-192 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal192_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-224 context. This process performs no memory allocation. + * + * @param cc the Shabal-224 context (pointer to a + * sph_shabal224_context) + */ +void sph_shabal224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal224(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-224 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-224 context + * @param dst the destination buffer + */ +void sph_shabal224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-256 context. This process performs no memory allocation. + * + * @param cc the Shabal-256 context (pointer to a + * sph_shabal256_context) + */ +void sph_shabal256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal256(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-256 context + * @param dst the destination buffer + */ +void sph_shabal256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-384 context. This process performs no memory allocation. + * + * @param cc the Shabal-384 context (pointer to a + * sph_shabal384_context) + */ +void sph_shabal384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal384(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-384 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-384 context + * @param dst the destination buffer + */ +void sph_shabal384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-512 context. This process performs no memory allocation. + * + * @param cc the Shabal-512 context (pointer to a + * sph_shabal512_context) + */ +void sph_shabal512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal512(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-512 context + * @param dst the destination buffer + */ +void sph_shabal512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sph/sph_whirlpool.h b/sph/sph_whirlpool.h new file mode 100644 index 00000000..61dd2a0a --- /dev/null +++ b/sph/sph_whirlpool.h @@ -0,0 +1,218 @@ +/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * WHIRLPOOL interface. + * + * WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original + * version, published in 2000, studied by NESSIE), "WHIRLPOOL-1" + * (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current + * version, 2003, with a new diffusion matrix, also described as "plain + * WHIRLPOOL"). All three variants are implemented here. + * + * The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L. + * M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open + * NESSIE Workshop, Leuven, Belgium, November 13--14, 2000. + * + * The current WHIRLPOOL specification and a reference implementation + * can be found on the WHIRLPOOL web page: + * http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_whirlpool.h + * @author Thomas Pornin + */ + +#ifndef SPH_WHIRLPOOL_H__ +#define SPH_WHIRLPOOL_H__ + +#include +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_64 + +/** + * Output size (in bits) for WHIRLPOOL. + */ +#define SPH_SIZE_whirlpool 512 + +/** + * Output size (in bits) for WHIRLPOOL-0. + */ +#define SPH_SIZE_whirlpool0 512 + +/** + * Output size (in bits) for WHIRLPOOL-1. + */ +#define SPH_SIZE_whirlpool1 512 + +/** + * This structure is a context for WHIRLPOOL computations: it contains the + * intermediate values and some data from the last entered block. Once + * a WHIRLPOOL computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running WHIRLPOOL computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u64 state[8]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_whirlpool_context; + +/** + * Initialize a WHIRLPOOL context. This process performs no memory allocation. + * + * @param cc the WHIRLPOOL context (pointer to a + * sph_whirlpool_context) + */ +void sph_whirlpool_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * plain WHIRLPOOL algorithm. + * + * @param cc the WHIRLPOOL context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL context + * @param dst the destination buffer + */ +void sph_whirlpool_close(void *cc, void *dst); + +/** + * WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL. + */ +typedef sph_whirlpool_context sph_whirlpool0_context; + +#ifdef DOXYGEN_IGNORE +/** + * Initialize a WHIRLPOOL-0 context. This function is identical to + * sph_whirlpool_init(). + * + * @param cc the WHIRLPOOL context (pointer to a + * sph_whirlpool0_context) + */ +void sph_whirlpool0_init(void *cc); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_whirlpool0_init sph_whirlpool_init +#endif + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * WHIRLPOOL-0 algorithm. + * + * @param cc the WHIRLPOOL context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool0(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL-0 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL-0 context + * @param dst the destination buffer + */ +void sph_whirlpool0_close(void *cc, void *dst); + +/** + * WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL. + */ +typedef sph_whirlpool_context sph_whirlpool1_context; + +#ifdef DOXYGEN_IGNORE +/** + * Initialize a WHIRLPOOL-1 context. This function is identical to + * sph_whirlpool_init(). + * + * @param cc the WHIRLPOOL context (pointer to a + * sph_whirlpool1_context) + */ +void sph_whirlpool1_init(void *cc); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_whirlpool1_init sph_whirlpool_init +#endif + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * WHIRLPOOL-1 algorithm. + * + * @param cc the WHIRLPOOL context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool1(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL-1 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL-1 context + * @param dst the destination buffer + */ +void sph_whirlpool1_close(void *cc, void *dst); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/sph/whirlpool.c b/sph/whirlpool.c new file mode 100644 index 00000000..e49f3453 --- /dev/null +++ b/sph/whirlpool.c @@ -0,0 +1,3480 @@ +/* $Id: whirlpool.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * WHIRLPOOL implementation. + * + * Internally, we use little-endian convention, on the assumption that + * architectures which favour big-endian encoding are: + * 1. rarer + * 2. in decreasing numbers + * 3. able to decode little-endian data efficiently anyway + * + * The most common big-endian architecture is Sparc, and Ultrasparc CPU + * include special opcodes to perform little-endian accesses, which we use + * (see sph_types.h). Most modern CPU designs can work with both endianness + * and architecture designer now favour little-endian (basically, x86 has + * won the endianness war). + * + * TODO: implement a 32-bit version. Not only such a version would be handy + * for non-64-bit-able architectures, but it may also use smaller tables, + * at the expense of more lookups and XORs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_whirlpool.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_WHIRLPOOL +#define SPH_SMALL_FOOTPRINT_WHIRLPOOL 1 +#endif + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL (current version). + */ + +static const sph_u64 plain_T0[256] = { + SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), + SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), + SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), + SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F), + SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6), + SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5), + SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F), + SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252), + SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC), + SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E), + SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C), + SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535), + SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0), + SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2), + SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B), + SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757), + SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777), + SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5), + SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0), + SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA), + SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9), + SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A), + SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0), + SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585), + SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D), + SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4), + SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E), + SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767), + SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727), + SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B), + SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D), + SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8), + SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE), + SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666), + SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717), + SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E), + SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D), + SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707), + SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A), + SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333), + SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202), + SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171), + SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919), + SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9), + SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3), + SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888), + SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626), + SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0), + SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F), + SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080), + SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD), + SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848), + SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A), + SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F), + SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868), + SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE), + SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454), + SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222), + SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1), + SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212), + SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808), + SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC), + SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1), + SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D), + SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000), + SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B), + SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282), + SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B), + SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF), + SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050), + SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3), + SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF), + SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555), + SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA), + SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA), + SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0), + SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C), + SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D), + SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575), + SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A), + SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6), + SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F), + SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4), + SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696), + SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5), + SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959), + SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272), + SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C), + SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878), + SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C), + SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5), + SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161), + SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121), + SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E), + SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7), + SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404), + SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999), + SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D), + SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF), + SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424), + SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB), + SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111), + SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E), + SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB), + SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181), + SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7), + SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313), + SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3), + SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E), + SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303), + SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444), + SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9), + SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB), + SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353), + SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B), + SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C), + SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474), + SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646), + SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989), + SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1), + SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A), + SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909), + SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6), + SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED), + SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242), + SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4), + SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C), + SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 plain_T1[256] = { + SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), + SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), + SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), + SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D), + SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF), + SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E), + SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30), + SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8), + SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35), + SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A), + SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C), + SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580), + SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3), + SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C), + SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29), + SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5), + SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8), + SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E), + SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023), + SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44), + SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF), + SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A), + SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9), + SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9), + SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F), + SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407), + SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3), + SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778), + SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702), + SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7), + SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2), + SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856), + SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD), + SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671), + SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF), + SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A), + SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58), + SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F), + SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0), + SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6), + SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212), + SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE), + SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1), + SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F), + SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8), + SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC), + SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B), + SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059), + SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77), + SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4), + SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB), + SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832), + SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D), + SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D), + SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F), + SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7), + SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE), + SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F), + SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A), + SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282), + SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848), + SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF), + SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0), + SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8), + SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000), + SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E), + SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6), + SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3), + SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE), + SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA), + SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338), + SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4), + SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7), + SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9), + SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03), + SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E), + SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC), + SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F), + SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA), + SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE), + SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685), + SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7), + SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A), + SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652), + SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3), + SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB), + SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5), + SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16), + SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F), + SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98), + SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4), + SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E), + SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134), + SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE), + SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1), + SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424), + SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925), + SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65), + SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69), + SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419), + SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A), + SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199), + SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04), + SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0), + SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD), + SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C), + SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B), + SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305), + SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39), + SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B), + SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E), + SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988), + SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A), + SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1), + SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53), + SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B), + SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3), + SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C), + SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5), + SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA), + SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7), + SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941), + SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F), + SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6), + SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268), + SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED), + SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86), + SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) +}; + +static const sph_u64 plain_T2[256] = { + SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), + SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), + SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), + SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E), + SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51), + SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7), + SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE), + SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4), + SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565), + SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01), + SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18), + SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A), + SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD), + SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99), + SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996), + SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE), + SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE), + SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7), + SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD), + SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9), + SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F), + SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14), + SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D), + SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917), + SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA), + SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5), + SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C), + SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE), + SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E), + SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B), + SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA), + SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD), + SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1), + SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC), + SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E), + SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21), + SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A), + SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E), + SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4), + SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666), + SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204), + SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2), + SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132), + SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF), + SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB), + SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D), + SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C), + SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D), + SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E), + SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D), + SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87), + SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290), + SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4), + SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE), + SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0), + SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741), + SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8), + SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44), + SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF), + SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224), + SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810), + SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5), + SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F), + SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A), + SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000), + SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56), + SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619), + SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336), + SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43), + SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0), + SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB), + SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3), + SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA), + SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9), + SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369), + SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D), + SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38), + SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A), + SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA), + SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09), + SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1), + SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E), + SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5), + SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231), + SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397), + SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2), + SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4), + SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698), + SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0), + SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805), + SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457), + SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2), + SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442), + SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C), + SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193), + SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408), + SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F), + SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A), + SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3), + SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948), + SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B), + SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922), + SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C), + SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB), + SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F), + SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3), + SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26), + SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB), + SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC), + SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06), + SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88), + SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F), + SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B), + SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6), + SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316), + SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8), + SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8), + SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C), + SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F), + SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF), + SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774), + SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112), + SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71), + SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7), + SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884), + SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55), + SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8), + SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) +}; + +static const sph_u64 plain_T3[256] = { + SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), + SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), + SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), + SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E), + SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104), + SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706), + SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE), + SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407), + SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576), + SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C), + SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C), + SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1), + SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47), + SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED), + SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A), + SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16), + SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6), + SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756), + SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17), + SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995), + SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA), + SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422), + SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A), + SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB), + SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34), + SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503), + SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6), + SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6), + SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB), + SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D), + SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94), + SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F), + SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171), + SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3), + SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B), + SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC), + SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99), + SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B), + SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F), + SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF), + SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A), + SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8), + SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D), + SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A), + SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48), + SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92), + SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE), + SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A), + SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33), + SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA), + SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE), + SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075), + SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F), + SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E), + SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5), + SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C), + SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819), + SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA), + SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12), + SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A), + SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028), + SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B), + SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F), + SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9), + SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000), + SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687), + SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0), + SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677), + SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329), + SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D), + SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18), + SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374), + SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C), + SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965), + SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968), + SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7), + SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C), + SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64), + SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC), + SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998), + SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159), + SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63), + SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3), + SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4), + SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6), + SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220), + SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7), + SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861), + SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085), + SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586), + SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B), + SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8), + SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5), + SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66), + SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC), + SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814), + SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7), + SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39), + SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384), + SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4), + SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D), + SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255), + SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B), + SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60), + SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF), + SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C), + SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F), + SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8), + SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB), + SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F), + SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849), + SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37), + SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D), + SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602), + SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627), + SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1), + SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9), + SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43), + SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97), + SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42), + SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2), + SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D), + SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154), + SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E), + SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457), + SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E), + SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831), + SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) +}; + +static const sph_u64 plain_T4[256] = { + SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), + SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), + SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), + SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42), + SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459), + SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB), + SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F), + SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA), + SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689), + SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04), + SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60), + SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5), + SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753), + SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E), + SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62), + SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682), + SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F), + SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B), + SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3), + SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E), + SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06), + SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250), + SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69), + SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C), + SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2), + SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3), + SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED), + SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F), + SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25), + SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C), + SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF), + SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E), + SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123), + SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317), + SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8), + SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84), + SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975), + SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38), + SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA), + SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85), + SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10), + SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF), + SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8), + SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86), + SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B), + SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234), + SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D), + SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9), + SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378), + SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74), + SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26), + SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A), + SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7), + SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2), + SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567), + SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19), + SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A), + SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D), + SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB), + SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90), + SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840), + SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33), + SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61), + SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5), + SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000), + SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745), + SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064), + SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8), + SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911), + SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA), + SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB), + SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B), + SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92), + SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503), + SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9), + SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E), + SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0), + SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452), + SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F), + SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824), + SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963), + SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8), + SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE), + SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4), + SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666), + SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2), + SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7), + SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A), + SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7), + SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614), + SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41), + SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F), + SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515), + SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0), + SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76), + SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420), + SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC), + SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968), + SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6), + SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D), + SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31), + SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588), + SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A), + SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B), + SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C), + SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB), + SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98), + SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6), + SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57), + SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18), + SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A), + SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721), + SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1), + SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2), + SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758), + SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147), + SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987), + SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A), + SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C), + SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B), + SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD), + SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48), + SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9), + SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B), + SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A), + SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49), + SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA), + SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) +}; + +static const sph_u64 plain_T5[256] = { + SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), + SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), + SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), + SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F), + SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6), + SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5), + SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F), + SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52), + SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC), + SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E), + SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C), + SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535), + SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0), + SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2), + SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B), + SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257), + SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77), + SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5), + SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0), + SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA), + SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9), + SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A), + SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0), + SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85), + SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D), + SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4), + SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E), + SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67), + SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527), + SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B), + SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D), + SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8), + SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE), + SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766), + SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817), + SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E), + SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D), + SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807), + SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A), + SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533), + SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002), + SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71), + SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819), + SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9), + SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3), + SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488), + SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26), + SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0), + SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F), + SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480), + SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD), + SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48), + SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A), + SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F), + SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768), + SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE), + SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54), + SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22), + SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1), + SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012), + SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008), + SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC), + SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1), + SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D), + SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000), + SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B), + SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482), + SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B), + SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF), + SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50), + SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3), + SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF), + SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255), + SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA), + SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA), + SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0), + SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C), + SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D), + SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75), + SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A), + SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6), + SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F), + SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4), + SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496), + SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5), + SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259), + SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772), + SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C), + SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778), + SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C), + SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5), + SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61), + SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521), + SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E), + SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7), + SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004), + SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99), + SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D), + SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF), + SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24), + SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB), + SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811), + SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E), + SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB), + SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81), + SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7), + SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813), + SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3), + SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E), + SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803), + SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44), + SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9), + SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB), + SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253), + SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B), + SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C), + SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774), + SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46), + SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89), + SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1), + SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A), + SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809), + SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6), + SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED), + SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42), + SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4), + SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C), + SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) +}; + +static const sph_u64 plain_T6[256] = { + SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), + SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), + SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), + SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21), + SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2), + SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3), + SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1), + SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255), + SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA), + SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02), + SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30), + SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4), + SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7), + SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F), + SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31), + SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741), + SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1), + SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3), + SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7), + SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F), + SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903), + SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28), + SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA), + SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E), + SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69), + SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7), + SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8), + SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781), + SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C), + SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16), + SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9), + SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847), + SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F), + SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685), + SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C), + SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42), + SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4), + SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C), + SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75), + SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC), + SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208), + SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9), + SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964), + SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943), + SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB), + SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A), + SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698), + SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA), + SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C), + SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A), + SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13), + SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D), + SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5), + SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61), + SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD), + SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82), + SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D), + SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288), + SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3), + SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248), + SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820), + SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97), + SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE), + SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4), + SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC), + SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232), + SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C), + SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86), + SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D), + SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB), + SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B), + SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549), + SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F), + SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2), + SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027), + SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70), + SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29), + SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9), + SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12), + SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF), + SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C), + SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477), + SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662), + SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533), + SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979), + SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5), + SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D), + SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD), + SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A), + SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE), + SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199), + SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184), + SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78), + SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B), + SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410), + SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E), + SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34), + SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B), + SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490), + SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96), + SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144), + SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25), + SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B), + SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E), + SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB), + SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C), + SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B), + SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5), + SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C), + SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D), + SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E), + SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6), + SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351), + SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C), + SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD), + SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD), + SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605), + SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E), + SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3), + SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8), + SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924), + SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2), + SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93), + SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215), + SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA), + SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D), + SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) +}; + +static const sph_u64 plain_T7[256] = { + SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), + SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), + SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), + SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F), + SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6), + SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5), + SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F), + SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552), + SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC), + SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E), + SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C), + SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435), + SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0), + SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2), + SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B), + SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157), + SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177), + SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5), + SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0), + SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA), + SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9), + SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A), + SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0), + SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85), + SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D), + SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4), + SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E), + SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167), + SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27), + SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B), + SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D), + SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8), + SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE), + SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566), + SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17), + SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E), + SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D), + SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07), + SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A), + SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33), + SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802), + SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971), + SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419), + SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9), + SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3), + SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88), + SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826), + SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0), + SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F), + SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80), + SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD), + SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48), + SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A), + SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F), + SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68), + SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE), + SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54), + SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822), + SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1), + SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812), + SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008), + SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC), + SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1), + SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D), + SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000), + SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B), + SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282), + SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B), + SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF), + SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50), + SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3), + SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF), + SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955), + SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA), + SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA), + SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0), + SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C), + SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D), + SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975), + SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A), + SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6), + SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F), + SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4), + SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296), + SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5), + SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959), + SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572), + SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C), + SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78), + SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C), + SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5), + SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961), + SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421), + SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E), + SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7), + SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004), + SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99), + SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D), + SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF), + SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024), + SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB), + SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411), + SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E), + SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB), + SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81), + SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7), + SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13), + SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3), + SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E), + SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03), + SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44), + SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9), + SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB), + SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153), + SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B), + SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C), + SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74), + SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546), + SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89), + SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1), + SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A), + SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409), + SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6), + SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED), + SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542), + SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4), + SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C), + SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) +}; + +#endif + +/* + * Round constants. + */ +static const sph_u64 plain_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL-0 (first version). + */ + +static const sph_u64 old0_T0[256] = { + SPH_C64(0xD50F67D568B86868), SPH_C64(0xB71ECEB7D06DD0D0), + SPH_C64(0x60E00B60EB20EBEB), SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0x75327A7548D84848), SPH_C64(0xD3019CD39DBA9D9D), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x53977353E431E4E4), + SPH_C64(0x48A84B48E338E3E3), SPH_C64(0x15D27115A3F8A3A3), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0x94B2CF947D877D7D), SPH_C64(0x122ADB12F10EF1F1), + SPH_C64(0xABD95CAB85928585), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x859FE78578887878), SPH_C64(0xC5D41EC5CA43CACA), + SPH_C64(0x4BAFB84B17391717), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0xF84E2FF861A36161), SPH_C64(0xA633E6A6D562D5D5), + SPH_C64(0x348FD2345DE75D5D), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0x869814868C898C8C), SPH_C64(0xCCC1FDCC3C443C3C), + SPH_C64(0xB6E89FB677997777), SPH_C64(0x08E3B20851F35151), + SPH_C64(0xAA2F0DAA22662222), SPH_C64(0x57682A5742C64242), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0x5873325841C34141), SPH_C64(0xBAF474BA809D8080), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0xA4C244A486978686), + SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0x78D8C07818281818), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x16D5821657F95757), + SPH_C64(0x1E36301E060A0606), SPH_C64(0xF75537F762A66262), + SPH_C64(0x0307F303F401F4F4), SPH_C64(0xEE9BADEE365A3636), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0xDA147FDA6BBD6B6B), + SPH_C64(0x77C3D8771B2D1B1B), SPH_C64(0xEC6A0FEC65AF6565), + SPH_C64(0xBCFA8FBC759F7575), SPH_C64(0x5090805010301010), + SPH_C64(0x95449E95DA73DADA), SPH_C64(0x703B727049DB4949), + SPH_C64(0xBE0B2DBE266A2626), SPH_C64(0x3A629B3AF916F9F9), + SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x2CB7192CAEEFAEAE), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x07F8AA0752F65252), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0x112D2811050F0505), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x396568390D170D0D), SPH_C64(0xA2CCBFA273957373), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x14242014040C0404), + SPH_C64(0xA03D1DA020602020), SPH_C64(0x215DA321FE1FFEFE), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0x225A50220A1E0A0A), SPH_C64(0x5B74C15BB5C2B5B5), + SPH_C64(0xE78E4EE7C05DC0C0), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xA8DEAFA871937171), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x995875992D772D2D), SPH_C64(0xFD4727FD60A06060), + SPH_C64(0xA7C5B7A772967272), SPH_C64(0xE57FECE593A89393), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x2848402808180808), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xA53415A521632121), + SPH_C64(0x3186DA315CE45C5C), SPH_C64(0xA1CB4CA187948787), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0x0000000000000000), SPH_C64(0xE89556E8C358C3C3), + SPH_C64(0x5A82905A12361212), SPH_C64(0xEF6DFCEF91AE9191), + SPH_C64(0x98AE24988A838A8A), SPH_C64(0x0A12100A02060202), + SPH_C64(0x6CFCE06C1C241C1C), SPH_C64(0x59856359E637E6E6), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x2E46BB2EFD1AFDFD), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x1FC0611FA1FEA1A1), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0xFFB685FF33553333), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xAED054AE84918484), SPH_C64(0xAF2605AF23652323), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0x4A59E94AB0CDB0B0), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x41BDA841153F1515), + SPH_C64(0xE180B5E1355F3535), SPH_C64(0xD0066FD069BB6969), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0xFE40D4FE94A19494), + SPH_C64(0x641F52644DD74D4D), SPH_C64(0xADD7A7AD70907070), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDEEB26DECD4ACDCD), SPH_C64(0xA928FEA9D667D6D6), + SPH_C64(0xC12B47C16CB46C6C), SPH_C64(0x5166D151B7C4B7B7), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0x2D41482D091B0909), + SPH_C64(0x1838CB18F308F3F3), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x0EED490EA4F1A4A4), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0x7BDF337BEC29ECEC), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xA33AEEA3D461D4D4), SPH_C64(0xBD0CDEBDD26BD2D2), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x42BA5B42E13EE1E1), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0xF9B87EF9C657C6C6), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x7A29627A4BDD4B4B), + SPH_C64(0x8F8DF78F7A8E7A7A), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x3B94CA3B5EE25E5E), + SPH_C64(0x8469B684DF7CDFDF), SPH_C64(0xFB49DCFB95A29595), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x38933938AAE3AAAA), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xD1F03ED1CE4FCECE), + SPH_C64(0x1B3F381B07090707), SPH_C64(0x337778330F110F0F), + SPH_C64(0xC9C8F5C93D473D3D), SPH_C64(0x25A2FA2558E85858), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xC22CB4C298B59898), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x1D31C31DF20BF2F2), + SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x5599885511331111), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x5261225243C54343), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0x8B72AE8BDC79DCDC), + SPH_C64(0x569E7B56E532E5E5), SPH_C64(0x404BF940B2CBB2B2), + SPH_C64(0x6B044A6B4ED24E4E), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x6AF21B6AE926E9E9), + SPH_C64(0xBB0225BB27692727), SPH_C64(0x5D7A3A5D40C04040), + SPH_C64(0x9F568E9FD875D8D8), SPH_C64(0xEB92A5EB37593737), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0x89830C898F8C8F8F), + SPH_C64(0x0509080501030101), SPH_C64(0x69F5E8691D271D1D), + SPH_C64(0x02F1A20253F55353), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x20ABF22059EB5959), SPH_C64(0xE28746E2C15EC1C1), + SPH_C64(0x6E0D426E4FD14F4F), SPH_C64(0xFABF8DFA32563232), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0x35798335FA13FAFA), + SPH_C64(0xB9F387B9749C7474), SPH_C64(0x30708B30FB10FBFB), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0xD9138CD99FBC9F9F), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x72CAD0721A2E1A1A), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x0915E309F607F6F6), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x88755D8828782828), + SPH_C64(0x92BC349288858888), SPH_C64(0xCD37ACCD9BB09B9B), + SPH_C64(0xF5A495F531533131), SPH_C64(0x367E70360E120E0E), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x7F206A7F4ADE4A4A), + SPH_C64(0x6FFB136FE825E8E8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x04FF5904A6F7A6A6), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x8096EF80798B7979), + SPH_C64(0x76358976BCD9BCBC), SPH_C64(0x7C27997CBEDFBEBE), + SPH_C64(0x74C42B74EF2CEFEF), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0x434C0A4346CA4646), SPH_C64(0xF15BCCF197A49797), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0x7DD1C87D192B1919), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0xC725BCC799B69999), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0x8D7C558D297B2929), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x5F8B985F13351313), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0x0C1CEB0CF704F7F7), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x4645024647C94747), + SPH_C64(0x934A65932F712F2F), SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x6211A962B8D5B8B8), SPH_C64(0x8A84FF8A7B8D7B7B), + SPH_C64(0x97B53C9789868989), SPH_C64(0xF0AD9DF030503030), + SPH_C64(0xB805D6B8D368D3D3), SPH_C64(0x9EA0DF9E7F817F7F), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 old0_T1[256] = { + SPH_C64(0x0F67D568B86868D5), SPH_C64(0x1ECEB7D06DD0D0B7), + SPH_C64(0xE00B60EB20EBEB60), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0x327A7548D8484875), SPH_C64(0x019CD39DBA9D9DD3), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0x977353E431E4E453), + SPH_C64(0xA84B48E338E3E348), SPH_C64(0xD27115A3F8A3A315), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0xB2CF947D877D7D94), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xD95CAB85928585AB), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0x9FE7857888787885), SPH_C64(0xD41EC5CA43CACAC5), + SPH_C64(0xAFB84B173917174B), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x4E2FF861A36161F8), SPH_C64(0x33E6A6D562D5D5A6), + SPH_C64(0x8FD2345DE75D5D34), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x9814868C898C8C86), SPH_C64(0xC1FDCC3C443C3CCC), + SPH_C64(0xE89FB677997777B6), SPH_C64(0xE3B20851F3515108), + SPH_C64(0x2F0DAA22662222AA), SPH_C64(0x682A5742C6424257), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x73325841C3414158), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0xC244A486978686A4), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0xD8C0781828181878), + SPH_C64(0x436D962E722E2E96), SPH_C64(0xD5821657F9575716), + SPH_C64(0x36301E060A06061E), SPH_C64(0x5537F762A66262F7), + SPH_C64(0x07F303F401F4F403), SPH_C64(0x9BADEE365A3636EE), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0x147FDA6BBD6B6BDA), + SPH_C64(0xC3D8771B2D1B1B77), SPH_C64(0x6A0FEC65AF6565EC), + SPH_C64(0xFA8FBC759F7575BC), SPH_C64(0x9080501030101050), + SPH_C64(0x449E95DA73DADA95), SPH_C64(0x3B727049DB494970), + SPH_C64(0x0B2DBE266A2626BE), SPH_C64(0x629B3AF916F9F93A), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0xB7192CAEEFAEAE2C), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0xF8AA0752F6525207), SPH_C64(0x9A313DABE0ABAB3D), + SPH_C64(0x2D2811050F050511), SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x6568390D170D0D39), SPH_C64(0xCCBFA273957373A2), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x242014040C040414), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x5DA321FE1FFEFE21), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x5A50220A1E0A0A22), SPH_C64(0x74C15BB5C2B5B55B), + SPH_C64(0x8E4EE7C05DC0C0E7), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0xDEAFA871937171A8), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0x5875992D772D2D99), SPH_C64(0x4727FD60A06060FD), + SPH_C64(0xC5B7A772967272A7), SPH_C64(0x7FECE593A89393E5), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x4840280818080828), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0x3415A521632121A5), + SPH_C64(0x86DA315CE45C5C31), SPH_C64(0xCB4CA187948787A1), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x0000000000000000), SPH_C64(0x9556E8C358C3C3E8), + SPH_C64(0x82905A123612125A), SPH_C64(0x6DFCEF91AE9191EF), + SPH_C64(0xAE24988A838A8A98), SPH_C64(0x12100A020602020A), + SPH_C64(0xFCE06C1C241C1C6C), SPH_C64(0x856359E637E6E659), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x46BB2EFD1AFDFD2E), + SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xC0611FA1FEA1A11F), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0xB685FF33553333FF), SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0xD054AE84918484AE), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xBDA841153F151541), + SPH_C64(0x80B5E1355F3535E1), SPH_C64(0x066FD069BB6969D0), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x40D4FE94A19494FE), + SPH_C64(0x1F52644DD74D4D64), SPH_C64(0xD7A7AD70907070AD), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0xEB26DECD4ACDCDDE), SPH_C64(0x28FEA9D667D6D6A9), + SPH_C64(0x2B47C16CB46C6CC1), SPH_C64(0x66D151B7C4B7B751), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0x41482D091B09092D), + SPH_C64(0x38CB18F308F3F318), SPH_C64(0x781FE667A96767E6), + SPH_C64(0xED490EA4F1A4A40E), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0xDF337BEC29ECEC7B), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x3AEEA3D461D4D4A3), SPH_C64(0x0CDEBDD26BD2D2BD), + SPH_C64(0xB4A044143C141444), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0xBA5B42E13EE1E142), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0xB87EF9C657C6C6F9), + SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x8DF78F7A8E7A7A8F), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0x94CA3B5EE25E5E3B), + SPH_C64(0x69B684DF7CDFDF84), SPH_C64(0x49DCFB95A29595FB), + SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x933938AAE3AAAA38), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0xF03ED1CE4FCECED1), + SPH_C64(0x3F381B070907071B), SPH_C64(0x7778330F110F0F33), + SPH_C64(0xC8F5C93D473D3DC9), SPH_C64(0xA2FA2558E8585825), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x2CB4C298B59898C2), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0x31C31DF20BF2F21D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0x9988551133111155), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0x61225243C5434352), SPH_C64(0x1B180F030503030F), + SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x72AE8BDC79DCDC8B), + SPH_C64(0x9E7B56E532E5E556), SPH_C64(0x4BF940B2CBB2B240), + SPH_C64(0x044A6B4ED24E4E6B), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0xF21B6AE926E9E96A), + SPH_C64(0x0225BB27692727BB), SPH_C64(0x7A3A5D40C040405D), + SPH_C64(0x568E9FD875D8D89F), SPH_C64(0x92A5EB37593737EB), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0x830C898F8C8F8F89), + SPH_C64(0x0908050103010105), SPH_C64(0xF5E8691D271D1D69), + SPH_C64(0xF1A20253F5535302), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0xABF22059EB595920), SPH_C64(0x8746E2C15EC1C1E2), + SPH_C64(0x0D426E4FD14F4F6E), SPH_C64(0xBF8DFA32563232FA), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0x798335FA13FAFA35), + SPH_C64(0xF387B9749C7474B9), SPH_C64(0x708B30FB10FBFB30), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x138CD99FBC9F9FD9), + SPH_C64(0x89BDE4345C3434E4), SPH_C64(0xCAD0721A2E1A1A72), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x15E309F607F6F609), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x755D882878282888), + SPH_C64(0xBC34928885888892), SPH_C64(0x37ACCD9BB09B9BCD), + SPH_C64(0xA495F531533131F5), SPH_C64(0x7E70360E120E0E36), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x206A7F4ADE4A4A7F), + SPH_C64(0xFB136FE825E8E86F), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0xFF5904A6F7A6A604), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0x96EF80798B797980), + SPH_C64(0x358976BCD9BCBC76), SPH_C64(0x27997CBEDFBEBE7C), + SPH_C64(0xC42B74EF2CEFEF74), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0x4C0A4346CA464643), SPH_C64(0x5BCCF197A49797F1), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xD1C87D192B19197D), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x7C558D297B29298D), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0xE7F8631F211F1F63), + SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0x8B985F133513135F), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x1CEB0CF704F7F70C), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x45024647C9474746), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0x11A962B8D5B8B862), SPH_C64(0x84FF8A7B8D7B7B8A), + SPH_C64(0xB53C978986898997), SPH_C64(0xAD9DF030503030F0), + SPH_C64(0x05D6B8D368D3D3B8), SPH_C64(0xA0DF9E7F817F7F9E), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0) +}; + +static const sph_u64 old0_T2[256] = { + SPH_C64(0x67D568B86868D50F), SPH_C64(0xCEB7D06DD0D0B71E), + SPH_C64(0x0B60EB20EBEB60E0), SPH_C64(0x45872B7D2B2B876E), + SPH_C64(0x7A7548D848487532), SPH_C64(0x9CD39DBA9D9DD301), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0x7353E431E4E45397), + SPH_C64(0x4B48E338E3E348A8), SPH_C64(0x7115A3F8A3A315D2), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xCF947D877D7D94B2), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0x5CAB85928585ABD9), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0xE78578887878859F), SPH_C64(0x1EC5CA43CACAC5D4), + SPH_C64(0xB84B173917174BAF), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x2FF861A36161F84E), SPH_C64(0xE6A6D562D5D5A633), + SPH_C64(0xD2345DE75D5D348F), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x14868C898C8C8698), SPH_C64(0xFDCC3C443C3CCCC1), + SPH_C64(0x9FB677997777B6E8), SPH_C64(0xB20851F3515108E3), + SPH_C64(0x0DAA22662222AA2F), SPH_C64(0x2A5742C642425768), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0x325841C341415873), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x44A486978686A4C2), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0xC0781828181878D8), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x821657F9575716D5), + SPH_C64(0x301E060A06061E36), SPH_C64(0x37F762A66262F755), + SPH_C64(0xF303F401F4F40307), SPH_C64(0xADEE365A3636EE9B), + SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x7FDA6BBD6B6BDA14), + SPH_C64(0xD8771B2D1B1B77C3), SPH_C64(0x0FEC65AF6565EC6A), + SPH_C64(0x8FBC759F7575BCFA), SPH_C64(0x8050103010105090), + SPH_C64(0x9E95DA73DADA9544), SPH_C64(0x727049DB4949703B), + SPH_C64(0x2DBE266A2626BE0B), SPH_C64(0x9B3AF916F9F93A62), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0x17E366AA6666E371), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x192CAEEFAEAE2CB7), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0xAA0752F6525207F8), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x2811050F0505112D), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x68390D170D0D3965), SPH_C64(0xBFA273957373A2CC), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x2014040C04041424), + SPH_C64(0x1DA020602020A03D), SPH_C64(0xA321FE1FFEFE215D), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x50220A1E0A0A225A), SPH_C64(0xC15BB5C2B5B55B74), + SPH_C64(0x4EE7C05DC0C0E78E), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0xAFA871937171A8DE), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x75992D772D2D9958), SPH_C64(0x27FD60A06060FD47), + SPH_C64(0xB7A772967272A7C5), SPH_C64(0xECE593A89393E57F), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x4028081808082848), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x15A521632121A534), + SPH_C64(0xDA315CE45C5C3186), SPH_C64(0x4CA187948787A1CB), + SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0x0000000000000000), SPH_C64(0x56E8C358C3C3E895), + SPH_C64(0x905A123612125A82), SPH_C64(0xFCEF91AE9191EF6D), + SPH_C64(0x24988A838A8A98AE), SPH_C64(0x100A020602020A12), + SPH_C64(0xE06C1C241C1C6CFC), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x124C45CF45454C57), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0xBB2EFD1AFDFD2E46), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0x611FA1FEA1A11FC0), SPH_C64(0x5A614CD44C4C6116), + SPH_C64(0x85FF33553333FFB6), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x54AE84918484AED0), SPH_C64(0x05AF23652323AF26), + SPH_C64(0xC7917C847C7C91BB), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xA841153F151541BD), + SPH_C64(0xB5E1355F3535E180), SPH_C64(0x6FD069BB6969D006), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xD4FE94A19494FE40), + SPH_C64(0x52644DD74D4D641F), SPH_C64(0xA7AD70907070ADD7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x26DECD4ACDCDDEEB), SPH_C64(0xFEA9D667D6D6A928), + SPH_C64(0x47C16CB46C6CC12B), SPH_C64(0xD151B7C4B7B75166), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x482D091B09092D41), + SPH_C64(0xCB18F308F3F31838), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x490EA4F1A4A40EED), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x337BEC29ECEC7BDF), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xEEA3D461D4D4A33A), SPH_C64(0xDEBDD26BD2D2BD0C), + SPH_C64(0xA044143C141444B4), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x5B42E13EE1E142BA), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x7EF9C657C6C6F9B8), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xF78F7A8E7A7A8F8D), SPH_C64(0xCDD23A4E3A3AD2F7), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xCA3B5EE25E5E3B94), + SPH_C64(0xB684DF7CDFDF8469), SPH_C64(0xDCFB95A29595FB49), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x3938AAE3AAAA3893), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x3ED1CE4FCECED1F0), + SPH_C64(0x381B070907071B3F), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xF5C93D473D3DC9C8), SPH_C64(0xFA2558E8585825A2), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0xB4C298B59898C22C), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xC31DF20BF2F21D31), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0x8855113311115599), + SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x225243C543435261), SPH_C64(0x180F030503030F1B), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0xAE8BDC79DCDC8B72), + SPH_C64(0x7B56E532E5E5569E), SPH_C64(0xF940B2CBB2B2404B), + SPH_C64(0x4A6B4ED24E4E6B04), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x1B6AE926E9E96AF2), + SPH_C64(0x25BB27692727BB02), SPH_C64(0x3A5D40C040405D7A), + SPH_C64(0x8E9FD875D8D89F56), SPH_C64(0xA5EB37593737EB92), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x0C898F8C8F8F8983), + SPH_C64(0x0805010301010509), SPH_C64(0xE8691D271D1D69F5), + SPH_C64(0xA20253F5535302F1), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0xF22059EB595920AB), SPH_C64(0x46E2C15EC1C1E287), + SPH_C64(0x426E4FD14F4F6E0D), SPH_C64(0x8DFA32563232FABF), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0x8335FA13FAFA3579), + SPH_C64(0x87B9749C7474B9F3), SPH_C64(0x8B30FB10FBFB3070), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x8CD99FBC9F9FD913), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0xD0721A2E1A1A72CA), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0xE309F607F6F60915), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0x5D88287828288875), + SPH_C64(0x34928885888892BC), SPH_C64(0xACCD9BB09B9BCD37), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x70360E120E0E367E), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0x6A7F4ADE4A4A7F20), + SPH_C64(0x136FE825E8E86FFB), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x5904A6F7A6A604FF), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xEF80798B79798096), + SPH_C64(0x8976BCD9BCBC7635), SPH_C64(0x997CBEDFBEBE7C27), + SPH_C64(0x2B74EF2CEFEF74C4), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x0A4346CA4646434C), SPH_C64(0xCCF197A49797F15B), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0xC87D192B19197DD1), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0x558D297B29298D7C), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x985F133513135F8B), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0xEB0CF704F7F70C1C), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x024647C947474645), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x2371EE2FEEEE71CD), + SPH_C64(0xA962B8D5B8B86211), SPH_C64(0xFF8A7B8D7B7B8A84), + SPH_C64(0x3C978986898997B5), SPH_C64(0x9DF030503030F0AD), + SPH_C64(0xD6B8D368D3D3B805), SPH_C64(0xDF9E7F817F7F9EA0), + SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6) +}; + +static const sph_u64 old0_T3[256] = { + SPH_C64(0xD568B86868D50F67), SPH_C64(0xB7D06DD0D0B71ECE), + SPH_C64(0x60EB20EBEB60E00B), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0x7548D8484875327A), SPH_C64(0xD39DBA9D9DD3019C), + SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x53E431E4E4539773), + SPH_C64(0x48E338E3E348A84B), SPH_C64(0x15A3F8A3A315D271), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0x947D877D7D94B2CF), SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xAB85928585ABD95C), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x8578887878859FE7), SPH_C64(0xC5CA43CACAC5D41E), + SPH_C64(0x4B173917174BAFB8), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0xF861A36161F84E2F), SPH_C64(0xA6D562D5D5A633E6), + SPH_C64(0x345DE75D5D348FD2), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0x868C898C8C869814), SPH_C64(0xCC3C443C3CCCC1FD), + SPH_C64(0xB677997777B6E89F), SPH_C64(0x0851F3515108E3B2), + SPH_C64(0xAA22662222AA2F0D), SPH_C64(0x5742C6424257682A), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0x5841C34141587332), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0xA486978686A4C244), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0x781828181878D8C0), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x1657F9575716D582), + SPH_C64(0x1E060A06061E3630), SPH_C64(0xF762A66262F75537), + SPH_C64(0x03F401F4F40307F3), SPH_C64(0xEE365A3636EE9BAD), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0xDA6BBD6B6BDA147F), + SPH_C64(0x771B2D1B1B77C3D8), SPH_C64(0xEC65AF6565EC6A0F), + SPH_C64(0xBC759F7575BCFA8F), SPH_C64(0x5010301010509080), + SPH_C64(0x95DA73DADA95449E), SPH_C64(0x7049DB4949703B72), + SPH_C64(0xBE266A2626BE0B2D), SPH_C64(0x3AF916F9F93A629B), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0x68BAD3BABA6803B9), + SPH_C64(0x2CAEEFAEAE2CB719), SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x0752F6525207F8AA), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0x11050F0505112D28), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x390D170D0D396568), SPH_C64(0xA273957373A2CCBF), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x14040C0404142420), + SPH_C64(0xA020602020A03D1D), SPH_C64(0x21FE1FFEFE215DA3), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0x220A1E0A0A225A50), SPH_C64(0x5BB5C2B5B55B74C1), + SPH_C64(0xE7C05DC0C0E78E4E), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xA871937171A8DEAF), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x992D772D2D995875), SPH_C64(0xFD60A06060FD4727), + SPH_C64(0xA772967272A7C5B7), SPH_C64(0xE593A89393E57FEC), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x2808180808284840), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xA521632121A53415), + SPH_C64(0x315CE45C5C3186DA), SPH_C64(0xA187948787A1CB4C), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0x0000000000000000), SPH_C64(0xE8C358C3C3E89556), + SPH_C64(0x5A123612125A8290), SPH_C64(0xEF91AE9191EF6DFC), + SPH_C64(0x988A838A8A98AE24), SPH_C64(0x0A020602020A1210), + SPH_C64(0x6C1C241C1C6CFCE0), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x2EFD1AFDFD2E46BB), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x1FA1FEA1A11FC061), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0xFF33553333FFB685), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xAE84918484AED054), SPH_C64(0xAF23652323AF2605), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x41153F151541BDA8), + SPH_C64(0xE1355F3535E180B5), SPH_C64(0xD069BB6969D0066F), + SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0xFE94A19494FE40D4), + SPH_C64(0x644DD74D4D641F52), SPH_C64(0xAD70907070ADD7A7), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDECD4ACDCDDEEB26), SPH_C64(0xA9D667D6D6A928FE), + SPH_C64(0xC16CB46C6CC12B47), SPH_C64(0x51B7C4B7B75166D1), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0x2D091B09092D4148), + SPH_C64(0x18F308F3F31838CB), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x0EA4F1A4A40EED49), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0x7BEC29ECEC7BDF33), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xA3D461D4D4A33AEE), SPH_C64(0xBDD26BD2D2BD0CDE), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x42E13EE1E142BA5B), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0xF9C657C6C6F9B87E), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x8F7A8E7A7A8F8DF7), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x3B5EE25E5E3B94CA), + SPH_C64(0x84DF7CDFDF8469B6), SPH_C64(0xFB95A29595FB49DC), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x38AAE3AAAA389339), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xD1CE4FCECED1F03E), + SPH_C64(0x1B070907071B3F38), SPH_C64(0x330F110F0F337778), + SPH_C64(0xC93D473D3DC9C8F5), SPH_C64(0x2558E8585825A2FA), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xC298B59898C22CB4), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x1DF20BF2F21D31C3), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x5511331111559988), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x5243C54343526122), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0x8BDC79DCDC8B72AE), + SPH_C64(0x56E532E5E5569E7B), SPH_C64(0x40B2CBB2B2404BF9), + SPH_C64(0x6B4ED24E4E6B044A), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x6AE926E9E96AF21B), + SPH_C64(0xBB27692727BB0225), SPH_C64(0x5D40C040405D7A3A), + SPH_C64(0x9FD875D8D89F568E), SPH_C64(0xEB37593737EB92A5), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0x898F8C8F8F89830C), + SPH_C64(0x0501030101050908), SPH_C64(0x691D271D1D69F5E8), + SPH_C64(0x0253F5535302F1A2), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x2059EB595920ABF2), SPH_C64(0xE2C15EC1C1E28746), + SPH_C64(0x6E4FD14F4F6E0D42), SPH_C64(0xFA32563232FABF8D), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0x35FA13FAFA357983), + SPH_C64(0xB9749C7474B9F387), SPH_C64(0x30FB10FBFB30708B), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0xD99FBC9F9FD9138C), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x721A2E1A1A72CAD0), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x09F607F6F60915E3), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x882878282888755D), + SPH_C64(0x928885888892BC34), SPH_C64(0xCD9BB09B9BCD37AC), + SPH_C64(0xF531533131F5A495), SPH_C64(0x360E120E0E367E70), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x7F4ADE4A4A7F206A), + SPH_C64(0x6FE825E8E86FFB13), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x04A6F7A6A604FF59), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x80798B79798096EF), + SPH_C64(0x76BCD9BCBC763589), SPH_C64(0x7CBEDFBEBE7C2799), + SPH_C64(0x74EF2CEFEF74C42B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0x4346CA4646434C0A), SPH_C64(0xF197A49797F15BCC), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0x7D192B19197DD1C8), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0xC799B69999C725BC), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0x8D297B29298D7C55), + SPH_C64(0xE964AC6464E96307), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x5F133513135F8B98), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0x0CF704F7F70C1CEB), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x4647C94747464502), + SPH_C64(0x932F712F2F934A65), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x62B8D5B8B86211A9), SPH_C64(0x8A7B8D7B7B8A84FF), + SPH_C64(0x978986898997B53C), SPH_C64(0xF030503030F0AD9D), + SPH_C64(0xB8D368D3D3B805D6), SPH_C64(0x9E7F817F7F9EA0DF), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664) +}; + +static const sph_u64 old0_T4[256] = { + SPH_C64(0x68B86868D50F67D5), SPH_C64(0xD06DD0D0B71ECEB7), + SPH_C64(0xEB20EBEB60E00B60), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x48D8484875327A75), SPH_C64(0x9DBA9D9DD3019CD3), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0xE431E4E453977353), + SPH_C64(0xE338E3E348A84B48), SPH_C64(0xA3F8A3A315D27115), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x7D877D7D94B2CF94), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x85928585ABD95CAB), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0x78887878859FE785), SPH_C64(0xCA43CACAC5D41EC5), + SPH_C64(0x173917174BAFB84B), SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x61A36161F84E2FF8), SPH_C64(0xD562D5D5A633E6A6), + SPH_C64(0x5DE75D5D348FD234), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x8C898C8C86981486), SPH_C64(0x3C443C3CCCC1FDCC), + SPH_C64(0x77997777B6E89FB6), SPH_C64(0x51F3515108E3B208), + SPH_C64(0x22662222AA2F0DAA), SPH_C64(0x42C6424257682A57), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x41C3414158733258), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x86978686A4C244A4), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x1828181878D8C078), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x57F9575716D58216), + SPH_C64(0x060A06061E36301E), SPH_C64(0x62A66262F75537F7), + SPH_C64(0xF401F4F40307F303), SPH_C64(0x365A3636EE9BADEE), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0x6BBD6B6BDA147FDA), + SPH_C64(0x1B2D1B1B77C3D877), SPH_C64(0x65AF6565EC6A0FEC), + SPH_C64(0x759F7575BCFA8FBC), SPH_C64(0x1030101050908050), + SPH_C64(0xDA73DADA95449E95), SPH_C64(0x49DB4949703B7270), + SPH_C64(0x266A2626BE0B2DBE), SPH_C64(0xF916F9F93A629B3A), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0xAEEFAEAE2CB7192C), SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x52F6525207F8AA07), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0x050F0505112D2811), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x0D170D0D39656839), SPH_C64(0x73957373A2CCBFA2), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0x040C040414242014), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0xFE1FFEFE215DA321), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x0A1E0A0A225A5022), SPH_C64(0xB5C2B5B55B74C15B), + SPH_C64(0xC05DC0C0E78E4EE7), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x71937171A8DEAFA8), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0x2D772D2D99587599), SPH_C64(0x60A06060FD4727FD), + SPH_C64(0x72967272A7C5B7A7), SPH_C64(0x93A89393E57FECE5), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x0818080828484028), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x21632121A53415A5), + SPH_C64(0x5CE45C5C3186DA31), SPH_C64(0x87948787A1CB4CA1), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xE03DE0E047B35347), + SPH_C64(0x0000000000000000), SPH_C64(0xC358C3C3E89556E8), + SPH_C64(0x123612125A82905A), SPH_C64(0x91AE9191EF6DFCEF), + SPH_C64(0x8A838A8A98AE2498), SPH_C64(0x020602020A12100A), + SPH_C64(0x1C241C1C6CFCE06C), SPH_C64(0xE637E6E659856359), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0xFD1AFDFD2E46BB2E), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0xA1FEA1A11FC0611F), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x33553333FFB685FF), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x84918484AED054AE), SPH_C64(0x23652323AF2605AF), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x153F151541BDA841), + SPH_C64(0x355F3535E180B5E1), SPH_C64(0x69BB6969D0066FD0), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x94A19494FE40D4FE), + SPH_C64(0x4DD74D4D641F5264), SPH_C64(0x70907070ADD7A7AD), + SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0xCD4ACDCDDEEB26DE), SPH_C64(0xD667D6D6A928FEA9), + SPH_C64(0x6CB46C6CC12B47C1), SPH_C64(0xB7C4B7B75166D151), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x091B09092D41482D), + SPH_C64(0xF308F3F31838CB18), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xA4F1A4A40EED490E), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0xEC29ECEC7BDF337B), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD461D4D4A33AEEA3), SPH_C64(0xD26BD2D2BD0CDEBD), + SPH_C64(0x143C141444B4A044), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0xE13EE1E142BA5B42), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0xC657C6C6F9B87EF9), + SPH_C64(0xDB70DBDB904D9690), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0x7A8E7A7A8F8DF78F), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x5EE25E5E3B94CA3B), + SPH_C64(0xDF7CDFDF8469B684), SPH_C64(0x95A29595FB49DCFB), + SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0xAAE3AAAA38933938), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xCE4FCECED1F03ED1), + SPH_C64(0x070907071B3F381B), SPH_C64(0x0F110F0F33777833), + SPH_C64(0x3D473D3DC9C8F5C9), SPH_C64(0x58E8585825A2FA25), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x98B59898C22CB4C2), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0xF20BF2F21D31C31D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x1133111155998855), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0x43C5434352612252), SPH_C64(0x030503030F1B180F), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0xDC79DCDC8B72AE8B), + SPH_C64(0xE532E5E5569E7B56), SPH_C64(0xB2CBB2B2404BF940), + SPH_C64(0x4ED24E4E6B044A6B), SPH_C64(0xC754C7C7FCB176FC), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0xE926E9E96AF21B6A), + SPH_C64(0x27692727BB0225BB), SPH_C64(0x40C040405D7A3A5D), + SPH_C64(0xD875D8D89F568E9F), SPH_C64(0x37593737EB92A5EB), + SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x8F8C8F8F89830C89), + SPH_C64(0x0103010105090805), SPH_C64(0x1D271D1D69F5E869), + SPH_C64(0x53F5535302F1A202), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x59EB595920ABF220), SPH_C64(0xC15EC1C1E28746E2), + SPH_C64(0x4FD14F4F6E0D426E), SPH_C64(0x32563232FABF8DFA), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0xFA13FAFA35798335), + SPH_C64(0x749C7474B9F387B9), SPH_C64(0xFB10FBFB30708B30), + SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x9FBC9F9FD9138CD9), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x1A2E1A1A72CAD072), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0xF607F6F60915E309), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x2878282888755D88), + SPH_C64(0x8885888892BC3492), SPH_C64(0x9BB09B9BCD37ACCD), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x0E120E0E367E7036), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x4ADE4A4A7F206A7F), + SPH_C64(0xE825E8E86FFB136F), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xA6F7A6A604FF5904), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x798B79798096EF80), + SPH_C64(0xBCD9BCBC76358976), SPH_C64(0xBEDFBEBE7C27997C), + SPH_C64(0xEF2CEFEF74C42B74), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0x46CA4646434C0A43), SPH_C64(0x97A49797F15BCCF1), + SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0x192B19197DD1C87D), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x297B29298D7C558D), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x55FF55551CC7921C), + SPH_C64(0x133513135F8B985F), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xF704F7F70C1CEB0C), SPH_C64(0x6FB16F6FCE305FCE), + SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x47C9474746450246), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0xB8D5B8B86211A962), SPH_C64(0x7B8D7B7B8A84FF8A), + SPH_C64(0x8986898997B53C97), SPH_C64(0x30503030F0AD9DF0), + SPH_C64(0xD368D3D3B805D6B8), SPH_C64(0x7F817F7F9EA0DF9E), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0) +}; + +static const sph_u64 old0_T5[256] = { + SPH_C64(0xB86868D50F67D568), SPH_C64(0x6DD0D0B71ECEB7D0), + SPH_C64(0x20EBEB60E00B60EB), SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0xD8484875327A7548), SPH_C64(0xBA9D9DD3019CD39D), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0x31E4E453977353E4), + SPH_C64(0x38E3E348A84B48E3), SPH_C64(0xF8A3A315D27115A3), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0x877D7D94B2CF947D), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x928585ABD95CAB85), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0x887878859FE78578), SPH_C64(0x43CACAC5D41EC5CA), + SPH_C64(0x3917174BAFB84B17), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0xA36161F84E2FF861), SPH_C64(0x62D5D5A633E6A6D5), + SPH_C64(0xE75D5D348FD2345D), SPH_C64(0x1D0B0B275358270B), + SPH_C64(0x898C8C869814868C), SPH_C64(0x443C3CCCC1FDCC3C), + SPH_C64(0x997777B6E89FB677), SPH_C64(0xF3515108E3B20851), + SPH_C64(0x662222AA2F0DAA22), SPH_C64(0xC6424257682A5742), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xC341415873325841), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0x978686A4C244A486), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x28181878D8C07818), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xF9575716D5821657), + SPH_C64(0x0A06061E36301E06), SPH_C64(0xA66262F75537F762), + SPH_C64(0x01F4F40307F303F4), SPH_C64(0x5A3636EE9BADEE36), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xBD6B6BDA147FDA6B), + SPH_C64(0x2D1B1B77C3D8771B), SPH_C64(0xAF6565EC6A0FEC65), + SPH_C64(0x9F7575BCFA8FBC75), SPH_C64(0x3010105090805010), + SPH_C64(0x73DADA95449E95DA), SPH_C64(0xDB4949703B727049), + SPH_C64(0x6A2626BE0B2DBE26), SPH_C64(0x16F9F93A629B3AF9), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0xEFAEAE2CB7192CAE), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xF6525207F8AA0752), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x0F0505112D281105), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0x170D0D396568390D), SPH_C64(0x957373A2CCBFA273), + SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0x0C04041424201404), + SPH_C64(0x602020A03D1DA020), SPH_C64(0x1FFEFE215DA321FE), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x1E0A0A225A50220A), SPH_C64(0xC2B5B55B74C15BB5), + SPH_C64(0x5DC0C0E78E4EE7C0), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0x937171A8DEAFA871), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x772D2D995875992D), SPH_C64(0xA06060FD4727FD60), + SPH_C64(0x967272A7C5B7A772), SPH_C64(0xA89393E57FECE593), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0x1808082848402808), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x632121A53415A521), + SPH_C64(0xE45C5C3186DA315C), SPH_C64(0x948787A1CB4CA187), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x0000000000000000), SPH_C64(0x58C3C3E89556E8C3), + SPH_C64(0x3612125A82905A12), SPH_C64(0xAE9191EF6DFCEF91), + SPH_C64(0x838A8A98AE24988A), SPH_C64(0x0602020A12100A02), + SPH_C64(0x241C1C6CFCE06C1C), SPH_C64(0x37E6E659856359E6), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x1AFDFD2E46BB2EFD), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0xFEA1A11FC0611FA1), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0x553333FFB685FF33), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x918484AED054AE84), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x6F2525B11035B125), SPH_C64(0x3F151541BDA84115), + SPH_C64(0x5F3535E180B5E135), SPH_C64(0xBB6969D0066FD069), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0xA19494FE40D4FE94), + SPH_C64(0xD74D4D641F52644D), SPH_C64(0x907070ADD7A7AD70), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0x4ACDCDDEEB26DECD), SPH_C64(0x67D6D6A928FEA9D6), + SPH_C64(0xB46C6CC12B47C16C), SPH_C64(0xC4B7B75166D151B7), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x08F3F31838CB18F3), SPH_C64(0xA96767E6781FE667), + SPH_C64(0xF1A4A40EED490EA4), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0x29ECEC7BDF337BEC), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x61D4D4A33AEEA3D4), SPH_C64(0x6BD2D2BD0CDEBDD2), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0x3EE1E142BA5B42E1), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x57C6C6F9B87EF9C6), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x8E7A7A8F8DF78F7A), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0xE25E5E3B94CA3B5E), + SPH_C64(0x7CDFDF8469B684DF), SPH_C64(0xA29595FB49DCFB95), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0xE3AAAA38933938AA), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x4FCECED1F03ED1CE), + SPH_C64(0x0907071B3F381B07), SPH_C64(0x110F0F337778330F), + SPH_C64(0x473D3DC9C8F5C93D), SPH_C64(0xE8585825A2FA2558), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0xB59898C22CB4C298), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x0BF2F21D31C31DF2), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x3311115599885511), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xC543435261225243), SPH_C64(0x0503030F1B180F03), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0x79DCDC8B72AE8BDC), + SPH_C64(0x32E5E5569E7B56E5), SPH_C64(0xCBB2B2404BF940B2), + SPH_C64(0xD24E4E6B044A6B4E), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x26E9E96AF21B6AE9), + SPH_C64(0x692727BB0225BB27), SPH_C64(0xC040405D7A3A5D40), + SPH_C64(0x75D8D89F568E9FD8), SPH_C64(0x593737EB92A5EB37), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x8C8F8F89830C898F), + SPH_C64(0x0301010509080501), SPH_C64(0x271D1D69F5E8691D), + SPH_C64(0xF5535302F1A20253), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0xEB595920ABF22059), SPH_C64(0x5EC1C1E28746E2C1), + SPH_C64(0xD14F4F6E0D426E4F), SPH_C64(0x563232FABF8DFA32), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x13FAFA35798335FA), + SPH_C64(0x9C7474B9F387B974), SPH_C64(0x10FBFB30708B30FB), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0xBC9F9FD9138CD99F), + SPH_C64(0x5C3434E489BDE434), SPH_C64(0x2E1A1A72CAD0721A), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x07F6F60915E309F6), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0x78282888755D8828), + SPH_C64(0x85888892BC349288), SPH_C64(0xB09B9BCD37ACCD9B), + SPH_C64(0x533131F5A495F531), SPH_C64(0x120E0E367E70360E), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xDE4A4A7F206A7F4A), + SPH_C64(0x25E8E86FFB136FE8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0xF7A6A604FF5904A6), SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x8B79798096EF8079), + SPH_C64(0xD9BCBC76358976BC), SPH_C64(0xDFBEBE7C27997CBE), + SPH_C64(0x2CEFEF74C42B74EF), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0xCA4646434C0A4346), SPH_C64(0xA49797F15BCCF197), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x2B19197DD1C87D19), SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0x7B29298D7C558D29), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x211F1F63E7F8631F), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0x3513135F8B985F13), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x04F7F70C1CEB0CF7), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0xC947474645024647), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0xD5B8B86211A962B8), SPH_C64(0x8D7B7B8A84FF8A7B), + SPH_C64(0x86898997B53C9789), SPH_C64(0x503030F0AD9DF030), + SPH_C64(0x68D3D3B805D6B8D3), SPH_C64(0x817F7F9EA0DF9E7F), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082) +}; + +static const sph_u64 old0_T6[256] = { + SPH_C64(0x6868D50F67D568B8), SPH_C64(0xD0D0B71ECEB7D06D), + SPH_C64(0xEBEB60E00B60EB20), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x484875327A7548D8), SPH_C64(0x9D9DD3019CD39DBA), + SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0xE4E453977353E431), + SPH_C64(0xE3E348A84B48E338), SPH_C64(0xA3A315D27115A3F8), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x7D7D94B2CF947D87), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x8585ABD95CAB8592), SPH_C64(0x9E9EDC1A84DC9EBF), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0x7878859FE7857888), SPH_C64(0xCACAC5D41EC5CA43), + SPH_C64(0x17174BAFB84B1739), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x6161F84E2FF861A3), SPH_C64(0xD5D5A633E6A6D562), + SPH_C64(0x5D5D348FD2345DE7), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x8C8C869814868C89), SPH_C64(0x3C3CCCC1FDCC3C44), + SPH_C64(0x7777B6E89FB67799), SPH_C64(0x515108E3B20851F3), + SPH_C64(0x2222AA2F0DAA2266), SPH_C64(0x424257682A5742C6), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x41415873325841C3), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x8686A4C244A48697), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x181878D8C0781828), + SPH_C64(0x2E2E96436D962E72), SPH_C64(0x575716D5821657F9), + SPH_C64(0x06061E36301E060A), SPH_C64(0x6262F75537F762A6), + SPH_C64(0xF4F40307F303F401), SPH_C64(0x3636EE9BADEE365A), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0x6B6BDA147FDA6BBD), + SPH_C64(0x1B1B77C3D8771B2D), SPH_C64(0x6565EC6A0FEC65AF), + SPH_C64(0x7575BCFA8FBC759F), SPH_C64(0x1010509080501030), + SPH_C64(0xDADA95449E95DA73), SPH_C64(0x4949703B727049DB), + SPH_C64(0x2626BE0B2DBE266A), SPH_C64(0xF9F93A629B3AF916), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0xAEAE2CB7192CAEEF), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x525207F8AA0752F6), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0x0505112D2811050F), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x0D0D396568390D17), SPH_C64(0x7373A2CCBFA27395), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0x040414242014040C), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0xFEFE215DA321FE1F), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x0A0A225A50220A1E), SPH_C64(0xB5B55B74C15BB5C2), + SPH_C64(0xC0C0E78E4EE7C05D), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x7171A8DEAFA87193), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0x2D2D995875992D77), SPH_C64(0x6060FD4727FD60A0), + SPH_C64(0x7272A7C5B7A77296), SPH_C64(0x9393E57FECE593A8), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x0808284840280818), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x2121A53415A52163), + SPH_C64(0x5C5C3186DA315CE4), SPH_C64(0x8787A1CB4CA18794), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0x0000000000000000), SPH_C64(0xC3C3E89556E8C358), + SPH_C64(0x12125A82905A1236), SPH_C64(0x9191EF6DFCEF91AE), + SPH_C64(0x8A8A98AE24988A83), SPH_C64(0x02020A12100A0206), + SPH_C64(0x1C1C6CFCE06C1C24), SPH_C64(0xE6E659856359E637), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xC2C2ED9C5EEDC25B), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0xFDFD2E46BB2EFD1A), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0xA1A11FC0611FA1FE), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x3333FFB685FF3355), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0x2525B11035B1256F), SPH_C64(0x151541BDA841153F), + SPH_C64(0x3535E180B5E1355F), SPH_C64(0x6969D0066FD069BB), + SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x9494FE40D4FE94A1), + SPH_C64(0x4D4D641F52644DD7), SPH_C64(0x7070ADD7A7AD7090), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xAFAF29BE1129AFEC), + SPH_C64(0xCDCDDEEB26DECD4A), SPH_C64(0xD6D6A928FEA9D667), + SPH_C64(0x6C6CC12B47C16CB4), SPH_C64(0xB7B75166D151B7C4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x09092D41482D091B), + SPH_C64(0xF3F31838CB18F308), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xA4A40EED490EA4F1), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0xECEC7BDF337BEC29), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD4D4A33AEEA3D461), SPH_C64(0xD2D2BD0CDEBDD26B), + SPH_C64(0x141444B4A044143C), SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0xE1E142BA5B42E13E), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0xC6C6F9B87EF9C657), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0x7A7A8F8DF78F7A8E), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x5E5E3B94CA3B5EE2), + SPH_C64(0xDFDF8469B684DF7C), SPH_C64(0x9595FB49DCFB95A2), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0xAAAA38933938AAE3), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xCECED1F03ED1CE4F), + SPH_C64(0x07071B3F381B0709), SPH_C64(0x0F0F337778330F11), + SPH_C64(0x3D3DC9C8F5C93D47), SPH_C64(0x585825A2FA2558E8), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x9898C22CB4C298B5), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0xF2F21D31C31DF20B), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x1111559988551133), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0x43435261225243C5), SPH_C64(0x03030F1B180F0305), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0xDCDC8B72AE8BDC79), + SPH_C64(0xE5E5569E7B56E532), SPH_C64(0xB2B2404BF940B2CB), + SPH_C64(0x4E4E6B044A6B4ED2), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0xE9E96AF21B6AE926), + SPH_C64(0x2727BB0225BB2769), SPH_C64(0x40405D7A3A5D40C0), + SPH_C64(0xD8D89F568E9FD875), SPH_C64(0x3737EB92A5EB3759), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x8F8F89830C898F8C), + SPH_C64(0x0101050908050103), SPH_C64(0x1D1D69F5E8691D27), + SPH_C64(0x535302F1A20253F5), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x595920ABF22059EB), SPH_C64(0xC1C1E28746E2C15E), + SPH_C64(0x4F4F6E0D426E4FD1), SPH_C64(0x3232FABF8DFA3256), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0xFAFA35798335FA13), + SPH_C64(0x7474B9F387B9749C), SPH_C64(0xFBFB30708B30FB10), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x9F9FD9138CD99FBC), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x1A1A72CAD0721A2E), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0xF6F60915E309F607), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x282888755D882878), + SPH_C64(0x888892BC34928885), SPH_C64(0x9B9BCD37ACCD9BB0), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x0E0E367E70360E12), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x4A4A7F206A7F4ADE), + SPH_C64(0xE8E86FFB136FE825), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xA6A604FF5904A6F7), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x79798096EF80798B), + SPH_C64(0xBCBC76358976BCD9), SPH_C64(0xBEBE7C27997CBEDF), + SPH_C64(0xEFEF74C42B74EF2C), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0x4646434C0A4346CA), SPH_C64(0x9797F15BCCF197A4), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0x19197DD1C87D192B), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x29298D7C558D297B), + SPH_C64(0x6464E96307E964AC), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0x13135F8B985F1335), SPH_C64(0xBBBB6D0AB16DBBD0), + SPH_C64(0xF7F70C1CEB0CF704), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x47474645024647C9), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0xB8B86211A962B8D5), SPH_C64(0x7B7B8A84FF8A7B8D), + SPH_C64(0x898997B53C978986), SPH_C64(0x3030F0AD9DF03050), + SPH_C64(0xD3D3B805D6B8D368), SPH_C64(0x7F7F9EA0DF9E7F81), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B) +}; + +static const sph_u64 old0_T7[256] = { + SPH_C64(0x68D50F67D568B868), SPH_C64(0xD0B71ECEB7D06DD0), + SPH_C64(0xEB60E00B60EB20EB), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x4875327A7548D848), SPH_C64(0x9DD3019CD39DBA9D), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0xE453977353E431E4), + SPH_C64(0xE348A84B48E338E3), SPH_C64(0xA315D27115A3F8A3), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x7D94B2CF947D877D), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x85ABD95CAB859285), SPH_C64(0x9EDC1A84DC9EBF9E), + SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0x78859FE785788878), SPH_C64(0xCAC5D41EC5CA43CA), + SPH_C64(0x174BAFB84B173917), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x61F84E2FF861A361), SPH_C64(0xD5A633E6A6D562D5), + SPH_C64(0x5D348FD2345DE75D), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x8C869814868C898C), SPH_C64(0x3CCCC1FDCC3C443C), + SPH_C64(0x77B6E89FB6779977), SPH_C64(0x5108E3B20851F351), + SPH_C64(0x22AA2F0DAA226622), SPH_C64(0x4257682A5742C642), + SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x415873325841C341), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x86A4C244A4869786), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x1878D8C078182818), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x5716D5821657F957), + SPH_C64(0x061E36301E060A06), SPH_C64(0x62F75537F762A662), + SPH_C64(0xF40307F303F401F4), SPH_C64(0x36EE9BADEE365A36), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0x6BDA147FDA6BBD6B), + SPH_C64(0x1B77C3D8771B2D1B), SPH_C64(0x65EC6A0FEC65AF65), + SPH_C64(0x75BCFA8FBC759F75), SPH_C64(0x1050908050103010), + SPH_C64(0xDA95449E95DA73DA), SPH_C64(0x49703B727049DB49), + SPH_C64(0x26BE0B2DBE266A26), SPH_C64(0xF93A629B3AF916F9), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0xAE2CB7192CAEEFAE), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x5207F8AA0752F652), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0x05112D2811050F05), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x0D396568390D170D), SPH_C64(0x73A2CCBFA2739573), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0x0414242014040C04), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0xFE215DA321FE1FFE), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x0A225A50220A1E0A), SPH_C64(0xB55B74C15BB5C2B5), + SPH_C64(0xC0E78E4EE7C05DC0), SPH_C64(0xA01AC9691AA0FDA0), + SPH_C64(0x71A8DEAFA8719371), SPH_C64(0xA50BE4410BA5F2A5), + SPH_C64(0x2D995875992D772D), SPH_C64(0x60FD4727FD60A060), + SPH_C64(0x72A7C5B7A7729672), SPH_C64(0x93E57FECE593A893), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x0828484028081808), + SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x21A53415A5216321), + SPH_C64(0x5C3186DA315CE45C), SPH_C64(0x87A1CB4CA1879487), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0x0000000000000000), SPH_C64(0xC3E89556E8C358C3), + SPH_C64(0x125A82905A123612), SPH_C64(0x91EF6DFCEF91AE91), + SPH_C64(0x8A98AE24988A838A), SPH_C64(0x020A12100A020602), + SPH_C64(0x1C6CFCE06C1C241C), SPH_C64(0xE659856359E637E6), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0xFD2E46BB2EFD1AFD), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0xA11FC0611FA1FEA1), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x33FFB685FF335533), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x84AED054AE849184), SPH_C64(0x23AF2605AF236523), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x1541BDA841153F15), + SPH_C64(0x35E180B5E1355F35), SPH_C64(0x69D0066FD069BB69), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x94FE40D4FE94A194), + SPH_C64(0x4D641F52644DD74D), SPH_C64(0x70ADD7A7AD709070), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0xCDDEEB26DECD4ACD), SPH_C64(0xD6A928FEA9D667D6), + SPH_C64(0x6CC12B47C16CB46C), SPH_C64(0xB75166D151B7C4B7), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x092D41482D091B09), + SPH_C64(0xF31838CB18F308F3), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xA40EED490EA4F1A4), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0xEC7BDF337BEC29EC), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD4A33AEEA3D461D4), SPH_C64(0xD2BD0CDEBDD26BD2), + SPH_C64(0x1444B4A044143C14), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0xE142BA5B42E13EE1), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x38D8E5DDD8384838), SPH_C64(0xC6F9B87EF9C657C6), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0x4B7A29627A4BDD4B), + SPH_C64(0x7A8F8DF78F7A8E7A), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x5E3B94CA3B5EE25E), + SPH_C64(0xDF8469B684DF7CDF), SPH_C64(0x95FB49DCFB95A295), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0xAA38933938AAE3AA), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xCED1F03ED1CE4FCE), + SPH_C64(0x071B3F381B070907), SPH_C64(0x0F337778330F110F), + SPH_C64(0x3DC9C8F5C93D473D), SPH_C64(0x5825A2FA2558E858), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x98C22CB4C298B598), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0xF21D31C31DF20BF2), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x1155998855113311), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0x435261225243C543), SPH_C64(0x030F1B180F030503), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0xDC8B72AE8BDC79DC), + SPH_C64(0xE5569E7B56E532E5), SPH_C64(0xB2404BF940B2CBB2), + SPH_C64(0x4E6B044A6B4ED24E), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0xE96AF21B6AE926E9), + SPH_C64(0x27BB0225BB276927), SPH_C64(0x405D7A3A5D40C040), + SPH_C64(0xD89F568E9FD875D8), SPH_C64(0x37EB92A5EB375937), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x8F89830C898F8C8F), + SPH_C64(0x0105090805010301), SPH_C64(0x1D69F5E8691D271D), + SPH_C64(0x5302F1A20253F553), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x5920ABF22059EB59), SPH_C64(0xC1E28746E2C15EC1), + SPH_C64(0x4F6E0D426E4FD14F), SPH_C64(0x32FABF8DFA325632), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0xFA35798335FA13FA), + SPH_C64(0x74B9F387B9749C74), SPH_C64(0xFB30708B30FB10FB), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x9FD9138CD99FBC9F), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x1A72CAD0721A2E1A), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0xF60915E309F607F6), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x2888755D88287828), + SPH_C64(0x8892BC3492888588), SPH_C64(0x9BCD37ACCD9BB09B), + SPH_C64(0x31F5A495F5315331), SPH_C64(0x0E367E70360E120E), + SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x4A7F206A7F4ADE4A), + SPH_C64(0xE86FFB136FE825E8), SPH_C64(0x96F452C4F496A796), + SPH_C64(0xA604FF5904A6F7A6), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x798096EF80798B79), + SPH_C64(0xBC76358976BCD9BC), SPH_C64(0xBE7C27997CBEDFBE), + SPH_C64(0xEF74C42B74EF2CEF), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0x46434C0A4346CA46), SPH_C64(0x97F15BCCF197A497), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0x197DD1C87D192B19), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x99C725BCC799B699), + SPH_C64(0xA832812932A8E5A8), SPH_C64(0x298D7C558D297B29), + SPH_C64(0x64E96307E964AC64), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x551CC7921C55FF55), + SPH_C64(0x135F8B985F133513), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xF70C1CEB0CF704F7), SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x474645024647C947), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0xB86211A962B8D5B8), SPH_C64(0x7B8A84FF8A7B8D7B), + SPH_C64(0x8997B53C97898689), SPH_C64(0x30F0AD9DF0305030), + SPH_C64(0xD3B805D6B8D368D3), SPH_C64(0x7F9EA0DF9E7F817F), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82) +}; + +#endif + +static const sph_u64 old0_RC[10] = { + SPH_C64(0xE46A9D482BEBD068), + SPH_C64(0x9E85F17D8156A3E3), + SPH_C64(0xD561A917CA788E2C), + SPH_C64(0x422251773C8C0B5D), + SPH_C64(0x18B386CC8041543F), + SPH_C64(0x6BD136F46206572E), + SPH_C64(0xF92649DA1075651B), + SPH_C64(0xAB5250AEBAE766CB), + SPH_C64(0xFE20043B730DF005), + SPH_C64(0xA0C0B50A5FB4F5DD) +}; + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL-1 (second version). + */ + +static const sph_u64 old1_T0[256] = { + SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323), + SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8), + SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8), + SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F), + SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6), + SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252), + SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC), + SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535), + SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B), + SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757), + SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777), + SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5), + SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA), + SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D), + SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4), + SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727), + SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D), + SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8), + SPH_C64(0x30708B30FB10FBFB), SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717), + SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202), + SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919), + SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626), + SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0), + SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F), + SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080), + SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868), + SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1), + SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212), + SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808), + SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D), + SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000), + SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282), + SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B), + SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3), + SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C), + SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575), + SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A), + SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6), + SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959), + SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161), + SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404), + SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D), + SPH_C64(0x35798335FA13FAFA), SPH_C64(0x8469B684DF7CDFDF), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111), + SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E), + SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB), + SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353), + SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C), + SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474), + SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646), + SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909), + SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242), + SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4), + SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 old1_T1[256] = { + SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F), + SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862), + SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E), + SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604), + SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207), + SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76), + SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1), + SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716), + SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6), + SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556), + SPH_C64(0x138CD99FBC9F9FD9), SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95), + SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34), + SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6), + SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB), + SPH_C64(0x73325841C3414158), SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94), + SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F), + SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B), + SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99), + SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B), + SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A), + SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D), + SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE), + SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33), + SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE), + SPH_C64(0x89BDE4345C3434E4), SPH_C64(0x327A7548D8484875), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5), + SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A), + SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828), + SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B), + SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0xC0611FA1FEA1A11F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9), + SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0), + SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77), + SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318), + SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C), + SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC), + SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98), + SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659), + SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63), + SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0x629B3AF916F9F93A), SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920), + SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x242014040C040414), + SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39), + SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D), + SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155), + SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B), + SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60), + SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302), + SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1), + SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9), + SPH_C64(0x15E309F607F6F609), SPH_C64(0x4C0A4346CA464643), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997), + SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D), + SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257), + SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E), + SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4) +}; + +static const sph_u64 old1_T2[256] = { + SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26), + SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB), + SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211), + SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D), + SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF), + SPH_C64(0xDEBDD26BD2D2BD0C), SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8), + SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635), + SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180), + SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5), + SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8), + SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E), + SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544), + SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A), + SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F), + SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02), + SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2), + SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56), + SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD), + SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF), + SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F), + SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12), + SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1), + SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B), + SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F), + SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82), + SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848), + SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8), + SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E), + SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6), + SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3), + SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838), + SPH_C64(0x9DF030503030F0AD), SPH_C64(0x2B74EF2CEFEF74C4), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC), + SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA), + SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE), + SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB), + SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116), + SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698), + SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424), + SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965), + SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469), + SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599), + SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04), + SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0), + SPH_C64(0xFDCC3C443C3CCCC1), SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C), + SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1), + SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3), + SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5), + SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7), + SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41), + SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768), + SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED), + SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2) +}; + +static const sph_u64 old1_T3[256] = { + SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605), + SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13), + SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9), + SPH_C64(0x0501030101050908), SPH_C64(0x6E4FD14F4F6E0D42), + SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59), + SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA), + SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589), + SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5), + SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582), + SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F), + SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B), + SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E), + SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2), + SPH_C64(0x5010301010509080), SPH_C64(0x03F401F4F40307F3), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225), + SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF), + SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E), + SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8), + SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0xC5CA43CACAC5D41E), SPH_C64(0x992D772D2D995875), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210), + SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF), + SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8), + SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D), + SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778), + SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A), + SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67), + SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D), + SPH_C64(0xE964AC6464E96307), SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290), + SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840), + SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5), + SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664), + SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8), + SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB), + SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9), + SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0), + SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F), + SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24), + SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2), + SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0xF861A36161F84E2F), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420), + SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568), + SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988), + SPH_C64(0x898F8C8F8F89830C), SPH_C64(0x6B4ED24E4E6B044A), + SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B), + SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6), + SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2), + SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47), + SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387), + SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148), + SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0x5742C6424257682A), + SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49), + SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244) +}; + +static const sph_u64 old1_T4[256] = { + SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF), + SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F), + SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962), + SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E), + SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904), + SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE), + SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07), + SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976), + SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1), + SPH_C64(0x1D271D1D69F5E869), SPH_C64(0xE03DE0E047B35347), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216), + SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6), + SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56), + SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95), + SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234), + SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB), + SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94), + SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F), + SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B), + SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF), + SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A), + SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D), + SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48), + SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0x8885888892BC3492), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE), + SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833), + SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5), + SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A), + SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028), + SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B), + SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9), + SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0), + SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877), + SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18), + SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C), + SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C), + SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264), + SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x759F7575BCFA8FBC), + SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498), + SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359), + SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220), + SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC), + SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014), + SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839), + SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855), + SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B), + SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60), + SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C), + SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0x7F817F7F9EA0DF9E), SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202), + SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9), + SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97), + SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D), + SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57), + SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E), + SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4) +}; + +static const sph_u64 old1_T5[256] = { + SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8), + SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8), + SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F), + SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6), + SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752), + SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC), + SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0xF8A3A315D27115A3), SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135), + SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657), + SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677), + SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5), + SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA), + SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D), + SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667), + SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27), + SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D), + SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8), + SPH_C64(0x10FBFB30708B30FB), SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17), + SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02), + SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19), + SPH_C64(0xDB4949703B727049), SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26), + SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F), + SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD), + SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568), + SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12), + SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808), + SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D), + SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000), + SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082), + SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B), + SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3), + SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0), + SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0x241C1C6CFCE06C1C), + SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75), + SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A), + SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6), + SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F), + SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059), + SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404), + SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D), + SPH_C64(0x13FAFA35798335FA), SPH_C64(0x7CDFDF8469B684DF), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511), + SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E), + SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB), + SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x0503030F1B180F03), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253), + SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B), + SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C), + SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974), + SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742), + SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4), + SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486) +}; + +static const sph_u64 old1_T6[256] = { + SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825), + SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5), + SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1), + SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7), + SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6), + SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9), + SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F), + SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B), + SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9), + SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799), + SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532), + SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x4A4A7F206A7F4ADE), SPH_C64(0xDADA95449E95DA73), + SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7), + SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769), + SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87), + SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875), + SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739), + SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF), + SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206), + SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B), + SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A), + SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11), + SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8), + SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x7A7A8F8DF78F7A8E), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8), + SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF), + SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266), + SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236), + SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818), + SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47), + SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B), + SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D), + SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC), + SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308), + SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24), + SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F), + SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83), + SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637), + SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x2525B11035B1256F), SPH_C64(0x595920ABF22059EB), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C), + SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17), + SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133), + SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2), + SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20), + SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0), + SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5), + SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C), + SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA), + SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x898997B53C978986), + SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B), + SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6), + SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1), + SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697) +}; + +static const sph_u64 old1_T7[256] = { + SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523), + SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8), + SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8), + SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F), + SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6), + SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0x798096EF80798B79), SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652), + SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC), + SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35), + SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B), + SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957), + SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977), + SPH_C64(0x37EB92A5EB375937), SPH_C64(0xE5569E7B56E532E5), + SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA), + SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0), + SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285), + SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D), + SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927), + SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D), + SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8), + SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917), + SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E), + SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x33FFB685FF335533), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602), + SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19), + SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26), + SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F), + SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xBE7C27997CBEDFBE), SPH_C64(0xCDDEEB26DECD4ACD), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868), + SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE), + SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622), + SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612), + SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808), + SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D), + SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82), + SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B), + SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3), + SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF), + SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x551CC7921C55FF55), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C), + SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75), + SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A), + SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6), + SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4), + SPH_C64(0xA832812932A8E5A8), SPH_C64(0x96F452C4F496A796), + SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59), + SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878), + SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04), + SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D), + SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311), + SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E), + SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB), + SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x94FE40D4FE94A194), SPH_C64(0xF70C1CEB0CF704F7), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513), + SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553), + SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C), + SPH_C64(0x31F5A495F5315331), SPH_C64(0x74B9F387B9749C74), + SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689), + SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09), + SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642), + SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4), + SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786) +}; + +#endif + +static const sph_u64 old1_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ + +#define DECL8(z) sph_u64 z ## 0, z ## 1, z ## 2, z ## 3, \ + z ## 4, z ## 5, z ## 6, z ## 7 + +#if SPH_LITTLE_FAST +#define READ_DATA_W(x) do { \ + n ## x = sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define UPDATE_STATE_W(x) do { \ + state[x] ^= n ## x ^ sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define LVARS DECL8(n); DECL8(h); +#else +#define READ_DATA_W(x) do { \ + sn ## x = n ## x = sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define UPDATE_STATE_W(x) do { \ + state[x] ^= n ## x ^ sn ## x; \ + } while (0) +#define LVARS DECL8(n); DECL8(sn); DECL8(h); +#endif + +#define READ_STATE_W(x) do { h ## x = state[x]; } while (0) + +#define MUL8(FUN) do { \ + FUN(0); \ + FUN(1); \ + FUN(2); \ + FUN(3); \ + FUN(4); \ + FUN(5); \ + FUN(6); \ + FUN(7); \ + } while (0) + +/* + * First operation: XOR the input data with the first round key. + */ +#define ROUND0_W(x) do { \ + n ## x ^= h ## x; \ + } while (0) + +#define READ_DATA MUL8(READ_DATA_W) +#define READ_STATE MUL8(READ_STATE_W) +#define ROUND0 MUL8(ROUND0_W) +#define UPDATE_STATE MUL8(UPDATE_STATE_W) + +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) + +#if SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static SPH_INLINE sph_u64 +table_skew(sph_u64 val, int num) +{ + return SPH_ROTL64(val, 8 * num); +} + +#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ + (table ## 0[BYTE(in ## i0, 0)] \ + ^ table_skew(table ## 0[BYTE(in ## i1, 1)], 1) \ + ^ table_skew(table ## 0[BYTE(in ## i2, 2)], 2) \ + ^ table_skew(table ## 0[BYTE(in ## i3, 3)], 3) \ + ^ table_skew(table ## 0[BYTE(in ## i4, 4)], 4) \ + ^ table_skew(table ## 0[BYTE(in ## i5, 5)], 5) \ + ^ table_skew(table ## 0[BYTE(in ## i6, 6)], 6) \ + ^ table_skew(table ## 0[BYTE(in ## i7, 7)], 7)) +#else +#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ + (table ## 0[BYTE(in ## i0, 0)] \ + ^ table ## 1[BYTE(in ## i1, 1)] \ + ^ table ## 2[BYTE(in ## i2, 2)] \ + ^ table ## 3[BYTE(in ## i3, 3)] \ + ^ table ## 4[BYTE(in ## i4, 4)] \ + ^ table ## 5[BYTE(in ## i5, 5)] \ + ^ table ## 6[BYTE(in ## i6, 6)] \ + ^ table ## 7[BYTE(in ## i7, 7)]) +#endif + +#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) do { \ + out ## 0 = ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1) ^ c0; \ + out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2) ^ c1; \ + out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3) ^ c2; \ + out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4) ^ c3; \ + out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5) ^ c4; \ + out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6) ^ c5; \ + out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7) ^ c6; \ + out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0) ^ c7; \ + } while (0) + +#define ROUND_KSCHED(table, in, out, c) \ + ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) + +#define ROUND_WENC(table, in, key, out) \ + ROUND(table, in, out, key ## 0, key ## 1, key ## 2, \ + key ## 3, key ## 4, key ## 5, key ## 6, key ## 7) + +#define TRANSFER(dst, src) do { \ + dst ## 0 = src ## 0; \ + dst ## 1 = src ## 1; \ + dst ## 2 = src ## 2; \ + dst ## 3 = src ## 3; \ + dst ## 4 = src ## 4; \ + dst ## 5 = src ## 5; \ + dst ## 6 = src ## 6; \ + dst ## 7 = src ## 7; \ + } while (0) + +/* see sph_whirlpool.h */ +void +sph_whirlpool_init(void *cc) +{ + sph_whirlpool_context *sc; + + sc = (sph_whirlpool_context*)cc; + /* + * We want to set all eight 64-bit words to 0. A "memset()" + * is not, theoretically, fully standard, but in practice it + * will work everywhere. + */ + memset(sc->state, 0, sizeof sc->state); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define ROUND_FUN(name, type) \ +static void \ +name ## _round(const void *src, sph_u64 *state) \ +{ \ + LVARS \ + int r; \ + \ + READ_DATA; \ + READ_STATE; \ + ROUND0; \ + for (r = 0; r < 10; r ++) { \ + DECL8(tmp); \ + \ + ROUND_KSCHED(type ## _T, h, tmp, type ## _RC[r]); \ + TRANSFER(h, tmp); \ + ROUND_WENC(type ## _T, n, h, tmp); \ + TRANSFER(n, tmp); \ + } \ + UPDATE_STATE; \ +} + +ROUND_FUN(whirlpool, plain) +ROUND_FUN(whirlpool0, old0) +ROUND_FUN(whirlpool1, old1) + +/* + * We want big-endian encoding of the message length, over 256 bits. BE64 + * triggers that. However, our block length is 512 bits, not 1024 bits. + * Internally, our encoding/decoding is little-endian, which is not a + * problem here since we also deactivate output in md_helper.c. + */ +#define BE64 1 +#define SVAL sc->state +#define BLEN 64U +#define PLW4 1 + +#define RFUN whirlpool_round +#define HASH whirlpool +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define RFUN whirlpool0_round +#define HASH whirlpool0 +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define RFUN whirlpool1_round +#define HASH whirlpool1 +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define MAKE_CLOSE(name) \ +void \ +sph_ ## name ## _close(void *cc, void *dst) \ +{ \ + sph_ ## name ## _context *sc; \ + int i; \ + \ + name ## _close(cc, dst, 0); \ + sc = (sph_ ## name ## _context *)cc; \ + for (i = 0; i < 8; i ++) \ + sph_enc64le((unsigned char *)dst + 8 * i, sc->state[i]); \ + sph_ ## name ## _init(cc); \ +} + +MAKE_CLOSE(whirlpool) +MAKE_CLOSE(whirlpool0) +MAKE_CLOSE(whirlpool1) + +#ifdef __cplusplus +} +#endif diff --git a/winbuild/sgminer.vcxproj b/winbuild/sgminer.vcxproj index 37bc41b5..b73afae1 100644 --- a/winbuild/sgminer.vcxproj +++ b/winbuild/sgminer.vcxproj @@ -1,4 +1,4 @@ - + @@ -28,23 +28,27 @@ Application true MultiByte + v110 Application true MultiByte + v110 Application false true MultiByte + v110 Application false true MultiByte + v110 @@ -257,7 +261,9 @@ + + @@ -310,17 +316,21 @@ + + + + @@ -365,11 +375,13 @@ + + @@ -382,4 +394,4 @@ - \ No newline at end of file + diff --git a/winbuild/sgminer.vcxproj.filters b/winbuild/sgminer.vcxproj.filters index 95548801..05e877c4 100644 --- a/winbuild/sgminer.vcxproj.filters +++ b/winbuild/sgminer.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -218,6 +218,18 @@ Source Files\algorithm + + Source Files + + + Source Files + + + Source Files\algorithm + + + Source Files\algorithm + @@ -394,8 +406,20 @@ Header Files\algorithm + + Header Files + + + Header Files + + + Header Files\algorithm + + + Header Files\algorithm + - \ No newline at end of file +