From 4bc23048b5a379c859a7538ea8e1a16f17977d0c Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 20 Aug 2014 05:44:02 +0200 Subject: [PATCH] x15: use djm34 code with asm xor64 + my rot64 some optimizations could be done later, after whirlcoin integration --- cuda_helper.h | 189 +++- x15/cuda_x15_whirlpool.cu | 1799 +++++++++++++++++++++++++++++++++---- x15/x15.cu | 8 +- 3 files changed, 1797 insertions(+), 199 deletions(-) diff --git a/cuda_helper.h b/cuda_helper.h index a5da3b3..3a7e400 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -113,9 +113,135 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x) (((uint64_t)(x) & 0x00000000000000ffULL) << 56))) #endif -// diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt -#if __CUDA_ARCH__ >= 350 -__device__ __forceinline__ uint64_t ROTR64(const uint64_t value, const int offset) { +/*********************************************************************/ +// Macro to catch CUDA errors in CUDA runtime calls: on failure it prints the file, line and cudaGetErrorString() text to stderr, then exits +#define CUDA_SAFE_CALL(call) \ +do { \ + cudaError_t err = call; \ + if (cudaSuccess != err) { \ + fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\ + __FILE__, __LINE__, cudaGetErrorString(err) ); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) + +/*********************************************************************/ + +// device asm for whirlpool: 64-bit XOR of a and b (single PTX xor.b64) +__device__ __forceinline__ +uint64_t xor1(uint64_t a, uint64_t b) +{ + uint64_t result; + asm("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(a) ,"l"(b)); + return result; +} + +// device asm for whirlpool: a ^ b ^ c, computed as a ^ (b ^ c) through a scratch PTX register +__device__ __forceinline__ +uint64_t xor3(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + " .reg .u64 t1;\n\t" + "xor.b64 t1, %2, %3;\n\t" + "xor.b64 %0, %1, t1;\n\t" + "}" + : "=l"(result) : "l"(a) ,"l"(b),"l"(c)); + return result; +} + +// device asm for whirlpool: XOR of all eight operands, folded from h down to a +__device__ __forceinline__ +uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t f,uint64_t 
g, uint64_t h) +{ + uint64_t result; + asm("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(g) ,"l"(h)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(f)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(e)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(d)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(c)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(b)); + asm("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(a)); + return result; +} + +// device asm for whirlpool: ((b ^ c) & a) ^ c, i.e. the bits of b where a is set and the bits of c elsewhere +__device__ __forceinline__ +uint64_t xandx(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n;\n\t" + "xor.b64 m, %2,%3;\n\t" + "and.b64 n, m,%1;\n\t" + "xor.b64 %0, n,%3;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; +} + +// device asm for whirlpool: ANDs x with 0xFFFFFFFFFFFFFFFF, so a 64-bit x comes back unchanged +__device__ __forceinline__ +uint64_t sph_t64(uint64_t x) +{ + uint64_t result; + asm("{\n\t" + "and.b64 %0,%1,0xFFFFFFFFFFFFFFFF;\n\t" + "}\n\t" + : "=l"(result) : "l"(x)); + return result; +} + +// device asm for whirlpool: (a & b) | ((a | b) & c), the bitwise majority of a, b and c +__device__ __forceinline__ +uint64_t andor(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n,o;\n\t" + "and.b64 m, %1, %2;\n\t" + " or.b64 n, %1, %2;\n\t" + "and.b64 o, n, %3;\n\t" + " or.b64 %0, m, o ;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; +} + +// device asm for whirlpool: x shifted right by n bits (PTX shr.b64), then ANDed with an all-ones 64-bit mask +__device__ __forceinline__ +uint64_t shr_t64(uint64_t x, uint32_t n) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m;\n\t" + "shr.b64 m,%1,%2;\n\t" + "and.b64 %0,m,0xFFFFFFFFFFFFFFFF;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(n)); + return result; +} + +// device asm for whirlpool: x shifted left by n bits (PTX shl.b64), then ANDed with an all-ones 64-bit mask +__device__ __forceinline__ +uint64_t shl_t64(uint64_t x, uint32_t n) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m;\n\t" + "shl.b64 m,%1,%2;\n\t" + "and.b64 %0,m,0xFFFFFFFFFFFFFFFF;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(n)); + return result; +} + + +// 64-bit ROTATE RIGHT: DJM_SM35_ROT64 selects the shf.r.wrap variant, else inline PTX for __CUDA_ARCH__ >= 120, else a plain C macro +#ifdef DJM_SM35_ROT64 +/* complicated sm >= 3.5 one 
(with Funnel Shifter beschleunigt), to bench */ +__device__ __forceinline__ +uint64_t ROTR64(const uint64_t value, const int offset) { uint2 result; if(offset < 32) { asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); @@ -126,13 +252,32 @@ __device__ __forceinline__ uint64_t ROTR64(const uint64_t value, const int offse } return __double_as_longlong(__hiloint2double(result.y, result.x)); } +#elif __CUDA_ARCH__ >= 120 /* inline PTX: sum of (x >> offset) and (x << (64-offset)); the two parts share no set bits for offset 1..63, so the add acts as an OR */ +__device__ __forceinline__ +uint64_t ROTR64(const uint64_t x, const int offset) +{ + uint64_t result; + asm("{\n\t" + ".reg .b64 lhs, rhs;\n\t" + ".reg .u32 amt2;\n\t" + "shr.b64 lhs, %1, %2;\n\t" + "sub.u32 amt2, 64, %2;\n\t" + "shl.b64 rhs, %1, amt2;\n\t" + "add.u64 %0, lhs, rhs;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(offset)); + return result; +} #else -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +/* host, and any build where __CUDA_ARCH__ >= 120 does not hold: plain C rotate right (with n == 0 the complementary shift count becomes 64) */ +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) #endif -// diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt -#if __CUDA_ARCH__ >= 350 -__device__ __forceinline__ uint64_t ROTL64(const uint64_t value, const int offset) { +// 64-bit ROTATE LEFT: DJM_SM35_ROT64 selects the shf.l.wrap variant, else inline PTX for __CUDA_ARCH__ >= 120, else a plain C macro +#ifdef DJM_SM35_ROT64 +/* complicated sm >= 3.5 one, to bench */ +__device__ __forceinline__ +uint64_t ROTL64(const uint64_t value, const int offset) { uint2 result; if(offset >= 32) { asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); @@ -143,19 +288,25 @@ __device__ __forceinline__ uint64_t ROTL64(const uint64_t value, const int offse } return __double_as_longlong(__hiloint2double(result.y, result.x)); } +#elif __CUDA_ARCH__ >= 120 +__device__ __forceinline__ +uint64_t ROTL64(const uint64_t x, const int offset) +{ + uint64_t result; + asm("{\n\t" + ".reg .b64 lhs, rhs;\n\t" + ".reg .u32 
amt2;\n\t" + "shl.b64 lhs, %1, %2;\n\t" + "sub.u32 amt2, 64, %2;\n\t" + "shr.b64 rhs, %1, amt2;\n\t" + "add.u64 %0, lhs, rhs;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(offset)); + return result; +} #else -#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +/* host, and any build where __CUDA_ARCH__ >= 120 does not hold: plain C rotate left (with n == 0 the complementary shift count becomes 64) */ +#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #endif -// Macro to catch CUDA errors in CUDA runtime calls -#define CUDA_SAFE_CALL(call) \ -do { \ - cudaError_t err = call; \ - if (cudaSuccess != err) { \ - fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\ - __FILE__, __LINE__, cudaGetErrorString(err) ); \ - exit(EXIT_FAILURE); \ - } \ -} while (0) - #endif // #ifndef CUDA_HELPER_H diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index 7a14cfe..8fae72d 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -1,77 +1,1131 @@ -/** - * Whirlpool for X15 - * Adaptation from different sources (cpuminer-multi + sgminer) - * tpruvot@github +/* + * Built on cbuchner1's implementation, actual hashing code + * based on sphlib 3.0 */ #include +#include -#include "cuda_helper.h" +#define threadsperblock 512 -#define NULLTEST 0 +#define USE_SHARED 1 + +#include "cuda_helper.h" extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); -#define SPH_64 (1) -#define SPH_SMALL_FOOTPRINT_WHIRLPOOL (1) +__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) +__constant__ uint32_t pTarget[8]; -// defined in cuda_helper.h -#define SPH_ROTL64(x,n) ROTL64(x,n) +uint32_t *d_wnounce[8]; +uint32_t *d_WNonce[8]; -/* $Id: whirlpool.c 227 2010-06-16 17:28:38Z tp $ */ -/* -* WHIRLPOOL implementation. -* -* Internally, we use little-endian convention, on the assumption that -* architectures which favour big-endian encoding are: -* 1. rarer -* 2. in decreasing numbers -* 3. 
able to decode little-endian data efficiently anyway -* -* The most common big-endian architecture is Sparc, and Ultrasparc CPU -* include special opcodes to perform little-endian accesses, which we use -* (see sph_types.h). Most modern CPU designs can work with both endian.ss -* and architecture designer now favour little-endian (basically, x86 has -* won the endian.ss war). -* -* TODO: implement a 32-bit version. Not only such a version would be handy -* for non-64-bit-able architectures, but it may also use smaller tables, -* at the expense of more lookups and XORs. -* -* ==========================(LICENSE BEGIN)============================ -* -* Copyright (c) 2007-2010 Projet RNRT SAPHIR -* -* Permission is hereby granted, free of charge, to any person obtaining -* a copy of this software and associated documentation files (the -* "Software"), to deal in the Software without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Software, and to -* permit persons to whom the Software is furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-* -* ===========================(LICENSE END)============================= -* -* @author Thomas Pornin -*/ - -#if SPH_64 +#define USE_ALL_TABLES 1 -/* ====================================================================== */ -/* -* Constants for plain WHIRLPOOL (current version). -*/ -__device__ __constant__ static const uint64_t plain_T0[256] = { +__constant__ static uint64_t mixTob0Tox[256]; +#if USE_ALL_TABLES +__constant__ static uint64_t mixTob1Tox[256]; +__constant__ static uint64_t mixTob2Tox[256]; +__constant__ static uint64_t mixTob3Tox[256]; +__constant__ static uint64_t mixTob4Tox[256]; +__constant__ static uint64_t mixTob5Tox[256]; +__constant__ static uint64_t mixTob6Tox[256]; +__constant__ static uint64_t mixTob7Tox[256]; +#endif + +/** + * Whirlpool CUDA kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 & tpruvot + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * @author djm34 + * @author tpruvot + */ + +static const uint64_t old1_T0[256] = { + SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323), + SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8), + SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8), + SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F), + SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6), + SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252), + SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC), + SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535), + SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B), + SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757), + SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777), + SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5), + SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA), + SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D), + SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4), + 
SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727), + SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D), + SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8), + SPH_C64(0x30708B30FB10FBFB), SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717), + SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202), + SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919), + SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626), + SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0), + SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F), + SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080), + SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868), + SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1), + 
SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212), + SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808), + SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D), + SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000), + SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282), + SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B), + SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3), + SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C), + SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575), + SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A), + SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6), + SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959), + SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161), + 
SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404), + SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D), + SPH_C64(0x35798335FA13FAFA), SPH_C64(0x8469B684DF7CDFDF), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111), + SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E), + SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB), + SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353), + SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C), + SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474), + SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646), + SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909), + SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242), + 
SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4), + SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686) +}; + + +static const uint64_t old1_T1[256] = { + SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F), + SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862), + SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E), + SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604), + SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207), + SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76), + SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1), + SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716), + SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6), + SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556), + SPH_C64(0x138CD99FBC9F9FD9), SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95), + SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34), + SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6), + SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB), + SPH_C64(0x73325841C3414158), 
SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94), + SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F), + SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B), + SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99), + SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B), + SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A), + SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D), + SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE), + SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33), + SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE), + SPH_C64(0x89BDE4345C3434E4), SPH_C64(0x327A7548D8484875), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5), + SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A), + SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828), + SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B), + SPH_C64(0x4D9690DB70DBDB90), 
SPH_C64(0xC0611FA1FEA1A11F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9), + SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0), + SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77), + SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318), + SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C), + SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC), + SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98), + SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659), + SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63), + SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0x629B3AF916F9F93A), SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920), + SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x4FB32BFC19FCFC2B), 
SPH_C64(0x242014040C040414), + SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39), + SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D), + SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155), + SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B), + SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60), + SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302), + SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1), + SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9), + SPH_C64(0x15E309F607F6F609), SPH_C64(0x4C0A4346CA464643), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997), + SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D), + SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257), + SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E), + SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4) +}; + +static const uint64_t old1_T2[256] = { + 
SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26), + SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB), + SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211), + SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D), + SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF), + SPH_C64(0xDEBDD26BD2D2BD0C), SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8), + SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635), + SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180), + SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5), + SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8), + SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E), + SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544), + SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A), + SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F), + SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02), + SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2), + SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56), + SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD), + 
SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF), + SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F), + SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12), + SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1), + SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B), + SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F), + SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82), + SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848), + SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8), + SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E), + 
SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6), + SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3), + SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838), + SPH_C64(0x9DF030503030F0AD), SPH_C64(0x2B74EF2CEFEF74C4), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC), + SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA), + SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE), + SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB), + SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116), + SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698), + SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424), + SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965), + SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469), + 
SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599), + SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04), + SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0), + SPH_C64(0xFDCC3C443C3CCCC1), SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C), + SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1), + SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3), + SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5), + SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7), + SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41), + SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768), + SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED), + SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2) +}; + +static const uint64_t old1_T3[256] = { + SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605), + SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13), + SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9), + SPH_C64(0x0501030101050908), 
SPH_C64(0x6E4FD14F4F6E0D42), + SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59), + SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA), + SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589), + SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5), + SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582), + SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F), + SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B), + SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E), + SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2), + SPH_C64(0x5010301010509080), SPH_C64(0x03F401F4F40307F3), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225), + SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF), + SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E), + SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8), + SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0xC5CA43CACAC5D41E), 
SPH_C64(0x992D772D2D995875), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210), + SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF), + SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8), + SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D), + SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778), + SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A), + SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67), + SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D), + SPH_C64(0xE964AC6464E96307), SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290), + SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840), + SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5), + SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664), + SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8), + SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDF6ABE6A6ADF1D77), 
SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB), + SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9), + SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0), + SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F), + SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24), + SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2), + SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0xF861A36161F84E2F), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420), + SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568), + SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988), + SPH_C64(0x898F8C8F8F89830C), 
SPH_C64(0x6B4ED24E4E6B044A), + SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B), + SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6), + SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2), + SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47), + SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387), + SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148), + SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0x5742C6424257682A), + SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49), + SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244) +}; + +static const uint64_t old1_T4[256] = { + SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF), + SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F), + SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962), + SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E), + SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904), + SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE), + 
SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07), + SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976), + SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1), + SPH_C64(0x1D271D1D69F5E869), SPH_C64(0xE03DE0E047B35347), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216), + SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6), + SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56), + SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95), + SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234), + SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB), + SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94), + SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F), + SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B), + SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF), + 
SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A), + SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D), + SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48), + SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0x8885888892BC3492), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE), + SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833), + SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5), + SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A), + SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028), + SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B), + SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9), + SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0), + SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877), + SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18), + SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C), + 
SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C), + SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264), + SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x759F7575BCFA8FBC), + SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498), + SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359), + SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220), + SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC), + SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014), + SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839), + SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855), + SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B), + SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60), + SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C), + 
SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0x7F817F7F9EA0DF9E), SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202), + SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9), + SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97), + SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D), + SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57), + SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E), + SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4) +}; + +static const uint64_t old1_T5[256] = { + SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8), + SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8), + SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F), + SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6), + SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752), + SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC), + SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0xF8A3A315D27115A3), 
SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135), + SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657), + SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677), + SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5), + SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA), + SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D), + SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667), + SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27), + SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D), + SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8), + SPH_C64(0x10FBFB30708B30FB), SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17), + SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02), + SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19), + SPH_C64(0xDB4949703B727049), 
SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26), + SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F), + SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD), + SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568), + SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12), + SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808), + SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D), + SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000), + SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082), + SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B), + SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3), + SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0), + SPH_C64(0x7FDEDE8160BE81DE), 
SPH_C64(0x241C1C6CFCE06C1C), + SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75), + SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A), + SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6), + SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F), + SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059), + SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404), + SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D), + SPH_C64(0x13FAFA35798335FA), SPH_C64(0x7CDFDF8469B684DF), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511), + SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E), + SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB), + SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0x51C4C4F3AA6EF3C4), 
SPH_C64(0x0503030F1B180F03), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253), + SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B), + SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C), + SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974), + SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742), + SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4), + SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486) +}; + +static const uint64_t old1_T6[256] = { + SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825), + SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5), + SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1), + SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7), + SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6), + SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9), + SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F), + SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B), + 
SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9), + SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799), + SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532), + SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x4A4A7F206A7F4ADE), SPH_C64(0xDADA95449E95DA73), + SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7), + SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769), + SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87), + SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875), + SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739), + SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF), + SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206), + SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B), + SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A), + 
SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11), + SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8), + SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x7A7A8F8DF78F7A8E), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8), + SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF), + SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266), + SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236), + SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818), + SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47), + SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B), + SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D), + SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC), + SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308), + SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24), + SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F), + SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83), + 
SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637), + SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x2525B11035B1256F), SPH_C64(0x595920ABF22059EB), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C), + SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17), + SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133), + SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2), + SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20), + SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0), + 
SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5), + SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C), + SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA), + SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x898997B53C978986), + SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B), + SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6), + SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1), + SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697) +}; + +static const uint64_t old1_T7[256] = { + SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523), + SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8), + SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8), + SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F), + SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6), + SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0x798096EF80798B79), SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652), + SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC), + SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35), + SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B), + SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957), + SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977), + SPH_C64(0x37EB92A5EB375937), 
SPH_C64(0xE5569E7B56E532E5), + SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA), + SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0), + SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285), + SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D), + SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927), + SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D), + SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8), + SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917), + SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E), + SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x33FFB685FF335533), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602), + SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19), + SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26), + SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F), + SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xBE7C27997CBEDFBE), 
SPH_C64(0xCDDEEB26DECD4ACD), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868), + SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE), + SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622), + SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612), + SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808), + SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D), + SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82), + SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B), + SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3), + SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF), + SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x551CC7921C55FF55), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C), + SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75), + SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A), + SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6), + SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4), + SPH_C64(0xA832812932A8E5A8), 
SPH_C64(0x96F452C4F496A796), + SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59), + SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878), + SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04), + SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D), + SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311), + SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E), + SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB), + SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x94FE40D4FE94A194), SPH_C64(0xF70C1CEB0CF704F7), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513), + SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553), + SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C), + SPH_C64(0x31F5A495F5315331), 
SPH_C64(0x74B9F387B9749C74), + SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689), + SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09), + SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642), + SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4), + SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786) +}; + +static const uint64_t old1_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +static const uint64_t plain_T0[256] = { SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), @@ -202,9 +1256,7 @@ __device__ __constant__ static const uint64_t plain_T0[256] = { SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) }; -#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL - -__device__ static const uint64_t plain_T1[256] = { +static const uint64_t plain_T1[256] = { SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), @@ -335,7 +1387,7 @@ __device__ static const uint64_t plain_T1[256] = { SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) }; -__device__ static const uint64_t plain_T2[256] = { +static const uint64_t plain_T2[256] = { SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), 
SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), @@ -466,7 +1518,7 @@ __device__ static const uint64_t plain_T2[256] = { SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) }; -__device__ static const uint64_t plain_T3[256] = { +static const uint64_t plain_T3[256] = { SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), @@ -597,7 +1649,7 @@ __device__ static const uint64_t plain_T3[256] = { SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) }; -__device__ static const uint64_t plain_T4[256] = { +static const uint64_t plain_T4[256] = { SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), @@ -728,7 +1780,7 @@ __device__ static const uint64_t plain_T4[256] = { SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) }; -__device__ static const uint64_t plain_T5[256] = { +static const uint64_t plain_T5[256] = { SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), @@ -859,7 +1911,7 @@ __device__ static const uint64_t plain_T5[256] = { SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) }; -__device__ static const uint64_t plain_T6[256] = { +static const uint64_t plain_T6[256] = { SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), @@ -990,7 +2042,7 @@ __device__ static const uint64_t plain_T6[256] = { SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) }; -__device__ static const uint64_t plain_T7[256] = { +static const uint64_t plain_T7[256] = { SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), 
SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), @@ -1121,12 +2173,12 @@ __device__ static const uint64_t plain_T7[256] = { SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) }; -#endif /* !SPH_SMALL_FOOTPRINT_WHIRLPOOL */ +/** + * Round constants. + */ +__constant__ uint64_t InitVector_RC[10]; -/* -* Round constants. -*/ -__device__ __constant__ static const uint64_t plain_RC[10] = { +static const uint64_t plain_RC[10] = { SPH_C64(0x4F01B887E8C62318), SPH_C64(0x52916F79F5D2A636), SPH_C64(0x357B0CA38E9BBC60), @@ -1140,151 +2192,542 @@ __device__ __constant__ static const uint64_t plain_RC[10] = { }; /* ====================================================================== */ -#if SPH_SMALL_FOOTPRINT_WHIRLPOOL -__device__ static uint64_t table_skew(uint64_t val, int num) { - return SPH_ROTL64(val, 8 * num); +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) + +#define TRANSFER(dst, src) { \ + dst[0] = src ## 0; \ + dst[1] = src ## 1; \ + dst[2] = src ## 2; \ + dst[3] = src ## 3; \ + dst[4] = src ## 4; \ + dst[5] = src ## 5; \ + dst[6] = src ## 6; \ + dst[7] = src ## 7; \ +} + +#if !USE_ALL_TABLES +/* method disabled to reduce code size */ +__device__ __forceinline__ +static uint64_t table_skew(uint64_t val, int num) { + return ROTL64(val, 8 * num); +} + +__device__ __forceinline__ +static uint64_t ROUND_ELT_SMALL(const uint64_t* __restrict__ sharedMemory,uint64_t in[8], + int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7) +{ + uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7; + idx0 = BYTE(in[i0], 0); + idx1 = BYTE(in[i1], 1); + idx2 = BYTE(in[i2], 2); + idx3 = BYTE(in[i3], 3); + idx4 = BYTE(in[i4], 4); + idx5 = BYTE(in[i5], 5); + idx6 = BYTE(in[i6], 6); + idx7 = BYTE(in[i7], 7); + + return xor8( + sharedMemory[idx0], + table_skew(sharedMemory[idx1], 1), + table_skew(sharedMemory[idx2], 2), + table_skew(sharedMemory[idx3], 3), + table_skew(sharedMemory[idx4], 4), + 
table_skew(sharedMemory[idx5], 5), + table_skew(sharedMemory[idx6], 6), + table_skew(sharedMemory[idx7], 7) + ); +} + +#define ROUND_SMALL(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) { \ + out ## 0 = xor1(ROUND_ELT_SMALL(table, in, 0, 7, 6, 5, 4, 3, 2, 1), c0); \ + out ## 1 = xor1(ROUND_ELT_SMALL(table, in, 1, 0, 7, 6, 5, 4, 3, 2), c1); \ + out ## 2 = xor1(ROUND_ELT_SMALL(table, in, 2, 1, 0, 7, 6, 5, 4, 3), c2); \ + out ## 3 = xor1(ROUND_ELT_SMALL(table, in, 3, 2, 1, 0, 7, 6, 5, 4), c3); \ + out ## 4 = xor1(ROUND_ELT_SMALL(table, in, 4, 3, 2, 1, 0, 7, 6, 5), c4); \ + out ## 5 = xor1(ROUND_ELT_SMALL(table, in, 5, 4, 3, 2, 1, 0, 7, 6), c5); \ + out ## 6 = xor1(ROUND_ELT_SMALL(table, in, 6, 5, 4, 3, 2, 1, 0, 7), c6); \ + out ## 7 = xor1(ROUND_ELT_SMALL(table, in, 7, 6, 5, 4, 3, 2, 1, 0), c7); \ } -#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) -#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ - (table ## 0[BYTE(in ## i0, 0)] \ - ^ table_skew(table ## 0[BYTE(in ## i1, 1)], 1) \ - ^ table_skew(table ## 0[BYTE(in ## i2, 2)], 2) \ - ^ table_skew(table ## 0[BYTE(in ## i3, 3)], 3) \ - ^ table_skew(table ## 0[BYTE(in ## i4, 4)], 4) \ - ^ table_skew(table ## 0[BYTE(in ## i5, 5)], 5) \ - ^ table_skew(table ## 0[BYTE(in ## i6, 6)], 6) \ - ^ table_skew(table ## 0[BYTE(in ## i7, 7)], 7)) +#define ROUND_KSCHED_SMALL(table, in, out, c) \ + ROUND_SMALL(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) \ + TRANSFER(in, out) + +#define ROUND_WENC_SMALL(table, in, key, out) \ + ROUND_SMALL(table, in, out, key[0], key[1], key[2],key[3], key[4], key[5], key[6], key[7]) \ + TRANSFER(in, out) + #else -#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ - (table ## 0[BYTE(in ## i0, 0)] \ - ^ table ## 1[BYTE(in ## i1, 1)] \ - ^ table ## 2[BYTE(in ## i2, 2)] \ - ^ table ## 3[BYTE(in ## i3, 3)] \ - ^ table ## 4[BYTE(in ## i4, 4)] \ - ^ table ## 5[BYTE(in ## i5, 5)] \ - ^ table ## 6[BYTE(in ## i6, 6)] \ - ^ table ## 7[BYTE(in ## i7, 7)]) +# define 
ROUND_KSCHED_SMALL(table, in, out, c) +# define ROUND_WENC_SMALL(table, in, key, out) #endif -#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) do { \ - out ## 0 = ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1) ^ c0; \ - out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2) ^ c1; \ - out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3) ^ c2; \ - out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4) ^ c3; \ - out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5) ^ c4; \ - out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6) ^ c5; \ - out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7) ^ c6; \ - out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0) ^ c7; \ - } while (0) - -#define TRANSFER(dst, src) do { \ - dst ## 0 = src ## 0; \ - dst ## 1 = src ## 1; \ - dst ## 2 = src ## 2; \ - dst ## 3 = src ## 3; \ - dst ## 4 = src ## 4; \ - dst ## 5 = src ## 5; \ - dst ## 6 = src ## 6; \ - dst ## 7 = src ## 7; \ - } while (0) +__device__ __forceinline__ +static uint64_t ROUND_ELT(const uint64_t* __restrict__ sharedMemory, uint64_t in[8], + int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7) +{ + uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7; + idx0 = BYTE(in[i0], 0); + idx1 = BYTE(in[i1], 1) + 256; + idx2 = BYTE(in[i2], 2) + 512; + idx3 = BYTE(in[i3], 3) + 768; + idx4 = BYTE(in[i4], 4) + 1024; + idx5 = BYTE(in[i5], 5) + 1280; + idx6 = BYTE(in[i6], 6) + 1536; + idx7 = BYTE(in[i7], 7) + 1792; + + return xor8(sharedMemory[idx0],sharedMemory[idx1],sharedMemory[idx2],sharedMemory[idx3], + sharedMemory[idx4],sharedMemory[idx5],sharedMemory[idx6],sharedMemory[idx7]); +} + +#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) { \ + out ## 0 = xor1(ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1), c0); \ + out ## 1 = xor1(ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2), c1); \ + out ## 2 = xor1(ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3), c2); \ + out ## 3 = xor1(ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4), c3); \ + out ## 4 = 
xor1(ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5), c4); \ + out ## 5 = xor1(ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6), c5); \ + out ## 6 = xor1(ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7), c6); \ + out ## 7 = xor1(ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0), c7); \ +} #define ROUND_KSCHED(table, in, out, c) \ - ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0); \ + ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) \ TRANSFER(in, out) #define ROUND_WENC(table, in, key, out) \ - ROUND(table, in, out, key ## 0, key ## 1, key ## 2, \ - key ## 3, key ## 4, key ## 5, key ## 6, key ## 7); \ + ROUND(table, in, out, key[0], key[1], key[2],key[3], key[4], key[5], key[6], key[7]) \ TRANSFER(in, out) -#endif -struct h8x64 +__global__ +void whirlpool512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) { - uint64_t n0, n1, n2, n3, n4, n5, n6, n7; -}; + __shared__ uint64_t sharedMemory[2048]; + + if (threadIdx.x < 256) { + sharedMemory[threadIdx.x] = mixTob0Tox[threadIdx.x]; + if (USE_ALL_TABLES) { + sharedMemory[threadIdx.x+256] = mixTob1Tox[threadIdx.x]; + sharedMemory[threadIdx.x+512] = mixTob2Tox[threadIdx.x]; + sharedMemory[threadIdx.x+768] = mixTob3Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = mixTob4Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = mixTob5Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = mixTob6Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = mixTob7Tox[threadIdx.x]; + } + } + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = startNounce + thread; + union { + uint8_t h1[64]; + uint32_t h4[16]; + uint64_t h8[8]; + } hash; + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + #pragma unroll 8 + for (int i=0; i<8; i++) { + n[i] = c_PaddedMessage80[i]; // read data + h[i] = 0; // read state + n[i] = xor1(n[i], h[i]); + } + + #pragma unroll 10 + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + 
ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } + } + + #pragma unroll 8 + for (int i=0; i < 8; i++) { + state[i] = xor1(n[i],c_PaddedMessage80[i]); + n[i]=0; + } + + /// round 2 /////// + ////////////////////////////////// + n[0] = c_PaddedMessage80[8]; //read data + n[1] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); //whirlpool + n[2] = 0x0000000000000080; //whirlpool + n[7] = 0x8002000000000000; + + #pragma unroll 8 + for (int i=0;i<8;i++) { + h[i] = state[i]; //read state + n[i] = xor1(n[i],h[i]); + } + + #pragma unroll 10 + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } + } -/***************************************************/ -// GPU Hash Function -__global__ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) + state[0] = xor3(state[0], n[0], c_PaddedMessage80[8]); + state[1] = xor3(state[1], n[1], REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)) ); + state[2] = xor3(state[2], n[2], 0x0000000000000080); + state[3] = xor1(state[3], n[3]); + state[4] = xor1(state[4], n[4]); + state[5] = xor1(state[5], n[5]); + state[6] = xor1(state[6], n[6]); + state[7] = xor3(state[7], n[7], 0x8002000000000000); + + #pragma unroll 8 + for (unsigned i = 0; i < 8; i++) + hash.h8[i] = state[i]; + + uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; + + #pragma unroll 16 + for (int i=0; i<16; i++) + outHash[i] = hash.h4[i]; + + } // thread < threads +} + +__global__ +void whirlpool512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t 
*g_hash, uint32_t *g_nonceVector) { - __syncthreads(); + __shared__ uint64_t sharedMemory[2048]; + + if (threadIdx.x < 256) { + sharedMemory[threadIdx.x] = mixTob0Tox[threadIdx.x]; + if (USE_ALL_TABLES) { + sharedMemory[threadIdx.x+256] = mixTob1Tox[threadIdx.x]; + sharedMemory[threadIdx.x+512] = mixTob2Tox[threadIdx.x]; + sharedMemory[threadIdx.x+768] = mixTob3Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = mixTob4Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = mixTob5Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = mixTob6Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = mixTob7Tox[threadIdx.x]; + } + } int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition]; + union { + uint8_t h1[64]; + uint32_t h4[16]; + uint64_t h8[8]; + } hash; + + #pragma unroll 16 + for (int i=0;i<16;i++) { + hash.h4[i]= inpHash[i]; + } + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + #pragma unroll 8 + for (int i=0;i<8;i++) { + n[i] = hash.h8[i]; + h[i] = 0; + n[i] = xor1(n[i],h[i]); + } + + #pragma unroll 10 + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } + } + + #pragma unroll 8 + for (int i=0; i<8; i++) { + state[i] = xor1(n[i],hash.h8[i]); + n[i]=0; + } + + n[0] = 0x80; + n[7] = 0x2000000000000; + + #pragma unroll 8 + for (int i=0; i < 8; i++) { + h[i] = state[i]; + n[i] = xor1(n[i], h[i]); + } + + #pragma unroll 10 + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + 
ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } + } + + state[0] = xor3(state[0], n[0], 0x80); + state[1] = xor1(state[1], n[1]); + state[2] = xor1(state[2], n[2]); + state[3] = xor1(state[3], n[3]); + state[4] = xor1(state[4], n[4]); + state[5] = xor1(state[5], n[5]); + state[6] = xor1(state[6], n[6]); + state[7] = xor3(state[7], n[7], 0x2000000000000); + + #pragma unroll 8 + for (unsigned i = 0; i < 8; i++) + hash.h8[i] = state[i]; + + #pragma unroll 16 + for (int u = 0; u < 16; u ++) + inpHash[u] = hash.h4[u]; + } +} + +__global__ +void whirlpool512_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector, uint32_t *resNounce) +{ + __shared__ uint64_t sharedMemory[2048]; + + if (threadIdx.x < 256) + { + sharedMemory[threadIdx.x] = mixTob0Tox[threadIdx.x]; + if (USE_ALL_TABLES) { + sharedMemory[threadIdx.x+256] = mixTob1Tox[threadIdx.x]; + sharedMemory[threadIdx.x+512] = mixTob2Tox[threadIdx.x]; + sharedMemory[threadIdx.x+768] = mixTob3Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = mixTob4Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = mixTob5Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = mixTob6Tox[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = mixTob7Tox[threadIdx.x]; + } + } + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); - uint32_t hashPosition = nounce - startNounce; - struct h8x64 *phash = (struct h8x64 *) &g_hash[hashPosition<<3]; - struct h8x64 p = *phash; /* copy content in local p */ - struct h8x64 st, n, h = { 0, 0, 0, 0, 0, 0, 0, 0 }; - uint8_t u; -#if NULLTEST - p = h; -#endif - TRANSFER(n.n, p.n); + + int hashPosition = nounce - startNounce; + uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition]; + union { + uint8_t h1[64]; + uint32_t h4[16]; + uint64_t h8[8]; + } hash; + + #pragma unroll 16 + for (int i=0; i<16; i++) { + hash.h4[i]= inpHash[i]; + } + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + #pragma unroll 8 + for (int i=0; i<8; i++) { + n[i] = hash.h8[i]; + h[i] = 0; + n[i] = xor1(n[i],h[i]); + } #pragma unroll 10 - for (u = 0; u < 10; u++) - { - uint64_t t0, t1, t2, t3, t4, t5, t6, t7; - ROUND_KSCHED(plain_T, h.n, t, plain_RC[u]); - ROUND_WENC(plain_T, n.n, h.n, t); + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } } - n.n0 = h.n0 = st.n0 = n.n0 ^ p.n0; - n.n1 = h.n1 = st.n1 = n.n1 ^ p.n1; - n.n2 = h.n2 = st.n2 = n.n2 ^ p.n2; - n.n3 = h.n3 = st.n3 = n.n3 ^ p.n3; - n.n4 = h.n4 = st.n4 = n.n4 ^ p.n4; - n.n5 = h.n5 = st.n5 = n.n5 ^ p.n5; - n.n6 = h.n6 = st.n6 = n.n6 ^ p.n6; - n.n7 = h.n7 = st.n7 = n.n7 ^ p.n7; + #pragma unroll 8 + for (int i=0; i<8; i++) { + state[i] = xor1(n[i],hash.h8[i]); + n[i]=0; + } - n.n0 ^= 0x80; - n.n7 ^= 0x2000000000000; + n[0] = 0x80; + n[7] = 0x2000000000000; + + #pragma unroll 8 + for (int i=0; i<8; i++) { + h[i] = state[i]; + n[i] = xor1(n[i],h[i]); + } #pragma unroll 10 - for (u = 0; u < 10; u++) - { - uint64_t t0, t1, t2, t3, t4, t5, t6, t7; - ROUND_KSCHED(plain_T, h.n, t, plain_RC[u]); - 
ROUND_WENC(plain_T, n.n, h.n, t); + for (unsigned r=0; r < 10; r++) { + uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + if (USE_ALL_TABLES) { + ROUND_KSCHED(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC(sharedMemory, n, h, tmp); + } else { + ROUND_KSCHED_SMALL(sharedMemory, h, tmp, InitVector_RC[r]); + ROUND_WENC_SMALL(sharedMemory, n, h, tmp); + } } - n.n0 ^= 0x80; - n.n7 ^= 0x2000000000000; - - phash->n0 = st.n0 ^ n.n0; - phash->n1 = st.n1 ^ n.n1; - phash->n2 = st.n2 ^ n.n2; - phash->n3 = st.n3 ^ n.n3; - phash->n4 = st.n4 ^ n.n4; - phash->n5 = st.n5 ^ n.n5; - phash->n6 = st.n6 ^ n.n6; - phash->n7 = st.n7 ^ n.n7; + state[0] = xor3(state[0], n[0], 0x80); + state[1] = xor1(state[1], n[1]); + state[2] = xor1(state[2], n[2]); + state[3] = xor1(state[3], n[3]); + state[4] = xor1(state[4], n[4]); + state[5] = xor1(state[5], n[5]); + state[6] = xor1(state[6], n[6]); + state[7] = xor3(state[7], n[7], 0x2000000000000); + + #pragma unroll 8 + for (unsigned i = 0; i < 8; i++) + hash.h8[i] = state[i]; + + bool rc = true; + for (int i = 7; i >= 0; i--) { + if (hash.h4[i] > pTarget[i]) { + rc = false; + break; + } + if (hash.h4[i] < pTarget[i]) { + rc = true; + break; + } + } + + if (rc && resNounce[0] > nounce) + resNounce[0] = nounce; + } +} + +__host__ +extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode) +{ + switch (mode) { + case 0: /* x15 with rotated T1-T7 (based on T0) */ + cudaMemcpyToSymbol(InitVector_RC, plain_RC, sizeof(plain_RC), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob0Tox, plain_T0, sizeof(plain_T0), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob1Tox, plain_T1, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob2Tox, plain_T2, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob3Tox, plain_T3, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob4Tox, plain_T4, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob5Tox, plain_T5, (256*8), 0, 
cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob6Tox, plain_T6, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob7Tox, plain_T7, (256*8), 0, cudaMemcpyHostToDevice); + break; + + case 1: /* old (whirlcoin?) */ + cudaMemcpyToSymbol(InitVector_RC, old1_RC, sizeof(plain_RC), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob0Tox, old1_T0, sizeof(plain_T0), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob1Tox, old1_T1, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob2Tox, old1_T2, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob3Tox, old1_T3, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob4Tox, old1_T4, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob5Tox, old1_T5, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob6Tox, old1_T6, (256*8), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob7Tox, old1_T7, (256*8), 0, cudaMemcpyHostToDevice); + break; +#if !USE_ALL_TABLES + case 2: /* x15 without rotated T1-T7, slower but use less memory */ + cudaMemcpyToSymbol(InitVector_RC, plain_RC, sizeof(plain_RC), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(mixTob0Tox, plain_T0, sizeof(plain_T0), 0, cudaMemcpyHostToDevice); + break; +#endif } + + cudaMalloc(&d_WNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_wnounce[thr_id], sizeof(uint32_t)); +} + +__host__ +extern void x15_whirlpool_cpu_free(int thr_id) +{ + cudaFree(d_WNonce[thr_id]); + cudaFreeHost(d_wnounce[thr_id]); } -__host__ void x15_whirlpool_cpu_init(int thr_id, int threads) +__host__ +extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { + dim3 grid((threads + threadsperblock-1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + whirlpool512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); + + MyStreamSynchronize(NULL, order, thr_id); } -__host__ void 
x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ +extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + uint32_t result = 0xffffffff; - dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 grid((threads + threadsperblock-1) / threadsperblock); dim3 block(threadsperblock); size_t shared_size = 0; - // fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + cudaMemset(d_WNonce[thr_id], 0xff, sizeof(uint32_t)); - x15_whirlpool_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); + whirlpool512_gpu_finalhash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector,d_WNonce[thr_id]); MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(d_wnounce[thr_id], d_WNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + + result = *d_wnounce[thr_id]; + + return result; +} + +__host__ +void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +{ + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + whirlpool512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} + +__host__ +void whirlpool512_setBlock_80(void *pdata, const void *ptarget) +{ + unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); + memset(PaddedMessage+80, 0, 48); + cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); } diff --git a/x15/x15.cu b/x15/x15.cu index da72cba..50e2080 100644 --- a/x15/x15.cu +++ 
b/x15/x15.cu @@ -80,8 +80,9 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun extern void x14_shabal512_cpu_init(int thr_id, int threads); extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x15_whirlpool_cpu_init(int thr_id, int threads); +extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x15_whirlpool_cpu_free(int thr_id); extern void cuda_check_cpu_init(int thr_id, int threads); extern void cuda_check_cpu_setTarget(const void *ptarget); @@ -228,7 +229,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, x13_hamsi512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput); x14_shabal512_cpu_init(thr_id, throughput); - x15_whirlpool_cpu_init(thr_id, throughput); + x15_whirlpool_cpu_init(thr_id, throughput, 0); cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; @@ -276,6 +277,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { pdata[19] = foundNonce; *hashes_done = foundNonce - first_nonce + 1; + x15_whirlpool_cpu_free(thr_id); return 1; } else if (vhash64[7] > Htarg) { @@ -291,5 +293,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); *hashes_done = pdata[19] - first_nonce + 1; + + x15_whirlpool_cpu_free(thr_id); return 0; }