From 9a7d74eac432e02154105b9bc6fbabce8d91629c Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 13 Aug 2017 15:35:58 +0200 Subject: [PATCH] skunk: merged kernel to use when possible for now, only used for maxwell/pascal with cuda 7.5 or 8.0 --- Makefile.am | 3 +- ccminer.cpp | 1 + ccminer.vcxproj | 7 +- ccminer.vcxproj.filters | 13 +- skunk/cuda_skunk.cu | 690 +++++++++++++++++++++++++++++++++++ skunk/cuda_skunk_streebog.cu | 371 +++++++++++++++++++ skunk/skein_header.h | 385 +++++++++++++++++++ skunk.cu => skunk/skunk.cu | 51 ++- skunk/streebog_arrays.cuh | 567 ++++++++++++++++++++++++++++ x11/cuda_streebog.cu | 4 +- 10 files changed, 2070 insertions(+), 22 deletions(-) create mode 100644 skunk/cuda_skunk.cu create mode 100644 skunk/cuda_skunk_streebog.cu create mode 100644 skunk/skein_header.h rename skunk.cu => skunk/skunk.cu (76%) create mode 100644 skunk/streebog_arrays.cuh diff --git a/Makefile.am b/Makefile.am index febcc9b..60fb4d6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -55,7 +55,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \ quark/nist5.cu \ quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \ neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \ - pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp skunk.cu zr5.cu \ + pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp zr5.cu \ + skunk/skunk.cu skunk/cuda_skunk.cu skunk/cuda_skunk_streebog.cu \ sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \ sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ diff --git a/ccminer.cpp b/ccminer.cpp index 690d76a..41db279 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -2226,6 +2226,7 @@ static void *miner_thread(void *userdata) case ALGO_JHA: case ALGO_LYRA2v2: case ALGO_S3: + case ALGO_SKUNK: case ALGO_TIMETRAVEL: case ALGO_BITCORE: case ALGO_X11EVO: diff --git a/ccminer.vcxproj b/ccminer.vcxproj index a68f66b..a5769f4 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -532,7 +532,12 @@ 48 - + + + compute_50,sm_50;compute_52,sm_52 + 64 + + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index f6fef2c..5b0dd48 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -46,6 +46,9 @@ {f3ed23a2-8ce7-41a5-b051-6da56047dc35} + + {23ed23a2-8e7a-65a2-b051-8aa63047d352} + {dd751f2d-bfd6-42c1-8f9b-cbe94e539353} @@ -754,8 +757,14 @@ Source Files\CUDA - - Source Files\CUDA + + Source Files\CUDA\skunk + + + Source Files\CUDA\skunk + + + Source Files\CUDA\skunk Source Files\CUDA diff --git a/skunk/cuda_skunk.cu b/skunk/cuda_skunk.cu new file mode 100644 index 0000000..4a41507 --- /dev/null +++ b/skunk/cuda_skunk.cu @@ -0,0 +1,690 @@ +/** + * skein + cube + fugue (to fix) + * Based on krnlx work + */ + +#include +#include +#include "skunk/skein_header.h" +#include + +#define TPB 512 + +/* ************************ */ +static __constant__ uint2 c_buffer[120]; // padded message (80 bytes + 72 bytes midstate + align) + +#define SWAP(a,b) { uint32_t u = a; a = b; b = u; } + +__device__ __forceinline__ +static void rrounds(uint32_t *x){ + #pragma unroll 2 + for (int r = 0; r < 16; r++) { + /* "add x_0jklm into x_1jklmn modulo 2^32 rotate x_0jklm upwards by 7 bits" */ + x[16] = x[16] + x[ 0]; x[ 0] = ROTL32(x[ 0], 7);x[17] = x[17] + x[ 1];x[ 1] = ROTL32(x[ 1], 7); + x[18] = x[18] + x[ 2]; x[ 2] = ROTL32(x[ 2], 7);x[19] = x[19] + x[ 3];x[ 3] = ROTL32(x[ 3], 7); + x[20] = x[20] + x[ 4]; x[ 4] = ROTL32(x[ 4], 7);x[21] = x[21] + x[ 5];x[ 5] = ROTL32(x[ 5], 7); + x[22] = x[22] + x[ 6]; x[ 6] = ROTL32(x[ 6], 7);x[23] = x[23] + x[ 7];x[ 7] = ROTL32(x[ 7], 7); + x[24] = x[24] + x[ 8]; x[ 8] = ROTL32(x[ 8], 7);x[25] = x[25] + x[ 9];x[ 9] = ROTL32(x[ 9], 7); + x[26] = x[26] + x[10]; x[10] = ROTL32(x[10], 7);x[27] = x[27] + x[11];x[11] = ROTL32(x[11], 7); + x[28] = x[28] + x[12]; x[12] = ROTL32(x[12], 7);x[29] = x[29] + x[13];x[13] = ROTL32(x[13], 7); + x[30] = x[30] + x[14]; x[14] = ROTL32(x[14], 7);x[31] = x[31] + x[15];x[15] = ROTL32(x[15], 7); + /* "swap x_00klm with x_01klm" */ + SWAP(x[ 0], x[ 8]);x[ 0] ^= x[16];x[ 8] ^= x[24];SWAP(x[ 1], x[ 9]);x[ 1] ^= x[17];x[ 9] ^= x[25]; + SWAP(x[ 2], x[10]);x[ 2] ^= x[18];x[10] ^= x[26];SWAP(x[ 3], x[11]);x[ 3] ^= x[19];x[11] ^= x[27]; + SWAP(x[ 4], x[12]);x[ 4] ^= x[20];x[12] ^= x[28];SWAP(x[ 5], x[13]);x[ 5] ^= x[21];x[13] ^= x[29]; + SWAP(x[ 6], x[14]);x[ 6] ^= x[22];x[14] ^= x[30];SWAP(x[ 7], x[15]);x[ 7] ^= x[23];x[15] ^= x[31]; + /* "swap x_1jk0m with x_1jk1m" */ + SWAP(x[16], x[18]); SWAP(x[17], x[19]); SWAP(x[20], x[22]); SWAP(x[21], x[23]);SWAP(x[24], x[26]); SWAP(x[25], x[27]); SWAP(x[28], x[30]); SWAP(x[29], x[31]); + /* "add x_0jklm into x_1jklm modulo 2^32 rotate x_0jklm upwards by 11 bits" */ + x[16] = x[16] + x[ 0]; x[ 0] = ROTL32(x[ 0],11);x[17] = x[17] + x[ 1];x[ 1] = ROTL32(x[ 1],11); + x[18] = x[18] + x[ 2]; x[ 2] = ROTL32(x[ 2],11);x[19] = x[19] + x[ 3];x[ 3] = ROTL32(x[ 3],11); + x[20] = x[20] + x[ 4]; x[ 4] = ROTL32(x[ 4],11);x[21] = x[21] + x[ 5];x[ 5] = ROTL32(x[ 5],11); + x[22] = x[22] + x[ 6]; x[ 6] = ROTL32(x[ 6],11);x[23] = x[23] + x[ 7];x[ 7] = ROTL32(x[ 7],11); + x[24] = x[24] + x[ 8]; x[ 8] = ROTL32(x[ 8],11);x[25] = x[25] + x[ 9];x[ 9] = ROTL32(x[ 9],11); + x[26] = x[26] + x[10]; x[10] = ROTL32(x[10],11);x[27] = x[27] + x[11];x[11] = ROTL32(x[11],11); + x[28] = x[28] + x[12]; x[12] = ROTL32(x[12],11);x[29] = x[29] + x[13];x[13] = ROTL32(x[13],11); + x[30] = x[30] + x[14]; x[14] = ROTL32(x[14],11);x[31] = x[31] + x[15];x[15] = ROTL32(x[15],11); + /* "swap x_0j0lm with x_0j1lm" */ + SWAP(x[ 0], x[ 4]); x[ 0] ^= x[16]; x[ 4] ^= x[20]; SWAP(x[ 1], x[ 5]); x[ 1] ^= x[17]; x[ 5] ^= x[21]; + SWAP(x[ 2], x[ 6]); x[ 2] ^= x[18]; x[ 6] ^= x[22]; SWAP(x[ 3], x[ 7]); x[ 3] ^= x[19]; x[ 7] ^= x[23]; + SWAP(x[ 8], x[12]); x[ 8] ^= x[24]; x[12] ^= x[28]; SWAP(x[ 9], x[13]); x[ 9] ^= x[25]; x[13] ^= x[29]; + SWAP(x[10], x[14]); x[10] ^= x[26]; x[14] ^= x[30]; SWAP(x[11], x[15]); x[11] ^= x[27]; x[15] ^= x[31]; + /* "swap x_1jkl0 with x_1jkl1" */ + SWAP(x[16], x[17]); SWAP(x[18], x[19]); SWAP(x[20], x[21]); SWAP(x[22], x[23]);SWAP(x[24], x[25]); SWAP(x[26], x[27]); SWAP(x[28], x[29]); SWAP(x[30], x[31]); + } +} + +// fugue +static __constant__ const uint32_t c_S[16] = { + 0x8807a57e, 0xe616af75, 0xc5d3e4db, 0xac9ab027, + 0xd915f117, 0xb6eecc54, 0x06e8020b, 0x4a92efd1, + 0xaac6e2c9, 0xddb21398, 0xcae65838, 0x437f203f, + 0x25ea78e7, 0x951fddd6, 0xda6ed11d, 0xe13e3567 +}; + +static __device__ uint32_t mixtab0[256] = { + 0x63633297, 0x7c7c6feb, 0x77775ec7, 0x7b7b7af7, 0xf2f2e8e5, 0x6b6b0ab7, 0x6f6f16a7, 0xc5c56d39, + 0x303090c0, 0x01010704, 0x67672e87, 0x2b2bd1ac, 0xfefeccd5, 0xd7d71371, 0xabab7c9a, 0x767659c3, + 0xcaca4005, 0x8282a33e, 0xc9c94909, 0x7d7d68ef, 0xfafad0c5, 0x5959947f, 0x4747ce07, 0xf0f0e6ed, + 0xadad6e82, 0xd4d41a7d, 0xa2a243be, 0xafaf608a, 0x9c9cf946, 0xa4a451a6, 0x727245d3, 0xc0c0762d, + 0xb7b728ea, 0xfdfdc5d9, 0x9393d47a, 0x2626f298, 0x363682d8, 0x3f3fbdfc, 0xf7f7f3f1, 0xcccc521d, + 0x34348cd0, 0xa5a556a2, 0xe5e58db9, 0xf1f1e1e9, 0x71714cdf, 0xd8d83e4d, 0x313197c4, 0x15156b54, + 0x04041c10, 0xc7c76331, 0x2323e98c, 0xc3c37f21, 0x18184860, 0x9696cf6e, 0x05051b14, 0x9a9aeb5e, + 0x0707151c, 0x12127e48, 0x8080ad36, 0xe2e298a5, 0xebeba781, 0x2727f59c, 0xb2b233fe, 0x757550cf, + 0x09093f24, 0x8383a43a, 0x2c2cc4b0, 0x1a1a4668, 0x1b1b416c, 0x6e6e11a3, 0x5a5a9d73, 0xa0a04db6, + 0x5252a553, 0x3b3ba1ec, 0xd6d61475, 0xb3b334fa, 0x2929dfa4, 0xe3e39fa1, 0x2f2fcdbc, 0x8484b126, + 0x5353a257, 0xd1d10169, 0x00000000, 0xededb599, 0x2020e080, 0xfcfcc2dd, 0xb1b13af2, 0x5b5b9a77, + 0x6a6a0db3, 0xcbcb4701, 0xbebe17ce, 0x3939afe4, 0x4a4aed33, 0x4c4cff2b, 0x5858937b, 0xcfcf5b11, + 0xd0d0066d, 0xefefbb91, 0xaaaa7b9e, 0xfbfbd7c1, 0x4343d217, 0x4d4df82f, 0x333399cc, 0x8585b622, + 0x4545c00f, 0xf9f9d9c9, 0x02020e08, 0x7f7f66e7, 0x5050ab5b, 0x3c3cb4f0, 0x9f9ff04a, 0xa8a87596, + 0x5151ac5f, 0xa3a344ba, 0x4040db1b, 0x8f8f800a, 0x9292d37e, 0x9d9dfe42, 0x3838a8e0, 0xf5f5fdf9, + 0xbcbc19c6, 0xb6b62fee, 0xdada3045, 0x2121e784, 0x10107040, 0xffffcbd1, 0xf3f3efe1, 0xd2d20865, + 0xcdcd5519, 0x0c0c2430, 0x1313794c, 0xececb29d, 0x5f5f8667, 0x9797c86a, 0x4444c70b, 0x1717655c, + 0xc4c46a3d, 0xa7a758aa, 0x7e7e61e3, 0x3d3db3f4, 0x6464278b, 0x5d5d886f, 0x19194f64, 0x737342d7, + 0x60603b9b, 0x8181aa32, 0x4f4ff627, 0xdcdc225d, 0x2222ee88, 0x2a2ad6a8, 0x9090dd76, 0x88889516, + 0x4646c903, 0xeeeebc95, 0xb8b805d6, 0x14146c50, 0xdede2c55, 0x5e5e8163, 0x0b0b312c, 0xdbdb3741, + 0xe0e096ad, 0x32329ec8, 0x3a3aa6e8, 0x0a0a3628, 0x4949e43f, 0x06061218, 0x2424fc90, 0x5c5c8f6b, + 0xc2c27825, 0xd3d30f61, 0xacac6986, 0x62623593, 0x9191da72, 0x9595c662, 0xe4e48abd, 0x797974ff, + 0xe7e783b1, 0xc8c84e0d, 0x373785dc, 0x6d6d18af, 0x8d8d8e02, 0xd5d51d79, 0x4e4ef123, 0xa9a97292, + 0x6c6c1fab, 0x5656b943, 0xf4f4fafd, 0xeaeaa085, 0x6565208f, 0x7a7a7df3, 0xaeae678e, 0x08083820, + 0xbaba0bde, 0x787873fb, 0x2525fb94, 0x2e2ecab8, 0x1c1c5470, 0xa6a65fae, 0xb4b421e6, 0xc6c66435, + 0xe8e8ae8d, 0xdddd2559, 0x747457cb, 0x1f1f5d7c, 0x4b4bea37, 0xbdbd1ec2, 0x8b8b9c1a, 0x8a8a9b1e, + 0x70704bdb, 0x3e3ebaf8, 0xb5b526e2, 0x66662983, 0x4848e33b, 0x0303090c, 0xf6f6f4f5, 0x0e0e2a38, + 0x61613c9f, 0x35358bd4, 0x5757be47, 0xb9b902d2, 0x8686bf2e, 0xc1c17129, 0x1d1d5374, 0x9e9ef74e, + 0xe1e191a9, 0xf8f8decd, 0x9898e556, 0x11117744, 0x696904bf, 0xd9d93949, 0x8e8e870e, 0x9494c166, + 0x9b9bec5a, 0x1e1e5a78, 0x8787b82a, 0xe9e9a989, 0xcece5c15, 0x5555b04f, 0x2828d8a0, 0xdfdf2b51, + 0x8c8c8906, 0xa1a14ab2, 0x89899212, 0x0d0d2334, 0xbfbf10ca, 0xe6e684b5, 0x4242d513, 0x686803bb, + 0x4141dc1f, 0x9999e252, 0x2d2dc3b4, 0x0f0f2d3c, 0xb0b03df6, 0x5454b74b, 0xbbbb0cda, 0x16166258 +}; + +__device__ __forceinline__ +uint32_t ROL8X(const uint32_t a){ + return __byte_perm(a, 0, 0x2103); +} +__device__ __forceinline__ +uint32_t ROL16X(const uint32_t a){ + return __byte_perm(a, 0, 0x1032); +} +__device__ __forceinline__ +uint32_t ROR8X(const uint32_t a){ + return __byte_perm(a, 0, 0x0321); +} + +#define mixtab0(x) shared[0][x] +#define mixtab1(x) shared[1][x] +#define mixtab2(x) shared[2][x] +#define mixtab3(x) shared[3][x] + +#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { \ + x22 ^= x00; \ + x00 = (q); \ + x08 ^= (q); \ + x01 ^= x24; \ + x04 ^= x27; \ + x07 ^= x30; \ + } + +#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { \ + x00 ^= x04; \ + x01 ^= x05; \ + x02 ^= x06; \ + x18 ^= x04; \ + x19 ^= x05; \ + x20 ^= x06; \ + } + +__device__ __forceinline__ +static void SMIX(const uint32_t shared[4][256], uint32_t &x0,uint32_t &x1,uint32_t &x2,uint32_t &x3){ + uint32_t c0 = mixtab0(__byte_perm(x0,0,0x4443)); + uint32_t r1 = mixtab1(__byte_perm(x0,0,0x4442)); + uint32_t r2 = mixtab2(__byte_perm(x0,0,0x4441)); + uint32_t r3 = mixtab3(__byte_perm(x0,0,0x4440)); + c0 = c0 ^ r1 ^ r2 ^ r3; + uint32_t r0 = mixtab0(__byte_perm(x1,0,0x4443)); + uint32_t c1 = r0 ^ mixtab1(__byte_perm(x1,0,0x4442)); + uint32_t tmp = mixtab2(__byte_perm(x1,0,0x4441)); + c1 ^= tmp; + r2 ^= tmp; + tmp = mixtab3(__byte_perm(x1,0,0x4440)); + c1 ^= tmp; + r3 ^= tmp; + uint32_t c2 = mixtab0(__byte_perm(x2,0,0x4443)); + r0 ^= c2; + tmp = mixtab1(__byte_perm(x2,0,0x4442)); + c2 ^= tmp; + r1 ^= tmp; + tmp = mixtab2(__byte_perm(x2,0,0x4441)); + c2 ^= tmp; + tmp = mixtab3(__byte_perm(x2,0,0x4440)); + c2 ^= tmp; + r3 ^= tmp; + uint32_t c3 = mixtab0(__byte_perm(x3,0,0x4443)); + r0 ^= c3; + tmp = mixtab1(__byte_perm(x3,0,0x4442)); + c3 ^= tmp; + r1 ^= tmp; + tmp = mixtab2(__byte_perm(x3,0,0x4441)); + c3 ^= tmp; + r2 ^= tmp; + tmp = mixtab3(__byte_perm(x3,0,0x4440)); + c3 ^= tmp; + x0 = ((c0 ^ (r0 << 0)) & 0xFF000000) | ((c1 ^ (r1 << 0)) & 0x00FF0000) | ((c2 ^ (r2 << 0)) & 0x0000FF00) | ((c3 ^ (r3 << 0)) & 0x000000FF); + x1 = ((c1 ^ (r0 << 8)) & 0xFF000000) | ((c2 ^ (r1 << 8)) & 0x00FF0000) | ((c3 ^ (r2 << 8)) & 0x0000FF00) | ((c0 ^ (r3 >>24)) & 0x000000FF); + x2 = ((c2 ^ (r0 <<16)) & 0xFF000000) | ((c3 ^ (r1 <<16)) & 0x00FF0000) | ((c0 ^ (r2 >>16)) & 0x0000FF00) | ((c1 ^ (r3 >>16)) & 0x000000FF); + x3 = ((c3 ^ (r0 <<24)) & 0xFF000000) | ((c0 ^ (r1 >> 8)) & 0x00FF0000) | ((c1 ^ (r2 >> 8)) & 0x0000FF00) | ((c2 ^ (r3 >> 8)) & 0x000000FF); +} + +__device__ +static void SMIX_LDG(const uint32_t shared[4][256], uint32_t &x0,uint32_t &x1,uint32_t &x2,uint32_t &x3){ + uint32_t c0 = __ldg(&mixtab0[__byte_perm(x0,0,0x4443)]); + uint32_t r1 = mixtab1(__byte_perm(x0,0,0x4442)); + uint32_t r2 = mixtab2(__byte_perm(x0,0,0x4441)); + uint32_t r3 = mixtab3(__byte_perm(x0,0,0x4440)); + c0 = c0 ^ r1 ^ r2 ^ r3; + uint32_t r0 = __ldg(&mixtab0[__byte_perm(x1,0,0x4443)]); + uint32_t c1 = r0 ^ mixtab1(__byte_perm(x1,0,0x4442)); + uint32_t tmp = mixtab2(__byte_perm(x1,0,0x4441)); + c1 ^= tmp; + r2 ^= tmp; + tmp = mixtab3(__byte_perm(x1,0,0x4440)); + c1 ^= tmp; + r3 ^= tmp; + uint32_t c2 = __ldg(&mixtab0[__byte_perm(x2,0,0x4443)]); + r0 ^= c2; + tmp = mixtab1(__byte_perm(x2,0,0x4442)); + c2 ^= tmp; + r1 ^= tmp; + tmp = mixtab2(__byte_perm(x2,0,0x4441)); + c2 ^= tmp; + tmp = mixtab3(__byte_perm(x2,0,0x4440)); + c2 ^= tmp; + r3 ^= tmp; + uint32_t c3 = __ldg(&mixtab0[__byte_perm(x3,0,0x4443)]); + r0 ^= c3; + tmp = mixtab1(__byte_perm(x3,0,0x4442)); + c3 ^= tmp; + r1 ^= tmp; + tmp = mixtab2(__byte_perm(x3,0,0x4441)); + c3 ^= tmp; + r2 ^= tmp; + tmp = ROL8X(__ldg(&mixtab0[__byte_perm(x3,0,0x4440)])); + c3 ^= tmp; + x0 = ((c0 ^ (r0 << 0)) & 0xFF000000) | ((c1 ^ (r1 << 0)) & 0x00FF0000) | ((c2 ^ (r2 << 0)) & 0x0000FF00) | ((c3 ^ (r3 << 0)) & 0x000000FF); + x1 = ((c1 ^ (r0 << 8)) & 0xFF000000) | ((c2 ^ (r1 << 8)) & 0x00FF0000) | ((c3 ^ (r2 << 8)) & 0x0000FF00) | ((c0 ^ (r3 >>24)) & 0x000000FF); + x2 = ((c2 ^ (r0 <<16)) & 0xFF000000) | ((c3 ^ (r1 <<16)) & 0x00FF0000) | ((c0 ^ (r2 >>16)) & 0x0000FF00) | ((c1 ^ (r3 >>16)) & 0x000000FF); + x3 = ((c3 ^ (r0 <<24)) & 0xFF000000) | ((c0 ^ (r1 >> 8)) & 0x00FF0000) | ((c1 ^ (r2 >> 8)) & 0x0000FF00) | ((c2 ^ (r3 >> 8)) & 0x000000FF); +} + +#define mROR3 { \ + B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \ + S[35] = S[32]; S[34] = S[31]; S[33] = S[30]; S[32] = S[29]; S[31] = S[28]; S[30] = S[27]; S[29] = S[26]; S[28] = S[25]; S[27] = S[24]; \ + S[26] = S[23]; S[25] = S[22]; S[24] = S[21]; S[23] = S[20]; S[22] = S[19]; S[21] = S[18]; S[20] = S[17]; S[19] = S[16]; S[18] = S[15]; \ + S[17] = S[14]; S[16] = S[13]; S[15] = S[12]; S[14] = S[11]; S[13] = S[10]; S[12] = S[ 9]; S[11] = S[ 8]; S[10] = S[ 7]; S[ 9] = S[ 6]; \ + S[ 8] = S[ 5]; S[ 7] = S[ 4]; S[ 6] = S[ 3]; S[ 5] = S[ 2]; S[ 4] = S[ 1]; S[ 3] = S[ 0]; S[ 2] = B[ 8]; S[ 1] = B[ 7]; S[ 0] = B[ 6]; \ + } + +#define mROR8 { \ + B[ 1] = S[28], B[ 2] = S[29], B[ 3] = S[30], B[ 4] = S[31], B[ 5] = S[32], B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \ + S[35] = S[27]; S[34] = S[26]; S[33] = S[25]; S[32] = S[24]; S[31] = S[23]; S[30] = S[22]; S[29] = S[21]; S[28] = S[20]; S[27] = S[19]; \ + S[26] = S[18]; S[25] = S[17]; S[24] = S[16]; S[23] = S[15]; S[22] = S[14]; S[21] = S[13]; S[20] = S[12]; S[19] = S[11]; S[18] = S[10]; \ + S[17] = S[ 9]; S[16] = S[ 8]; S[15] = S[ 7]; S[14] = S[ 6]; S[13] = S[ 5]; S[12] = S[ 4]; S[11] = S[ 3]; S[10] = S[ 2]; S[ 9] = S[ 1]; \ + S[ 8] = S[ 0]; S[ 7] = B[ 8]; S[ 6] = B[ 7]; S[ 5] = B[ 6]; S[ 4] = B[ 5]; S[ 3] = B[ 4]; S[ 2] = B[ 3]; S[ 1] = B[ 2]; S[ 0] = B[ 1]; \ + } + +#define mROR9 { \ + B[ 0] = S[27], B[ 1] = S[28], B[ 2] = S[29], B[ 3] = S[30], B[ 4] = S[31], B[ 5] = S[32], B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \ + S[35] = S[26]; S[34] = S[25]; S[33] = S[24]; S[32] = S[23]; S[31] = S[22]; S[30] = S[21]; S[29] = S[20]; S[28] = S[19]; S[27] = S[18]; \ + S[26] = S[17]; S[25] = S[16]; S[24] = S[15]; S[23] = S[14]; S[22] = S[13]; S[21] = S[12]; S[20] = S[11]; S[19] = S[10]; S[18] = S[ 9]; \ + S[17] = S[ 8]; S[16] = S[ 7]; S[15] = S[ 6]; S[14] = S[ 5]; S[13] = S[ 4]; S[12] = S[ 3]; S[11] = S[ 2]; S[10] = S[ 1]; S[ 9] = S[ 0]; \ + S[ 8] = B[ 8]; S[ 7] = B[ 7]; S[ 6] = B[ 6]; S[ 5] = B[ 5]; S[ 4] = B[ 4]; S[ 3] = B[ 3]; S[ 2] = B[ 2]; S[ 1] = B[ 1]; S[ 0] = B[ 0]; \ + } + +#define FUGUE512_3(x, y, z) { \ + TIX4(x, S[ 0], S[ 1], S[ 4], S[ 7], S[ 8], S[22], S[24], S[27], S[30]); \ + CMIX36(S[33], S[34], S[35], S[ 1], S[ 2], S[ 3], S[15], S[16], S[17]); \ + SMIX_LDG(shared, S[33], S[34], S[35], S[ 0]); \ + CMIX36(S[30], S[31], S[32], S[34], S[35], S[ 0], S[12], S[13], S[14]); \ + SMIX_LDG(shared, S[30], S[31], S[32], S[33]); \ + CMIX36(S[27], S[28], S[29], S[31], S[32], S[33], S[ 9], S[10], S[11]); \ + SMIX(shared, S[27], S[28], S[29], S[30]); \ + CMIX36(S[24], S[25], S[26], S[28], S[29], S[30], S[ 6], S[ 7], S[ 8]); \ + SMIX_LDG(shared, S[24], S[25], S[26], S[27]); \ + \ + TIX4(y, S[24], S[25], S[28], S[31], S[32], S[10], S[12], S[15], S[18]); \ + CMIX36(S[21], S[22], S[23], S[25], S[26], S[27], S[ 3], S[ 4], S[ 5]); \ + SMIX(shared, S[21], S[22], S[23], S[24]); \ + CMIX36(S[18], S[19], S[20], S[22], S[23], S[24], S[ 0], S[ 1], S[ 2]); \ + SMIX_LDG(shared, S[18], S[19], S[20], S[21]); \ + CMIX36(S[15], S[16], S[17], S[19], S[20], S[21], S[33], S[34], S[35]); \ + SMIX_LDG(shared, S[15], S[16], S[17], S[18]); \ + CMIX36(S[12], S[13], S[14], S[16], S[17], S[18], S[30], S[31], S[32]); \ + SMIX(shared, S[12], S[13], S[14], S[15]); \ + \ + TIX4(z, S[12], S[13], S[16], S[19], S[20], S[34], S[ 0], S[ 3], S[ 6]); \ + CMIX36(S[ 9], S[10], S[11], S[13], S[14], S[15], S[27], S[28], S[29]); \ + SMIX_LDG(shared, S[ 9], S[10], S[11], S[12]); \ + CMIX36(S[ 6], S[ 7], S[ 8], S[10], S[11], S[12], S[24], S[25], S[26]); \ + SMIX_LDG(shared, S[ 6], S[ 7], S[ 8], S[ 9]); \ + CMIX36(S[ 3], S[ 4], S[ 5], S[ 7], S[ 8], S[ 9], S[21], S[22], S[23]); \ + SMIX_LDG(shared, S[ 3], S[ 4], S[ 5], S[ 6]); \ + CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]); \ + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); \ + } + +__global__ +__launch_bounds__(TPB, 2) +void skunk_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t *output64) +{ + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + + __shared__ uint32_t shared[4][256]; + + if(threadIdx.x<256) { + const uint32_t tmp = __ldg(&mixtab0[threadIdx.x]); + shared[0][threadIdx.x] = tmp; + shared[1][threadIdx.x] = ROR8X(tmp); + shared[2][threadIdx.x] = ROL16X(tmp); + shared[3][threadIdx.x] = ROL8X(tmp); + } + __syncthreads(); + + if (thread < threads) + { + // Skein + uint2 h[9]; + uint2 t0, t1, t2; + + uint32_t nonce = cuda_swab32(startNounce + thread); + uint2 nonce2 = make_uint2(c_buffer[0].x, nonce); + + uint2 p[8]; + p[1] = nonce2; + + h[0] = c_buffer[ 1]; + h[1] = c_buffer[ 2]; + h[2] = c_buffer[ 3]; + h[3] = c_buffer[ 4]; + h[4] = c_buffer[ 5]; + h[5] = c_buffer[ 6]; + h[6] = c_buffer[ 7]; + h[7] = c_buffer[ 8]; + h[8] = c_buffer[ 9]; + + t0 = vectorize(0x50ull); + t1 = vectorize(0xB000000000000000ull); + t2 = t0^t1; + + p[ 1]=nonce2 + h[1]; p[ 0]= c_buffer[10] + p[ 1]; + p[ 2]=c_buffer[11]; + p[ 3]=c_buffer[12]; + p[ 4]=c_buffer[13]; + p[ 5]=c_buffer[14]; + p[ 6]=c_buffer[15]; + p[ 7]=c_buffer[16]; + +// TFBIGMIX8e(); + p[1] = ROL2(p[1], 46) ^ p[0]; + p[2] += p[1]; + p[0] += p[3]; + p[1] = ROL2(p[1], 33) ^ p[2]; + p[3] = c_buffer[17] ^ p[0]; + p[4] += p[1]; + p[6] += p[3]; + p[0] += p[5]; + p[2] += p[7]; + p[1] = ROL2(p[1], 17) ^ p[4]; + p[3] = ROL2(p[3], 49) ^ p[6]; + p[5] = c_buffer[18] ^ p[0]; + p[7] = c_buffer[19] ^ p[2]; + p[6] += p[1]; + p[0] += p[7]; + p[2] += p[5]; + p[4] += p[3]; + p[1] = ROL2(p[1], 44) ^ p[6]; + p[7] = ROL2(p[7], 9) ^ p[0]; + p[5] = ROL2(p[5], 54) ^ p[2]; + p[3] = ROR8(p[3]) ^ p[4]; + + p[ 0]+=h[1]; p[ 1]+=h[2]; p[ 2]+=h[3]; p[ 3]+=h[4]; p[ 4]+=h[5]; p[ 5]+=c_buffer[20]; p[ 7]+=c_buffer[21]; p[ 6]+=c_buffer[22]; + TFBIGMIX8o(); + p[ 0]+=h[2]; p[ 1]+=h[3]; p[ 2]+=h[4]; p[ 3]+=h[5]; p[ 4]+=h[6]; p[ 5]+=c_buffer[22]; p[ 7]+=c_buffer[23]; p[ 6]+=c_buffer[24]; + TFBIGMIX8e(); + p[ 0]+=h[3]; p[ 1]+=h[4]; p[ 2]+=h[5]; p[ 3]+=h[6]; p[ 4]+=h[7]; p[ 5]+=c_buffer[24]; p[ 7]+=c_buffer[25]; p[ 6]+=c_buffer[26]; + TFBIGMIX8o(); + p[ 0]+=h[4]; p[ 1]+=h[5]; p[ 2]+=h[6]; p[ 3]+=h[7]; p[ 4]+=h[8]; p[ 5]+=c_buffer[26]; p[ 7]+=c_buffer[27]; p[ 6]+=c_buffer[28]; + TFBIGMIX8e(); + p[ 0]+=h[5]; p[ 1]+=h[6]; p[ 2]+=h[7]; p[ 3]+=h[8]; p[ 4]+=h[0]; p[ 5]+=c_buffer[28]; p[ 7]+=c_buffer[29]; p[ 6]+=c_buffer[30]; + TFBIGMIX8o(); + p[ 0]+=h[6]; p[ 1]+=h[7]; p[ 2]+=h[8]; p[ 3]+=h[0]; p[ 4]+=h[1]; p[ 5]+=c_buffer[30]; p[ 7]+=c_buffer[31]; p[ 6]+=c_buffer[32]; + TFBIGMIX8e(); + p[ 0]+=h[7]; p[ 1]+=h[8]; p[ 2]+=h[0]; p[ 3]+=h[1]; p[ 4]+=h[2]; p[ 5]+=c_buffer[32]; p[ 7]+=c_buffer[33]; p[ 6]+=c_buffer[34]; + TFBIGMIX8o(); + p[ 0]+=h[8]; p[ 1]+=h[0]; p[ 2]+=h[1]; p[ 3]+=h[2]; p[ 4]+=h[3]; p[ 5]+=c_buffer[34]; p[ 7]+=c_buffer[35]; p[ 6]+=c_buffer[36]; + TFBIGMIX8e(); + p[ 0]+=h[0]; p[ 1]+=h[1]; p[ 2]+=h[2]; p[ 3]+=h[3]; p[ 4]+=h[4]; p[ 5]+=c_buffer[36]; p[ 7]+=c_buffer[37]; p[ 6]+=c_buffer[38]; + TFBIGMIX8o(); + p[ 0]+=h[1]; p[ 1]+=h[2]; p[ 2]+=h[3]; p[ 3]+=h[4]; p[ 4]+=h[5]; p[ 5]+=c_buffer[38]; p[ 7]+=c_buffer[39]; p[ 6]+=c_buffer[40]; + TFBIGMIX8e(); + p[ 0]+=h[2]; p[ 1]+=h[3]; p[ 2]+=h[4]; p[ 3]+=h[5]; p[ 4]+=h[6]; p[ 5]+=c_buffer[40]; p[ 7]+=c_buffer[41]; p[ 6]+=c_buffer[42]; + TFBIGMIX8o(); + p[ 0]+=h[3]; p[ 1]+=h[4]; p[ 2]+=h[5]; p[ 3]+=h[6]; p[ 4]+=h[7]; p[ 5]+=c_buffer[42]; p[ 7]+=c_buffer[43]; p[ 6]+=c_buffer[44]; + TFBIGMIX8e(); + p[ 0]+=h[4]; p[ 1]+=h[5]; p[ 2]+=h[6]; p[ 3]+=h[7]; p[ 4]+=h[8]; p[ 5]+=c_buffer[44]; p[ 7]+=c_buffer[45]; p[ 6]+=c_buffer[46]; + TFBIGMIX8o(); + p[ 0]+=h[5]; p[ 1]+=h[6]; p[ 2]+=h[7]; p[ 3]+=h[8]; p[ 4]+=h[0]; p[ 5]+=c_buffer[46]; p[ 7]+=c_buffer[47]; p[ 6]+=c_buffer[48]; + TFBIGMIX8e(); + p[ 0]+=h[6]; p[ 1]+=h[7]; p[ 2]+=h[8]; p[ 3]+=h[0]; p[ 4]+=h[1]; p[ 5]+=c_buffer[48]; p[ 7]+=c_buffer[49]; p[ 6]+=c_buffer[50]; + TFBIGMIX8o(); + p[ 0]+=h[7]; p[ 1]+=h[8]; p[ 2]+=h[0]; p[ 3]+=h[1]; p[ 4]+=h[2]; p[ 5]+=c_buffer[50]; p[ 7]+=c_buffer[51]; p[ 6]+=c_buffer[52]; + TFBIGMIX8e(); + p[ 0]+=h[8]; p[ 1]+=h[0]; p[ 2]+=h[1]; p[ 3]+=h[2]; p[ 4]+=h[3]; p[ 5]+=c_buffer[52]; p[ 7]+=c_buffer[53]; p[ 6]+=c_buffer[54]; + TFBIGMIX8o(); + p[ 0]+=h[0]; p[ 1]+=h[1]; p[ 2]+=h[2]; p[ 3]+=h[3]; p[ 4]+=h[4]; p[ 5]+=c_buffer[54]; p[ 7]+=c_buffer[55]; p[ 6]+=c_buffer[56]; + + p[0]^= c_buffer[57]; + p[1]^= nonce2; + + t0 = vectorize(8); // extra + t1 = vectorize(0xFF00000000000000ull); // etype + t2 = t0^t1; + + h[0] = p[ 0]; + h[1] = p[ 1]; + h[2] = p[ 2]; + h[3] = p[ 3]; + h[4] = p[ 4]; + h[5] = p[ 5]; + h[6] = p[ 6]; + h[7] = p[ 7]; + + h[8] = h[0] ^ h[1] ^ h[2] ^ h[3] ^ h[4] ^ h[5] ^ h[6] ^ h[7] ^ vectorize(0x1BD11BDAA9FC1A22); + p[ 0] = p[ 1] = p[ 2] = p[ 3] = p[ 4] =p[ 5] =p[ 6] = p[ 7] = vectorize(0); + + #define h0 h[0] + #define h1 h[1] + #define h2 h[2] + #define h3 h[3] + #define h4 h[4] + #define h5 h[5] + #define h6 h[6] + #define h7 h[7] + #define h8 h[8] + + TFBIG_4e_UI2(0); + TFBIG_4o_UI2(1); + TFBIG_4e_UI2(2); + TFBIG_4o_UI2(3); + TFBIG_4e_UI2(4); + TFBIG_4o_UI2(5); + TFBIG_4e_UI2(6); + TFBIG_4o_UI2(7); + TFBIG_4e_UI2(8); + TFBIG_4o_UI2(9); + TFBIG_4e_UI2(10); + TFBIG_4o_UI2(11); + TFBIG_4e_UI2(12); + TFBIG_4o_UI2(13); + TFBIG_4e_UI2(14); + TFBIG_4o_UI2(15); + TFBIG_4e_UI2(16); + TFBIG_4o_UI2(17); + TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, 18); + + //cubehash + uint32_t x[32] = { + 0x2AEA2A61, 0x50F494D4, 0x2D538B8B, 0x4167D83E, + 0x3FEE2313, 0xC701CF8C, 0xCC39968E, 0x50AC5695, + 0x4D42C787, 0xA647A8B3, 0x97CF0BEF, 0x825B4537, + 0xEEF864D2, 0xF22090C4, 0xD0E5CD33, 0xA23911AE, + 0xFCD398D9, 0x148FE485, 0x1B017BEF, 0xB6444532, + 0x6A536159, 0x2FF5781C, 0x91FA7934, 0x0DBADEA9, + 0xD65C8A2B, 0xA5A70E75, 0xB1C62456, 0xBC796576, + 0x1921C8F7, 0xE7989AF1, 0x7795D246, 0xD43E3B44 + }; + +// *(uint2x4*)&x[ 0] ^= *((uint2x4*)&p[0]); + #pragma unroll 4 + for(int i=0;i<4;i++){ + x[i*2] ^= p[i].x; + x[i*2+1] ^= p[i].y; + } + rrounds(x); + +// *(uint2x4*)&x[ 0] ^= *((uint2x4*)&p[4]); + #pragma unroll 4 + for(int i=0;i<4;i++){ + x[i*2] ^= p[i+4].x; + x[i*2+1] ^= p[i+4].y; + } + rrounds(x); + + // Padding Block + x[ 0] ^= 0x80; + rrounds(x); + +// Final(x, (BitSequence*)Hash); + x[31] ^= 1; + + /* "the state is then transformed invertibly through 10r identical rounds" */ + #pragma unroll 10 + for (int i = 0;i < 10;++i) + rrounds(x); + + uint32_t Hash[16]; + #pragma unroll 16 + for(int i = 0; i < 16; i++) + Hash[i] = cuda_swab32(x[i]); + + uint32_t S[36]; + uint32_t B[ 9]; + + S[ 0] = S[ 1] = S[ 2] = S[ 3] = S[ 4] = S[ 5] = S[ 6] = S[ 7] = S[ 8] = S[ 9] = S[10] = S[11] = S[12] = S[13] = S[14] = S[15] = S[16] = S[17] = S[18] = S[19] = 0; + *(uint2x4*)&S[20] = *(uint2x4*)&c_S[ 0]; + *(uint2x4*)&S[28] = *(uint2x4*)&c_S[ 8]; + + FUGUE512_3(Hash[0x0], Hash[0x1], Hash[0x2]); + FUGUE512_3(Hash[0x3], Hash[0x4], Hash[0x5]); + FUGUE512_3(Hash[0x6], Hash[0x7], Hash[0x8]); + FUGUE512_3(Hash[0x9], Hash[0xA], Hash[0xB]); + FUGUE512_3(Hash[0xC], Hash[0xD], Hash[0xE]); + FUGUE512_3(Hash[0xF], 0U, 512U); + //#pragma unroll 16 + for (uint32_t i = 0; i < 32; i+=2){ + mROR3; + CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]); + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + mROR3; + CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]); + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + } + //#pragma unroll 13 + for (uint32_t i = 0; i < 13; i ++) { + S[ 4] ^= S[ 0]; S[ 9] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0]; + mROR9; + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0]; + mROR9; + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[19] ^= S[ 0]; S[27] ^= S[ 0]; + mROR9; + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[19] ^= S[ 0]; S[28] ^= S[ 0]; + mROR8; + SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); + } + S[ 4] ^= S[ 0]; S[ 9] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0]; + + S[ 0] = cuda_swab32(S[ 1]); S[ 1] = cuda_swab32(S[ 2]); S[ 2] = cuda_swab32(S[ 3]); S[ 3] = cuda_swab32(S[ 4]); + S[ 4] = cuda_swab32(S[ 9]); S[ 5] = cuda_swab32(S[10]); S[ 6] = cuda_swab32(S[11]); S[ 7] = cuda_swab32(S[12]); + S[ 8] = cuda_swab32(S[18]); S[ 9] = cuda_swab32(S[19]); S[10] = cuda_swab32(S[20]); S[11] = cuda_swab32(S[21]); + S[12] = cuda_swab32(S[27]); S[13] = cuda_swab32(S[28]); S[14] = cuda_swab32(S[29]); S[15] = cuda_swab32(S[30]); + + uint64_t *outpHash = &output64[thread<<3]; + *(uint2x4*)&outpHash[ 0] = *(uint2x4*)&S[ 0]; + *(uint2x4*)&outpHash[ 4] = *(uint2x4*)&S[ 8]; + } +} + +__host__ +void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *g_hash) +{ + const dim3 grid((threads + TPB - 1) / TPB); + const dim3 block(TPB); + + uint64_t *d_hash = (uint64_t*) g_hash; + skunk_gpu_hash_80 <<< grid, block >>> (threads, startNounce, d_hash); + + MyStreamSynchronize(NULL, 1, thr_id); +} + +__host__ +void skunk_setBlock_80(int thr_id, void *pdata) +{ + uint64_t message[20]; + memcpy(&message[0], pdata, 80); + + uint64_t p[8]; + uint64_t h[9]; + uint64_t t0, t1, t2; + + h[0] = 0x4903ADFF749C51CEull; + h[1] = 0x0D95DE399746DF03ull; + h[2] = 0x8FD1934127C79BCEull; + h[3] = 0x9A255629FF352CB1ull; + h[4] = 0x5DB62599DF6CA7B0ull; + h[5] = 0xEABE394CA9D5C3F4ull; + h[6] = 0x991112C71A75B523ull; + h[7] = 0xAE18A40B660FCC33ull; + // h[8] = h[0] ^ h[1] ^ h[2] ^ h[3] ^ h[4] ^ h[5] ^ h[6] ^ h[7] ^ SPH_C64(0x1BD11BDAA9FC1A22); + h[8] = 0xcab2076d98173ec4ULL; + + t0 = 64; // ptr + t1 = 0x7000000000000000ull; + t2 = 0x7000000000000040ull; + + memcpy(&p[0], &message[0], 64); + + TFBIG_4e_PRE(0); + TFBIG_4o_PRE(1); + TFBIG_4e_PRE(2); + TFBIG_4o_PRE(3); + TFBIG_4e_PRE(4); + TFBIG_4o_PRE(5); + TFBIG_4e_PRE(6); + TFBIG_4o_PRE(7); + TFBIG_4e_PRE(8); + TFBIG_4o_PRE(9); + TFBIG_4e_PRE(10); + TFBIG_4o_PRE(11); + TFBIG_4e_PRE(12); + TFBIG_4o_PRE(13); + TFBIG_4e_PRE(14); + TFBIG_4o_PRE(15); + TFBIG_4e_PRE(16); + TFBIG_4o_PRE(17); + TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, 18); + + message[10] = message[0] ^ p[0]; + message[11] = message[1] ^ p[1]; + message[12] = message[2] ^ p[2]; + message[13] = message[3] ^ p[3]; + message[14] = message[4] ^ p[4]; + message[15] = message[5] ^ p[5]; + message[16] = message[6] ^ p[6]; + message[17] = message[7] ^ p[7]; + message[18] = t2; + + uint64_t buffer[128]; + +// buffer[ 0] = message[ 8]; + buffer[ 0] = message[ 9]; + h[0] = buffer[ 1] = message[10]; + h[1] = buffer[ 2] = message[11]; + h[2] = buffer[ 3] = message[12]; + h[3] = buffer[ 4] = message[13]; + h[4] = buffer[ 5] = message[14]; + h[5] = buffer[ 6] = message[15]; + h[6] = buffer[ 7] = message[16]; + h[7] = buffer[ 8] = message[17]; + h[8] = buffer[ 9] = h[0]^h[1]^h[2]^h[3]^h[4]^h[5]^h[6]^h[7]^0x1BD11BDAA9FC1A22ULL; + + t0 = 0x50ull; + t1 = 0xB000000000000000ull; + t2 = t0^t1; + + p[0] = message[ 8] + h[0]; + p[2] = h[2]; p[3] = h[3]; p[4] = h[4]; + p[5] = h[5] + t0; + p[6] = h[6] + t1; + p[7] = h[7]; + + p[2] += p[3]; + p[4] += p[5]; p[6] += p[7]; + + p[3] = ROTL64(p[3], 36) ^ p[2]; p[5] = ROTL64(p[5], 19) ^ p[4]; p[7] = ROTL64(p[7], 37) ^ p[6]; + p[4] += p[7]; p[6] += p[5]; + + p[7] = ROTL64(p[7], 27) ^ p[4]; + p[5] = ROTL64(p[5], 14) ^ p[6]; + + buffer[10] = p[ 0]; + buffer[11] = p[ 2]; + buffer[12] = p[ 3]; + buffer[13] = p[ 4]; + buffer[14] = p[ 5]; + buffer[15] = p[ 6]; + buffer[16] = p[ 7]; + buffer[17] = ROTL64(p[3], 42); + buffer[18] = ROTL64(p[5], 36); + buffer[19] = ROTL64(p[7], 39); + + buffer[20] = h[6]+t1; + buffer[21] = h[8]+1; + buffer[22] = h[7]+t2; + buffer[23] = h[0]+2; + buffer[24] = h[8]+t0; + buffer[25] = h[1]+3; + buffer[26] = h[0]+t1; + buffer[27] = h[2]+4; + buffer[28] = h[1]+t2; + buffer[29] = h[3]+5; + buffer[30] = h[2]+t0; + buffer[31] = h[4]+6; + buffer[32] = h[3]+t1; + buffer[33] = h[5]+7; + buffer[34] = h[4]+t2; + buffer[35] = h[6]+8; + buffer[36] = h[5]+t0; + buffer[37] = h[7]+9; + buffer[38] = h[6]+t1; + buffer[39] = h[8]+10; + buffer[40] = h[7]+t2; + buffer[41] = h[0]+11; + buffer[42] = h[8]+t0; + buffer[43] = h[1]+12; + buffer[44] = h[0]+t1; + buffer[45] = h[2]+13; + buffer[46] = h[1]+t2; + buffer[47] = h[3]+14; + buffer[48] = h[2]+t0; + buffer[49] = h[4]+15; + buffer[50] = h[3]+t1; + buffer[51] = h[5]+16; + buffer[52] = h[4]+t2; + buffer[53] = h[6]+17; + buffer[54] = h[5]+t0; + buffer[55] = h[7]+18; + buffer[56] = h[6]+t1; + + buffer[57] = message[ 8]; + + cudaMemcpyToSymbol(c_buffer, buffer, sizeof(c_buffer), 0, cudaMemcpyHostToDevice); + CUDA_LOG_ERROR(); +} + +__host__ +void skunk_cpu_init(int thr_id, uint32_t threads) +{ + cuda_get_arch(thr_id); +} + diff --git a/skunk/cuda_skunk_streebog.cu b/skunk/cuda_skunk_streebog.cu new file mode 100644 index 0000000..3a0435f --- /dev/null +++ b/skunk/cuda_skunk_streebog.cu @@ -0,0 +1,371 @@ +/* + * Streebog GOST R 34.10-2012 CUDA implementation. + * + * https://tools.ietf.org/html/rfc6986 + * https://en.wikipedia.org/wiki/Streebog + * + * ==========================(LICENSE BEGIN)============================ + * + * @author Tanguy Pruvot - 2015 + * @author Alexis Provos - 2016 + */ + +// Further improved with shared memory partial utilization +// Tested under CUDA7.5 toolkit for cp 5.0/5.2 + +#include +#include +#include +#include + +#include "skunk/streebog_arrays.cuh" + +//#define FULL_UNROLL +__device__ __forceinline__ +static void GOST_FS(const uint2 shared[8][256],const uint2 *const __restrict__ state,uint2* return_state) +{ + return_state[0] = __ldg(&T02[__byte_perm(state[7].x,0,0x44440)]) + ^ shared[1][__byte_perm(state[6].x,0,0x44440)] + ^ shared[2][__byte_perm(state[5].x,0,0x44440)] + ^ shared[3][__byte_perm(state[4].x,0,0x44440)] + ^ shared[4][__byte_perm(state[3].x,0,0x44440)] + ^ shared[5][__byte_perm(state[2].x,0,0x44440)] + ^ shared[6][__byte_perm(state[1].x,0,0x44440)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44440)]); + + return_state[1] = __ldg(&T02[__byte_perm(state[7].x,0,0x44441)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44441)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44441)] + ^ shared[3][__byte_perm(state[4].x,0,0x44441)] + ^ shared[4][__byte_perm(state[3].x,0,0x44441)] + ^ shared[5][__byte_perm(state[2].x,0,0x44441)] + ^ shared[6][__byte_perm(state[1].x,0,0x44441)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44441)]); + + return_state[2] = __ldg(&T02[__byte_perm(state[7].x,0,0x44442)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44442)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44442)] + ^ shared[3][__byte_perm(state[4].x,0,0x44442)] + ^ shared[4][__byte_perm(state[3].x,0,0x44442)] + ^ shared[5][__byte_perm(state[2].x,0,0x44442)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44442)]) + ^ shared[6][__byte_perm(state[1].x,0,0x44442)]; + + return_state[3] = __ldg(&T02[__byte_perm(state[7].x,0,0x44443)]) + ^ shared[1][__byte_perm(state[6].x,0,0x44443)] + ^ shared[2][__byte_perm(state[5].x,0,0x44443)] + ^ shared[3][__byte_perm(state[4].x,0,0x44443)] + ^ __ldg(&T42[__byte_perm(state[3].x,0,0x44443)]) + ^ shared[5][__byte_perm(state[2].x,0,0x44443)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44443)]) + ^ shared[6][__byte_perm(state[1].x,0,0x44443)]; + + return_state[4] = __ldg(&T02[__byte_perm(state[7].y,0,0x44440)]) + ^ shared[1][__byte_perm(state[6].y,0,0x44440)] + ^ __ldg(&T22[__byte_perm(state[5].y,0,0x44440)]) + ^ shared[3][__byte_perm(state[4].y,0,0x44440)] + ^ shared[4][__byte_perm(state[3].y,0,0x44440)] + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44440)]) + ^ shared[5][__byte_perm(state[2].y,0,0x44440)] + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44440)]); + + return_state[5] = __ldg(&T02[__byte_perm(state[7].y,0,0x44441)]) + ^ shared[2][__byte_perm(state[5].y,0,0x44441)] + ^ __ldg(&T12[__byte_perm(state[6].y,0,0x44441)]) + ^ shared[3][__byte_perm(state[4].y,0,0x44441)] + ^ shared[4][__byte_perm(state[3].y,0,0x44441)] + ^ shared[5][__byte_perm(state[2].y,0,0x44441)] + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44441)]) + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44441)]); + + return_state[6] = __ldg(&T02[__byte_perm(state[7].y,0,0x44442)]) + ^ shared[1][__byte_perm(state[6].y,0,0x44442)] + ^ shared[2][__byte_perm(state[5].y,0,0x44442)] + ^ shared[3][__byte_perm(state[4].y,0,0x44442)] + ^ shared[4][__byte_perm(state[3].y,0,0x44442)] + ^ shared[5][__byte_perm(state[2].y,0,0x44442)] + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44442)]) + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44442)]); + + return_state[7] = __ldg(&T02[__byte_perm(state[7].y,0,0x44443)]) + ^ __ldg(&T12[__byte_perm(state[6].y,0,0x44443)]) + ^ shared[2][__byte_perm(state[5].y,0,0x44443)] + ^ shared[3][__byte_perm(state[4].y,0,0x44443)] + ^ shared[4][__byte_perm(state[3].y,0,0x44443)] + ^ shared[5][__byte_perm(state[2].y,0,0x44443)] + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44443)]) + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44443)]); +} + +__device__ __forceinline__ +static void GOST_FS_LDG(const uint2 shared[8][256],const uint2 *const __restrict__ state,uint2* return_state) +{ + return_state[0] = __ldg(&T02[__byte_perm(state[7].x,0,0x44440)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44440)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44440)] + ^ shared[3][__byte_perm(state[4].x,0,0x44440)] + ^ shared[4][__byte_perm(state[3].x,0,0x44440)] + ^ shared[5][__byte_perm(state[2].x,0,0x44440)] + ^ shared[6][__byte_perm(state[1].x,0,0x44440)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44440)]); + + return_state[1] = __ldg(&T02[__byte_perm(state[7].x,0,0x44441)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44441)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44441)] + ^ shared[3][__byte_perm(state[4].x,0,0x44441)] + ^ shared[4][__byte_perm(state[3].x,0,0x44441)] + ^ shared[5][__byte_perm(state[2].x,0,0x44441)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44441)]) + ^ shared[6][__byte_perm(state[1].x,0,0x44441)]; + + return_state[2] = __ldg(&T02[__byte_perm(state[7].x,0,0x44442)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44442)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44442)] + ^ shared[3][__byte_perm(state[4].x,0,0x44442)] + ^ shared[4][__byte_perm(state[3].x,0,0x44442)] + ^ shared[5][__byte_perm(state[2].x,0,0x44442)] + ^ shared[6][__byte_perm(state[1].x,0,0x44442)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44442)]); + + return_state[3] = __ldg(&T02[__byte_perm(state[7].x,0,0x44443)]) + ^ __ldg(&T12[__byte_perm(state[6].x,0,0x44443)]) + ^ shared[2][__byte_perm(state[5].x,0,0x44443)] + ^ shared[3][__byte_perm(state[4].x,0,0x44443)] + ^ shared[4][__byte_perm(state[3].x,0,0x44443)] + ^ shared[5][__byte_perm(state[2].x,0,0x44443)] + ^ shared[6][__byte_perm(state[1].x,0,0x44443)] + ^ __ldg(&T72[__byte_perm(state[0].x,0,0x44443)]); + + return_state[4] = __ldg(&T02[__byte_perm(state[7].y,0,0x44440)]) + ^ shared[1][__byte_perm(state[6].y,0,0x44440)] + ^ __ldg(&T22[__byte_perm(state[5].y,0,0x44440)]) + ^ shared[3][__byte_perm(state[4].y,0,0x44440)] + ^ shared[4][__byte_perm(state[3].y,0,0x44440)] + ^ shared[5][__byte_perm(state[2].y,0,0x44440)] + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44440)]) + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44440)]); + + return_state[5] = __ldg(&T02[__byte_perm(state[7].y,0,0x44441)]) + ^ __ldg(&T12[__byte_perm(state[6].y,0,0x44441)]) + ^ shared[2][__byte_perm(state[5].y,0,0x44441)] + ^ shared[3][__byte_perm(state[4].y,0,0x44441)] + ^ shared[4][__byte_perm(state[3].y,0,0x44441)] + ^ shared[5][__byte_perm(state[2].y,0,0x44441)] + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44441)]) + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44441)]); + + return_state[6] = __ldg(&T02[__byte_perm(state[7].y,0,0x44442)]) + ^ __ldg(&T12[__byte_perm(state[6].y,0,0x44442)]) + ^ __ldg(&T22[__byte_perm(state[5].y,0,0x44442)]) + ^ shared[3][__byte_perm(state[4].y,0,0x44442)] + ^ shared[4][__byte_perm(state[3].y,0,0x44442)] + ^ shared[5][__byte_perm(state[2].y,0,0x44442)] + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44442)]) + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44442)]); + + return_state[7] = __ldg(&T02[__byte_perm(state[7].y,0,0x44443)]) + ^ shared[1][__byte_perm(state[6].y,0,0x44443)] + ^ __ldg(&T22[__byte_perm(state[5].y,0,0x44443)]) + ^ shared[3][__byte_perm(state[4].y,0,0x44443)] + ^ shared[4][__byte_perm(state[3].y,0,0x44443)] + ^ shared[5][__byte_perm(state[2].y,0,0x44443)] + ^ __ldg(&T72[__byte_perm(state[0].y,0,0x44443)]) + ^ __ldg(&T62[__byte_perm(state[1].y,0,0x44443)]); +} + +__device__ __forceinline__ +static void GOST_E12(const uint2 shared[8][256],uint2 *const __restrict__ K, uint2 *const __restrict__ state) +{ + uint2 t[ 8]; + //#pragma unroll 12 + for(int i=0; i<12; i++){ + GOST_FS(shared,state, t); + + #pragma unroll 8 + for(int j=0;j<8;j++) + K[ j] ^= *(uint2*)&CC[i][j]; + + #pragma unroll 8 + for(int j=0;j<8;j++) + state[ j] = t[ j]; + + GOST_FS_LDG(shared,K, t); + + #pragma unroll 8 + for(int j=0;j<8;j++) + state[ j]^= t[ j]; + + #pragma unroll 8 + for(int j=0;j<8;j++) + K[ j] = t[ j]; + } +} + +__constant__ uint64_t target64[4]; + +__host__ +void skunk_set_target(uint32_t* ptarget) +{ + cudaMemcpyToSymbol(target64, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} + +#define TPB 256 +__global__ +__launch_bounds__(TPB, 2) +void skunk_streebog_gpu_final_64(uint64_t *g_hash, uint32_t* resNonce) +{ + const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint2 buf[8], t[8], temp[8], K0[8], hash[8]; + + __shared__ uint2 shared[8][256]; + shared[0][threadIdx.x] = __ldg(&T02[threadIdx.x]); + shared[1][threadIdx.x] = __ldg(&T12[threadIdx.x]); + shared[2][threadIdx.x] = __ldg(&T22[threadIdx.x]); + shared[3][threadIdx.x] = __ldg(&T32[threadIdx.x]); + shared[4][threadIdx.x] = __ldg(&T42[threadIdx.x]); + shared[5][threadIdx.x] = __ldg(&T52[threadIdx.x]); + shared[6][threadIdx.x] = __ldg(&T62[threadIdx.x]); + shared[7][threadIdx.x] = __ldg(&T72[threadIdx.x]); + +// if (thread < threads) +// { + uint64_t* inout = &g_hash[thread<<3]; + *(uint2x4*)&hash[0] = __ldg4((uint2x4*)&inout[0]); + *(uint2x4*)&hash[4] = __ldg4((uint2x4*)&inout[4]); + + __threadfence_block(); + + K0[0] = vectorize(0x74a5d4ce2efc83b3); + + #pragma unroll 8 + for(uint32_t i=0;i<8;i++){ + buf[ i] = hash[ i] ^ K0[ 0]; + } + //#pragma unroll 12 + for(int i=0; i<12; i++){ + GOST_FS(shared, buf, temp); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + buf[ j] = temp[ j] ^ *(uint2*)&precomputed_values[i][j]; + } + } + #pragma unroll 8 + for(int j=0;j<8;j++){ + buf[ j]^= hash[ j]; + } + #pragma unroll 8 + for(int j=0;j<8;j++){ + K0[ j] = buf[ j]; + } + + K0[7].y ^= 0x00020000; + + GOST_FS(shared, K0, t); + + #pragma unroll 8 + for(uint32_t i=0;i<8;i++) + K0[ i] = t[ i]; + + t[7].y ^= 0x01000000; + GOST_E12(shared, K0, t); + + #pragma unroll 8 + for(int j=0;j<8;j++) + buf[ j] ^= t[ j]; + + buf[7].y ^= 0x01000000; + + GOST_FS(shared, buf,K0); + + buf[7].y ^= 0x00020000; + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j]; + + t[7].y ^= 0x00020000; + GOST_E12(shared, K0, t); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + buf[ j] ^= t[ j]; + + GOST_FS(shared, buf,K0); // K = F(h) + + hash[7]+= vectorize(0x0100000000000000); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j] ^ hash[ j]; + +// #pragma unroll + for(uint32_t i=0; i<10; i++){ + GOST_FS(shared, t, temp); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[ i][ j]; + } + + GOST_FS(shared, K0, temp); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + K0[ j] = temp[ j]; + t[ j]^= temp[ j]; + } + } + + GOST_FS(shared, t, temp); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[10][ j]; + } + + GOST_FS(shared, K0, temp); + + #pragma unroll 8 + for(int i=7;i>=0;i--){ + t[i].x = t[i].x ^ temp[i].x; + temp[i].x = temp[i].x ^ ((uint32_t*)&CC[11])[i<<1]; + } + + uint2 last[2]; + +#define T0(x) shared[0][x] +#define T1(x) shared[1][x] +#define T2(x) shared[2][x] +#define T3(x) shared[3][x] +#define T4(x) shared[4][x] +#define T5(x) shared[5][x] +#define T6(x) shared[6][x] +#define T7(x) shared[7][x] + + last[ 0] = T0(__byte_perm(t[7].x,0,0x44443)) ^ T1(__byte_perm(t[6].x,0,0x44443)) + ^ T2(__byte_perm(t[5].x,0,0x44443)) ^ T3(__byte_perm(t[4].x,0,0x44443)) + ^ T4(__byte_perm(t[3].x,0,0x44443)) ^ T5(__byte_perm(t[2].x,0,0x44443)) + ^ T6(__byte_perm(t[1].x,0,0x44443)) ^ T7(__byte_perm(t[0].x,0,0x44443)); + + last[ 1] = T0(__byte_perm(temp[7].x,0,0x44443)) ^ T1(__byte_perm(temp[6].x,0,0x44443)) + ^ T2(__byte_perm(temp[5].x,0,0x44443)) ^ T3(__byte_perm(temp[4].x,0,0x44443)) + ^ T4(__byte_perm(temp[3].x,0,0x44443)) ^ T5(__byte_perm(temp[2].x,0,0x44443)) + ^ T6(__byte_perm(temp[1].x,0,0x44443)) ^ T7(__byte_perm(temp[0].x,0,0x44443)); + + if(devectorize(buf[3] ^ hash[3] ^ last[ 0] ^ last[ 1]) <= target64[3]){ + uint32_t tmp = atomicExch(&resNonce[0], thread); + if (tmp != UINT32_MAX) + resNonce[1] = tmp; + } +} + +__host__ +void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce) +{ + dim3 grid((threads + TPB-1) / TPB); + dim3 block(TPB); + + skunk_streebog_gpu_final_64 <<< grid, block >>> ((uint64_t*)d_hash, d_resNonce); +} diff --git a/skunk/skein_header.h b/skunk/skein_header.h new file mode 100644 index 0000000..460b311 --- /dev/null +++ b/skunk/skein_header.h @@ -0,0 +1,385 @@ +/* Elementary defines for SKEIN */ + +/* + * M9_ ## s ## _ ## i evaluates to s+i mod 9 (0 <= s <= 18, 0 <= i <= 7). + */ + +#define M9_0_0 0 +#define M9_0_1 1 +#define M9_0_2 2 +#define M9_0_3 3 +#define M9_0_4 4 +#define M9_0_5 5 +#define M9_0_6 6 +#define M9_0_7 7 + +#define M9_1_0 1 +#define M9_1_1 2 +#define M9_1_2 3 +#define M9_1_3 4 +#define M9_1_4 5 +#define M9_1_5 6 +#define M9_1_6 7 +#define M9_1_7 8 + +#define M9_2_0 2 +#define M9_2_1 3 +#define M9_2_2 4 +#define M9_2_3 5 +#define M9_2_4 6 +#define M9_2_5 7 +#define M9_2_6 8 +#define M9_2_7 0 + +#define M9_3_0 3 +#define M9_3_1 4 +#define M9_3_2 5 +#define M9_3_3 6 +#define M9_3_4 7 +#define M9_3_5 8 +#define M9_3_6 0 +#define M9_3_7 1 + +#define M9_4_0 4 +#define M9_4_1 5 +#define M9_4_2 6 +#define M9_4_3 7 +#define M9_4_4 8 +#define M9_4_5 0 +#define M9_4_6 1 +#define M9_4_7 2 + +#define M9_5_0 5 +#define M9_5_1 6 +#define M9_5_2 7 +#define M9_5_3 8 +#define M9_5_4 0 +#define M9_5_5 1 +#define M9_5_6 2 +#define M9_5_7 3 + +#define M9_6_0 6 +#define M9_6_1 7 +#define M9_6_2 8 +#define M9_6_3 0 +#define M9_6_4 1 +#define M9_6_5 2 +#define M9_6_6 3 +#define M9_6_7 4 + +#define M9_7_0 7 +#define M9_7_1 8 +#define M9_7_2 0 +#define M9_7_3 1 +#define M9_7_4 2 +#define M9_7_5 3 +#define M9_7_6 4 +#define M9_7_7 5 + +#define M9_8_0 8 +#define M9_8_1 0 +#define M9_8_2 1 +#define M9_8_3 2 +#define M9_8_4 3 +#define M9_8_5 4 +#define M9_8_6 5 +#define M9_8_7 6 + +#define M9_9_0 0 +#define M9_9_1 1 +#define M9_9_2 2 +#define M9_9_3 3 +#define M9_9_4 4 +#define M9_9_5 5 +#define M9_9_6 6 +#define M9_9_7 7 + +#define M9_10_0 1 +#define M9_10_1 2 +#define M9_10_2 3 +#define M9_10_3 4 +#define M9_10_4 5 +#define M9_10_5 6 +#define M9_10_6 7 +#define M9_10_7 8 + +#define M9_11_0 2 +#define M9_11_1 3 +#define M9_11_2 4 +#define M9_11_3 5 +#define M9_11_4 6 +#define M9_11_5 7 +#define M9_11_6 8 +#define M9_11_7 0 + +#define M9_12_0 3 +#define M9_12_1 4 +#define M9_12_2 5 +#define M9_12_3 6 +#define M9_12_4 7 +#define M9_12_5 8 +#define M9_12_6 0 +#define M9_12_7 1 + +#define M9_13_0 4 +#define M9_13_1 5 +#define M9_13_2 6 +#define M9_13_3 7 +#define M9_13_4 8 +#define M9_13_5 0 +#define M9_13_6 1 +#define M9_13_7 2 + +#define M9_14_0 5 +#define M9_14_1 6 +#define M9_14_2 7 +#define M9_14_3 8 +#define M9_14_4 0 +#define M9_14_5 1 +#define M9_14_6 2 +#define M9_14_7 3 + +#define M9_15_0 6 +#define M9_15_1 7 +#define M9_15_2 8 +#define M9_15_3 0 +#define M9_15_4 1 +#define M9_15_5 2 +#define M9_15_6 3 +#define M9_15_7 4 + +#define M9_16_0 7 +#define M9_16_1 8 +#define M9_16_2 0 +#define M9_16_3 1 +#define M9_16_4 2 +#define M9_16_5 3 +#define M9_16_6 4 +#define M9_16_7 5 + +#define M9_17_0 8 +#define M9_17_1 0 +#define M9_17_2 1 +#define M9_17_3 2 +#define M9_17_4 3 +#define M9_17_5 4 +#define M9_17_6 5 +#define M9_17_7 6 + +#define M9_18_0 0 +#define M9_18_1 1 +#define M9_18_2 2 +#define M9_18_3 3 +#define M9_18_4 4 +#define M9_18_5 5 +#define M9_18_6 6 +#define M9_18_7 7 + +/* + * M3_ ## s ## _ ## i evaluates to s+i mod 3 (0 <= s <= 18, 0 <= i <= 1). + */ + +#define M3_0_0 0 +#define M3_0_1 1 +#define M3_1_0 1 +#define M3_1_1 2 +#define M3_2_0 2 +#define M3_2_1 0 +#define M3_3_0 0 +#define M3_3_1 1 +#define M3_4_0 1 +#define M3_4_1 2 +#define M3_5_0 2 +#define M3_5_1 0 +#define M3_6_0 0 +#define M3_6_1 1 +#define M3_7_0 1 +#define M3_7_1 2 +#define M3_8_0 2 +#define M3_8_1 0 +#define M3_9_0 0 +#define M3_9_1 1 +#define M3_10_0 1 +#define M3_10_1 2 +#define M3_11_0 2 +#define M3_11_1 0 +#define M3_12_0 0 +#define M3_12_1 1 +#define M3_13_0 1 +#define M3_13_1 2 +#define M3_14_0 2 +#define M3_14_1 0 +#define M3_15_0 0 +#define M3_15_1 1 +#define M3_16_0 1 +#define M3_16_1 2 +#define M3_17_0 2 +#define M3_17_1 0 +#define M3_18_0 0 +#define M3_18_1 1 + +#define XCAT(x, y) XCAT_(x, y) +#define XCAT_(x, y) x ## y + +#define SKBI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i)) +#define SKBT(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v)) + +#define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \ + w0 = (w0 + SKBI(k, s, 0)); \ + w1 = (w1 + SKBI(k, s, 1)); \ + w2 = (w2 + SKBI(k, s, 2)); \ + w3 = (w3 + SKBI(k, s, 3)); \ + w4 = (w4 + SKBI(k, s, 4)); \ + w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \ + w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \ + w7 = (w7 + SKBI(k, s, 7) + make_uint2(s,0); \ + } + +#define TFBIG_MIX(x0, x1, rc) { \ + x0 = x0 + x1; \ + x1 = ROL2(x1, rc) ^ x0; \ + } + +#define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \ + TFBIG_MIX(w0, w1, rc0); \ + TFBIG_MIX(w2, w3, rc1); \ + TFBIG_MIX(w4, w5, rc2); \ + TFBIG_MIX(w6, w7, rc3); \ + } + +#define TFBIG_4e(s) { \ + TFBIG_ADDKEY(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \ + TFBIG_MIX8(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \ + TFBIG_MIX8(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \ + TFBIG_MIX8(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \ + } + +#define TFBIG_4o(s) { \ + TFBIG_ADDKEY(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \ + TFBIG_MIX8(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \ + TFBIG_MIX8(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \ + TFBIG_MIX8(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \ + } + +#define TFBIG_KINIT_UI2(k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2) { \ + k8 = ((k0 ^ k1) ^ (k2 ^ k3)) ^ ((k4 ^ k5) ^ (k6 ^ k7)) \ + ^ vectorize(0x1BD11BDAA9FC1A22); \ + t2 = t0 ^ t1; \ + } + +#define TFBIG_ADDKEY_UI2(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \ + w0 = (w0 + SKBI(k, s, 0)); \ + w1 = (w1 + SKBI(k, s, 1)); \ + w2 = (w2 + SKBI(k, s, 2)); \ + w3 = (w3 + SKBI(k, s, 3)); \ + w4 = (w4 + SKBI(k, s, 4)); \ + w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \ + w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \ + w7 = (w7 + SKBI(k, s, 7) + vectorize(s)); \ + } + +#define TFBIG_ADDKEY_PRE(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \ + w0 = (w0 + SKBI(k, s, 0)); \ + w1 = (w1 + SKBI(k, s, 1)); \ + w2 = (w2 + SKBI(k, s, 2)); \ + w3 = (w3 + SKBI(k, s, 3)); \ + w4 = (w4 + SKBI(k, s, 4)); \ + w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \ + w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \ + w7 = (w7 + SKBI(k, s, 7) + (s)); \ + } + +#define TFBIG_MIX_UI2(x0, x1, rc) { \ + x0 = x0 + x1; \ + x1 = ROL2(x1, rc) ^ x0; \ + } + +#define TFBIG_MIX_PRE(x0, x1, rc) { \ + x0 = x0 + x1; \ + x1 = ROTL64(x1, rc) ^ x0; \ + } + +#define TFBIG_MIX8_UI2(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \ + TFBIG_MIX_UI2(w0, w1, rc0); \ + TFBIG_MIX_UI2(w2, w3, rc1); \ + TFBIG_MIX_UI2(w4, w5, rc2); \ + TFBIG_MIX_UI2(w6, w7, rc3); \ + } + +#define TFBIG_MIX8_PRE(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \ + TFBIG_MIX_PRE(w0, w1, rc0); \ + TFBIG_MIX_PRE(w2, w3, rc1); \ + TFBIG_MIX_PRE(w4, w5, rc2); \ + TFBIG_MIX_PRE(w6, w7, rc3); \ + } + +#define TFBIG_4e_UI2(s) { \ + TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \ + TFBIG_MIX8_UI2(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \ + TFBIG_MIX8_UI2(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \ + TFBIG_MIX8_UI2(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \ + } + +#define TFBIG_4e_PRE(s) { \ + TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \ + TFBIG_MIX8_PRE(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \ + TFBIG_MIX8_PRE(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \ + TFBIG_MIX8_PRE(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \ + } + +#define TFBIG_4o_UI2(s) { \ + TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \ + TFBIG_MIX8_UI2(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \ + TFBIG_MIX8_UI2(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \ + TFBIG_MIX8_UI2(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \ + } + +#define TFBIG_4o_PRE(s) { \ + TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \ + TFBIG_MIX8_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \ + TFBIG_MIX8_PRE(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \ + TFBIG_MIX8_PRE(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \ + TFBIG_MIX8_PRE(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \ + } + +#define TFBIGMIX8e(){\ + p[ 0]+=p[ 1];p[ 2]+=p[ 3];p[ 4]+=p[ 5];p[ 6]+=p[ 7];p[ 1]=ROL2(p[ 1],46) ^ p[ 0];p[ 3]=ROL2(p[ 3],36) ^ p[ 2];p[ 5]=ROL2(p[ 5],19) ^ p[ 4];p[ 7]=ROL2(p[ 7],37) ^ p[ 6];\ + p[ 2]+=p[ 1];p[ 4]+=p[ 7];p[ 6]+=p[ 5];p[ 0]+=p[ 3];p[ 1]=ROL2(p[ 1],33) ^ p[ 2];p[ 7]=ROL2(p[ 7],27) ^ p[ 4];p[ 5]=ROL2(p[ 5],14) ^ p[ 6];p[ 3]=ROL2(p[ 3],42) ^ p[ 0];\ + p[ 4]+=p[ 1];p[ 6]+=p[ 3];p[ 0]+=p[ 5];p[ 2]+=p[ 7];p[ 1]=ROL2(p[ 1],17) ^ p[ 4];p[ 3]=ROL2(p[ 3],49) ^ p[ 6];p[ 5]=ROL2(p[ 5],36) ^ p[ 0];p[ 7]=ROL2(p[ 7],39) ^ p[ 2];\ + p[ 6]+=p[ 1];p[ 0]+=p[ 7];p[ 2]+=p[ 5];p[ 4]+=p[ 3];p[ 1]=ROL2(p[ 1],44) ^ p[ 6];p[ 7]=ROL2(p[ 7], 9) ^ p[ 0];p[ 5]=ROL2(p[ 5],54) ^ p[ 2];p[ 3]=ROR8(p[ 3]) ^ p[ 4];\ +} +#define TFBIGMIX8o(){\ + p[ 0]+=p[ 1];p[ 2]+=p[ 3];p[ 4]+=p[ 5];p[ 6]+=p[ 7];p[ 1]=ROL2(p[ 1],39) ^ p[ 0];p[ 3]=ROL2(p[ 3],30) ^ p[ 2];p[ 5]=ROL2(p[ 5],34) ^ p[ 4];p[ 7]=ROL24(p[ 7]) ^ p[ 6];\ + p[ 2]+=p[ 1];p[ 4]+=p[ 7];p[ 6]+=p[ 5];p[ 0]+=p[ 3];p[ 1]=ROL2(p[ 1],13) ^ p[ 2];p[ 7]=ROL2(p[ 7],50) ^ p[ 4];p[ 5]=ROL2(p[ 5],10) ^ p[ 6];p[ 3]=ROL2(p[ 3],17) ^ p[ 0];\ + p[ 4]+=p[ 1];p[ 6]+=p[ 3];p[ 0]+=p[ 5];p[ 2]+=p[ 7];p[ 1]=ROL2(p[ 1],25) ^ p[ 4];p[ 3]=ROL2(p[ 3],29) ^ p[ 6];p[ 5]=ROL2(p[ 5],39) ^ p[ 0];p[ 7]=ROL2(p[ 7],43) ^ p[ 2];\ + p[ 6]+=p[ 1];p[ 0]+=p[ 7];p[ 2]+=p[ 5];p[ 4]+=p[ 3];p[ 1]=ROL8(p[ 1]) ^ p[ 6];p[ 7]=ROL2(p[ 7],35) ^ p[ 0];p[ 5]=ROR8(p[ 5]) ^ p[ 2];p[ 3]=ROL2(p[ 3],22) ^ p[ 4];\ +} + +#define addwBuff(x0,x1,x2,x3,x4){\ + p[ 0]+=h[x0];\ + p[ 1]+=h[x1];\ + p[ 2]+=h[x2];\ + p[ 3]+=h[x3];\ + p[ 4]+=h[x4];\ + p[ 5]+=c_buffer[i++];\ + p[ 7]+=c_buffer[i++];\ + p[ 6]+=c_buffer[i];\ +} + +#define addwCon(x0,x1,x2,x3,x4,x5,x6,x7,y0,y1,y2){\ + p[ 0]+= h[x0];\ + p[ 1]+= h[x1];\ + p[ 2]+= h[x2];\ + p[ 3]+= h[x3];\ + p[ 4]+= h[x4];\ + p[ 5]+= h[x5] + c_t[y0];\ + p[ 6]+= h[x6] + c_t[y1];\ + p[ 7]+= h[x7] + c_add[y2];\ +} + + diff --git a/skunk.cu b/skunk/skunk.cu similarity index 76% rename from skunk.cu rename to skunk/skunk.cu index d7d9deb..0a503e7 100644 --- a/skunk.cu +++ b/skunk/skunk.cu @@ -2,7 +2,7 @@ * Skunk Algo for Signatum * (skein, cube, fugue, gost streebog) * - * tpruvot@github 06 2017 - GPLv3 + * tpruvot@github 08 2017 - GPLv3 */ extern "C" { #include "sph/sph_skein.h" @@ -14,20 +14,25 @@ extern "C" { #include "miner.h" #include "cuda_helper.h" +//#define WANT_COMPAT_KERNEL + +// compatibility kernels extern void skein512_cpu_setBlock_80(void *pdata); -extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap); - -extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads); extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); - extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x13_fugue512_cpu_free(int thr_id); - extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce); extern void streebog_set_target(const uint32_t* ptarget); +// krnlx merged kernel (for high-end cards only) +extern void skunk_cpu_init(int thr_id, uint32_t threads); +extern void skunk_set_target(uint32_t* ptarget); +extern void skunk_setBlock_80(int thr_id, void *pdata); +extern void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); +extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce); + #include #include @@ -65,6 +70,7 @@ extern "C" void skunk_hash(void *output, const void *input) } static bool init[MAX_GPUS] = { 0 }; +static bool use_compat_kernels[MAX_GPUS] = { 0 }; extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) { @@ -74,7 +80,8 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] > 500) ? 18 : 17; - if (strstr(device_name[dev_id], "GTX 10")) intensity = 19; + if (strstr(device_name[dev_id], "GTX 10")) intensity = 20; + if (strstr(device_name[dev_id], "GTX 1080")) intensity = 21; uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); //if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); @@ -92,8 +99,8 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, } gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); - quark_skein512_cpu_init(thr_id, throughput); - x11_cubehash512_cpu_init(thr_id, throughput); + skunk_cpu_init(thr_id, throughput); + use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500 || CUDART_VERSION < 7500 || CUDART_VERSION > 8000); x13_fugue512_cpu_init(thr_id, throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0); @@ -107,18 +114,26 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); - skein512_cpu_setBlock_80(endiandata); - cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); - streebog_set_target(ptarget); + if (use_compat_kernels[thr_id]) { + skein512_cpu_setBlock_80(endiandata); + streebog_set_target(ptarget); + } else { + skunk_setBlock_80(thr_id, endiandata); + skunk_set_target(ptarget); + } do { int order = 0; - skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++; - x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]); - + if (use_compat_kernels[thr_id]) { + skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]); + } else { + skunk_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); + skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]); + } cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost); *hashes_done = pdata[19] - first_nonce + throughput; @@ -158,6 +173,7 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, else if (vhash[7] > Htarg) { gpu_increment_reject(thr_id); cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); + gpulog(LOG_WARNING, thr_id, "result does not validate on CPU!"); pdata[19] = startNounce + h_resNonce[0] + 1; continue; } @@ -185,6 +201,7 @@ extern "C" void free_skunk(int thr_id) cudaThreadSynchronize(); x13_fugue512_cpu_free(thr_id); + cudaFree(d_hash[thr_id]); cudaFree(d_resNonce[thr_id]); diff --git a/skunk/streebog_arrays.cuh b/skunk/streebog_arrays.cuh new file mode 100644 index 0000000..7836293 --- /dev/null +++ b/skunk/streebog_arrays.cuh @@ -0,0 +1,567 @@ +// Tables for function F +__device__ uint2 T02[256] = { + { 0x5B711FD0, 0xE6F87E5C }, { 0x0924FA16, 0x25837780 }, { 0x852EA4A8, 0xC849E07E }, { 0x8F06C16A, 0x5B4686A1 }, + { 0xD77B416E, 0x0B32E9A2 }, { 0x67815C66, 0xABDA37A4 }, { 0x1A686676, 0xF61796A8 }, { 0x6391954B, 0xF5DC0B70 }, + { 0xB7E64BF1, 0x4862F38D }, { 0x68BD85C5, 0xFF5C629A }, { 0xFCD75795, 0xCB827DA6 }, { 0x69B9F089, 0x66D36DAF }, + { 0x483D83B0, 0x356C9F74 }, { 0x238C99A1, 0x7CBCECB1 }, { 0x31C4708D, 0x36A702AC }, { 0x2FBCDFD6, 0x9EB6A8D0 }, + { 0xE5B3AE37, 0x8B19FA51 }, { 0x8A127D0B, 0x9CCFB540 }, { 0x08208F5A, 0xBC0C78B5 }, { 0x2288ECED, 0xE533E384 }, + { 0x77C15FD2, 0xCEC2C7D3 }, { 0x505D0F5E, 0xEC7817B6 }, { 0x8336871D, 0xB94CC2C0 }, { 0xCB0B04AD, 0x8C205DB4 }, + { 0x28A0892F, 0x763C855B }, { 0xF6FF3257, 0x588D1B79 }, { 0x4311933E, 0x3FECF69E }, { 0x803A18C9, 0x0FC0D39F }, + { 0xF5F3AD83, 0xEE010A26 }, { 0x411979A6, 0x10EFE8F4 }, { 0x7DE93A10, 0x5DCDA10C }, { 0x1248E92C, 0x4A1BEE1D }, + { 0x21847339, 0x53BFF2DB }, { 0xA6A23D09, 0xB4F50CCF }, { 0xD84798CD, 0x5FB4BC9C }, { 0x071C56F9, 0xE88A2D8B }, + { 0x5A756A9C, 0x7F777169 }, { 0xA0BA1EBC, 0xC5F02E71 }, { 0x4215E672, 0xA663F9AB }, { 0xDE5FBB78, 0x2EB19E22 }, + { 0x2594BA14, 0x0DB9CE0F }, { 0x97664D84, 0x82520E63 }, { 0x0208EA98, 0x2F031E6A }, { 0xA1BE6BF0, 0x5C7F2144 }, + { 0xD16362DB, 0x7A37CB1C }, { 0x4B311C64, 0x83E08E2B }, { 0xAB960E32, 0xCF70479B }, { 0xB9DEE71E, 0x856BA986 }, + { 0x7AF56CE9, 0xB5478C87 }, { 0x5F61D6FD, 0xB8FE4288 }, { 0x966238C8, 0x1BDD0156 }, { 0x3EF8A92E, 0x62215792 }, + { 0x114476F8, 0xFC97FF42 }, { 0x56452CEB, 0x9D7D3508 }, { 0xE0A71256, 0x4C90C9B0 }, { 0xFBCB016C, 0x2308502D }, + { 0xA7A64845, 0x2D7A03FA }, { 0xBFC6C4AB, 0xF46E8B38 }, { 0xD477DEBA, 0xBDBEF8FD }, { 0xC8079B79, 0x3AAC4CEB }, + { 0xE8879D0C, 0xF09CB105 }, { 0xAC8A58CB, 0x27FA6A10 }, { 0x401D0CEA, 0x8960E7C1 }, { 0x4A356928, 0x1A6F811E }, + { 0x73D196FF, 0x90C4FB07 }, { 0x609D0A9F, 0x43501A2F }, { 0xC63F3796, 0xF7A516E0 }, { 0xB8DA9252, 0x1CE4A6B3 }, + { 0x38E08A9B, 0x1324752C }, { 0x3BEC154F, 0xA5A86473 }, { 0x5549B33F, 0x2BF12457 }, { 0x440DC5C7, 0xD766DB15 }, + { 0x9E42B792, 0xA7D179E3 }, { 0x61997FD3, 0xDADF151A }, { 0xC0271423, 0x86A0345E }, { 0x6DA939A4, 0x38D5517B }, + { 0x104003B4, 0x6518F077 }, { 0xA5AEA2DD, 0x02791D90 }, { 0x9C4A5D0A, 0x88D26789 }, { 0x0A2865C2, 0x930F66DF }, + { 0x4509B08B, 0x4EE9D420 }, { 0x6685292A, 0x32553891 }, { 0xC533A842, 0x412907BF }, { 0x544DC673, 0xB27E2B62 }, + { 0x6295E007, 0x6C530445 }, { 0x5351908A, 0x5AF406E9 }, { 0xC123616F, 0x1F2F3B6B }, { 0x5255E5C6, 0xC37B09DC }, + { 0xB1FE6844, 0x3967D133 }, { 0xF0E711E2, 0x298839C7 }, { 0x1964F9A2, 0x409B87F7 }, { 0xDB4B0719, 0xE938ADC3 }, + { 0xF9C3EBF4, 0x0C0B4E47 }, { 0xD36B8843, 0x5534D576 }, { 0xEB8B02D8, 0x4610A05A }, { 0x8232F251, 0x20C3CDF5 }, + { 0xBEC2B1E7, 0x6DE1840D }, { 0xB0FA1D08, 0xA0E8DE06 }, { 0x0D34333B, 0x7B854B54 }, { 0xBCCA5B7F, 0x42E29A67 }, + { 0xC437DD0E, 0xD8A6088A }, { 0xD943ED81, 0xC63BB3A9 }, { 0x5E65A3B1, 0x21714DBD }, { 0xB5EEA169, 0x6761EDE7 }, + { 0xD573ABF6, 0x2431F7C8 }, { 0xE1A3671A, 0xD51FC685 }, { 0x0410C92D, 0x5E063CD4 }, { 0x2CB04002, 0x283AB98F }, + { 0xB2F2F790, 0x8FEBC06C }, { 0x6FA1D33C, 0x17D64F11 }, { 0xA99EE4AA, 0xE07359F1 }, { 0x74CDC006, 0x784ED68C }, + { 0xC73B42DA, 0x6E2A19D5 }, { 0x1C7045C3, 0x8712B416 }, { 0xED93216D, 0x371582E4 }, { 0x4939F6FC, 0xACE39041 }, + { 0x86223B7C, 0x7EC5F121 }, { 0x2BAC16FB, 0xC0B09404 }, { 0x9A527EBF, 0xF9D74537 }, { 0xA3B68168, 0x737C3F2E }, + { 0xBAD278CA, 0x33E7B8D9 }, { 0xC22FFEBB, 0xA9A32A34 }, { 0xFEDFBD0D, 0xE48163CC }, { 0x6EA5A670, 0x8E594024 }, + { 0x842AD1E4, 0x51C6EF4B }, { 0x279C508C, 0x22BAD065 }, { 0x18608CEE, 0xD91488C2 }, { 0x1F7CDA17, 0x319EA549 }, + { 0x134C9C60, 0xD394E128 }, { 0x72D5E3B3, 0x094BF432 }, { 0xA4AAD791, 0x9BF612A5 }, { 0xD26FFD0F, 0xCCBBDA43 }, + { 0x946AD250, 0x34DE1F3C }, { 0x995EE16B, 0x4F5B5468 }, { 0xEA8F7794, 0xDF9FAF6F }, { 0x70DD092B, 0x2648EA58 }, + { 0x71D97C67, 0xBFC7E56D }, { 0x4F21D549, 0xDDE6B2FF }, { 0x3AE86003, 0x3C276B46 }, { 0xAF86C71F, 0x91767B4F }, + { 0x35D4B9A0, 0x68A13E78 }, { 0x030C9FD4, 0xB68C115F }, { 0x16582001, 0x141DD2C9 }, { 0xDD5324AC, 0x983D8F7D }, + { 0xCC175254, 0x64AA703F }, { 0x8E02B426, 0xC2C98994 }, { 0x9F46C2DE, 0x3E5E76D6 }, { 0x587D8004, 0x50746F03 }, + { 0x9272F1E5, 0x45DB3D82 }, { 0x9B560BF3, 0x60584A02 }, { 0x3FFCDC62, 0xFBAE58A7 }, { 0x6CAD4CE8, 0xA15A5E4E }, + { 0xCE1FB8CC, 0x4BA96E55 }, { 0xAE82B253, 0x08F9747A }, { 0xF7FB471B, 0xC102144C }, { 0xF3EB8E36, 0x9F042898 }, + { 0xF2EFFB7A, 0x068B27AD }, { 0x8C0A5EBE, 0xEDCA97FE }, { 0xF4F7D8CF, 0x778E0513 }, { 0xC32B8BF7, 0x302C2501 }, + { 0x175C554D, 0x8D92DDFC }, { 0x46052F5F, 0xF865C57F }, { 0xA2B2F424, 0xEAF3301B }, { 0xBBD60D86, 0xAA68B7EC }, + { 0x0104754C, 0x998F0F35 }, { 0x00000000, 0x00000000 }, { 0x34D0CCEC, 0xF12E314D }, { 0x061823B5, 0x710522BE }, + { 0x30C005C1, 0xAF280D99 }, { 0x5D693C65, 0x97FD5CE2 }, { 0x33CC9A15, 0x19A41CC6 }, { 0xF8C79EB8, 0x95844172 }, + { 0x937684A9, 0xDC5432B7 }, { 0x2490CF58, 0x9436C13A }, { 0x32C8EF59, 0x802B13F3 }, { 0x7CED4F5C, 0xC442AE39 }, + { 0xE3AB8D82, 0xFA1CD8EF }, { 0x4D293FD1, 0xF2E5AC95 }, { 0x907A1B7D, 0x6AD823E8 }, { 0x3CF043B6, 0x4D2249F8 }, + { 0x79F9F33D, 0x03CB9DD8 }, { 0x36D82674, 0xDE2D2F27 }, { 0x891EE2DF, 0x2A43A41F }, { 0x1B6C133A, 0x6F98999D }, + { 0x3DF436FA, 0xD4AD46CD }, { 0x269825C0, 0xBB35DF50 }, { 0x813E6D85, 0x964FDCAA }, { 0x7EE5A5C4, 0xEB41B053 }, + { 0x8B160847, 0x0540BA75 }, { 0xE7BB44AF, 0xA41AE43B }, { 0xD0671797, 0xE3B8C429 }, { 0xEE9FBEB9, 0x819993BB }, + { 0xEC975421, 0xAE9A8DD1 }, { 0x917E6E31, 0xF3572CDD }, { 0xE2AFF8CE, 0x6393D7DA }, { 0x37DC5338, 0x47A22012 }, + { 0xC903EE35, 0xA32343DE }, { 0xA89A91E6, 0x79FC56C4 }, { 0xDC5751E0, 0x01B28048 }, { 0xE4B7DB7B, 0x1296F564 }, + { 0x51597A12, 0x75F71883 }, { 0xBDCE2E33, 0xDB6D9552 }, { 0x1D74308F, 0x1E9DBB23 }, { 0xFDD322D9, 0x520D7293 }, + { 0x0C304677, 0xE20A4461 }, { 0xB4EAD425, 0xFEEEE2D2 }, { 0x20800675, 0xCA30FDEE }, { 0x47015A13, 0x61EACA4A }, + { 0x87264E30, 0xE74AFE14 }, { 0x7BF119A5, 0x2CC883B2 }, { 0xB3F682DC, 0x1664CF59 }, { 0x1E78AF5B, 0xA811AA7C }, + { 0x648DC3B2, 0x1D5626FB }, { 0xDF5BCE34, 0xB73E9117 }, { 0x6AB56F5D, 0xD05F7CF0 }, { 0xCD132718, 0xFD257F0A }, + { 0x76C52A9E, 0x574DC8E6 }, { 0x2EB8AA9A, 0x0739A7E5 }, { 0x0F3CD9A3, 0x5486553E }, { 0xAA927B7E, 0x56FF48AE }, + { 0xAD8E2D87, 0xBE756525 }, { 0xFFDBC841, 0x7D0E6CF9 }, { 0x1450CA99, 0x3B1ECCA3 }, { 0xE983E840, 0x6913BE30 }, + { 0x956EA71C, 0xAD511009 }, { 0x2DB4354E, 0xB1B5B6BA }, { 0x4E25A005, 0x4469BDCA }, { 0xCA0F71E1, 0x15AF5281 }, + { 0x8D0E2BF2, 0x744598CB }, { 0x2AA863B7, 0x593F9B31 }, { 0x29A4FC63, 0xEFB38A6E }, { 0x4C2D4A9D, 0x6B6AA3A0 }, + { 0xE6BF31E3, 0x3D95EB0E }, { 0x1554BFD5, 0xA291C396 }, { 0xEF9BCBF5, 0x18169C8E }, { 0x9D4E2846, 0x115D68BC }, + { 0xFACF7420, 0xBA875F18 }, { 0xB6E23EBD, 0xD1EDFCB8 }, { 0xF1E364AE, 0xB00736F2 }, { 0x6589B6FE, 0x84D929CE }, + { 0xDA4F7255, 0x70B7A2F6 }, { 0x5C6D4929, 0x0E7253D7 }, { 0x574159A7, 0x04F23A3D }, { 0x0B2C108E, 0x0A8069EA }, + { 0x6BB11A11, 0x49D073C5 }, { 0x39E4FFD7, 0x8AAB7A19 }, { 0x0E38ACEF, 0xCD095A0B }, { 0x5979F548, 0xC9FB6036 }, + { 0xD67F3422, 0x92BDE697 }, { 0x0514BC61, 0xC78933E1 }, { 0x75C9B54A, 0xE1C1D9B9 }, { 0xCF1BCD80, 0xD2266160 }, + { 0x78FD8671, 0x9A4492ED }, { 0x881A9793, 0xB3CCAB2A }, { 0x7FE1D088, 0x72CEBF66 }, { 0x985A9427, 0xD6D45B5D }, +}; + +__device__ uint2 T12[256] = { + { 0x8C3F55DE, 0xC811A805 }, { 0x96B50619, 0x65F5B431 }, { 0xD6706E43, 0xF74F96B1 }, { 0xCB43D336, 0x859D1E8B }, + { 0xCCFA3D84, 0x5AAB8A85 }, { 0xC295FCFD, 0xF9C7BF99 }, { 0xDE4B630F, 0xA21FD5A1 }, { 0x3B8B456D, 0xCDB3EF76 }, + { 0x7CF7C385, 0x803F59F8 }, { 0x5F31913C, 0xB27C73BE }, { 0x33B04821, 0x98E3AC66 }, { 0x26B8F818, 0xBF61674C }, + { 0xC4C130C8, 0x0FFBC995 }, { 0x10761A98, 0xAAA08620 }, { 0x210116AA, 0x6057F342 }, { 0x0654CC35, 0xF63C760C }, + { 0x667D9042, 0x2DDB45CC }, { 0x4BD40382, 0xBCF45A96 }, { 0xEF3C6F3D, 0x68E8A0C3 }, { 0x69FF73BC, 0xA7BD92D2 }, + { 0x01ED2287, 0x290AE202 }, { 0xE885818F, 0xB7DE34CD }, { 0xDD61059B, 0xD901EEA7 }, { 0x19A03553, 0xD6FA2732 }, + { 0x74CCCEC9, 0xD56F1AE8 }, { 0x2E83F554, 0xEA31245C }, { 0xA07BE499, 0x7034555D }, { 0x56E7BEF7, 0xCE26D2AC }, + { 0xA5054E38, 0xFD161857 }, { 0x527436D1, 0x6A0E7DA4 }, { 0x1CDE9FF2, 0x5BD86A38 }, { 0x31770C32, 0xCAF77562 }, + { 0xE279C8D0, 0xB09AAED9 }, { 0xC60674DB, 0x5DEF1091 }, { 0x515E5045, 0x111046A2 }, { 0x729802FC, 0x23536CE4 }, + { 0xF5B63CFA, 0xC50CBCF7 }, { 0xCD171F03, 0x73A16887 }, { 0xD9F28DBD, 0x7D2941AF }, { 0x5A4F3B9D, 0x3F5E3EB4 }, + { 0x1B677140, 0x84EEFE36 }, { 0xE7076271, 0x3DB8E3D3 }, { 0xF20FD248, 0x1A3A28F9 }, { 0xB49E7627, 0x7EBC7C75 }, + { 0xC7EB565C, 0x74E5F293 }, { 0x4F478BA4, 0x18DCF59E }, { 0xA9ADCB52, 0x0C6EF44F }, { 0x98DAC760, 0xC699812D }, + { 0x6E469D0E, 0x788B06DC }, { 0x7521EC4E, 0xFC65F8EA }, { 0x9E8E0B55, 0x30A5F721 }, { 0xBCA57B6B, 0x2BEC3F65 }, + { 0xBAF1B75E, 0xDDD04969 }, { 0xE394EA57, 0x99904CDB }, { 0xE6EA40F6, 0x14B201D1 }, { 0x41284ADD, 0xBBB0C082 }, + { 0xBF8F1DFF, 0x50F20463 }, { 0x93CBACB8, 0xE8D7F93B }, { 0x477C86E8, 0x4D8CB68E }, { 0x92268E3F, 0xC1DD1B39 }, + { 0x09D62FCB, 0x7C5AA112 }, { 0xDB35C9AE, 0x2F3D98AB }, { 0x2BFD5FF5, 0x67136956 }, { 0x36CEE280, 0x15C1E16C }, + { 0xF8F39B17, 0x1D7EB2ED }, { 0xB00DFE01, 0xDA94D37D }, { 0x760B8ADA, 0x877BC3EC }, { 0xE153AE44, 0xCB8495DF }, + { 0xB7B410B3, 0x05A24773 }, { 0x3C32ABDF, 0x12857B78 }, { 0x6812513B, 0x8EB770D0 }, { 0xD2E3E665, 0x536739B9 }, + { 0x71B26468, 0x584D57E2 }, { 0xC9849725, 0xD789C78F }, { 0x7D1AE102, 0xA935BBFA }, { 0xDFA64188, 0x8B1537A3 }, + { 0xC378DE7A, 0xD0CD5D9B }, { 0x4D80CFB7, 0x4AC82C9A }, { 0x83BDB620, 0x42777F1B }, { 0x1D33BD75, 0x72D2883A }, + { 0xAB6A8F41, 0x5E7A2D4B }, { 0xBB1C95D9, 0xF4DAAB6B }, { 0xFD8D31B6, 0x905CFFE7 }, { 0x119B381F, 0x83AA6422 }, + { 0x42022C49, 0xC0AEFB84 }, { 0x63033AE3, 0xA0F908C6 }, { 0x04938826, 0xA428AF08 }, { 0x1A8A53C7, 0xADE41C34 }, + { 0x77E6A85D, 0xAE7121EE }, { 0x25929E8C, 0xC47F5C4A }, { 0x55CDD863, 0xB538E9AA }, { 0xDAD8EB29, 0x06377AA9 }, + { 0xB3279895, 0xA18AE87B }, { 0x35E48414, 0x6EDFDA6A }, { 0x825094A7, 0x6B7D9D19 }, { 0xA4E86CBF, 0xD41CFA55 }, + { 0xEA42C59C, 0xE5CAEDC9 }, { 0x0E6FC179, 0xA36C351C }, { 0x6FABBF89, 0x5181E4DE }, { 0x184D17D4, 0xFFF0C530 }, + { 0x84045892, 0x9D41EB15 }, { 0x28D73961, 0x1C0D5250 }, { 0x0CA8856A, 0xF178EC18 }, { 0x8EF811CD, 0x9A057101 }, + { 0x3EF5EFCC, 0x4091A27C }, { 0x9F6329D2, 0x19AF1523 }, { 0xF91EB990, 0x347450EF }, { 0x8DD27759, 0xE11B4A07 }, + { 0xFC601331, 0xB9561DE5 }, { 0x2DA993C0, 0x912F1F5A }, { 0x5BA2191A, 0x1654DCB6 }, { 0x8A6B99EB, 0x3E2DDE09 }, + { 0x0F82E3FE, 0x8A66D71E }, { 0xD55A08D7, 0x8C51ADB7 }, { 0x8941FF7F, 0x4533E50F }, { 0xBD4859EC, 0x02E6DD67 }, + { 0x5DF6D52F, 0xE068AABA }, { 0xFF4A75A5, 0xC24826E3 }, { 0x88ACDDF8, 0x6C39070D }, { 0x4691A46F, 0x6486548C }, + { 0x135C7C0C, 0xD1BEBD26 }, { 0x8F15334A, 0xB30F9303 }, { 0xC1BF9A69, 0x82D9849F }, { 0x5420FAE4, 0x9C320BA8 }, + { 0xAFF90767, 0xFA528243 }, { 0xE968A308, 0x9ED4D6CF }, { 0x2C44B147, 0xB825FD58 }, { 0x5EDCB3BB, 0x9B7691BC }, + { 0x48FE6516, 0xC7EA6190 }, { 0x817AF233, 0x1063A61F }, { 0x3409A693, 0x47D53868 }, { 0x4C6DED30, 0x63C2CE98 }, + { 0x6C81D91D, 0x2A9FDFD8 }, { 0x032A6694, 0x7B1E3B06 }, { 0xFBD9FD83, 0x666089EB }, { 0x7375207B, 0x0A598EE6 }, + { 0x0AFC495F, 0x07449A14 }, { 0xB6593234, 0x2CA8A571 }, { 0x45BBC2FB, 0x1F986F8A }, { 0x50B372C2, 0x381AA4A0 }, + { 0xD81FAF3A, 0x5423A3AD }, { 0x8B86BB6C, 0x17273C0B }, { 0xC869B5A2, 0xFE83258D }, { 0xD1C980F1, 0x287902BF }, + { 0x6B3837AF, 0xF5A94BD6 }, { 0xB2CABA12, 0x88800A79 }, { 0x083B0D4C, 0x55504310 }, { 0x07B9EEB2, 0xDF36940E }, + { 0x6790B2C5, 0x04D1A7CE }, { 0xF125B4DC, 0x612413FF }, { 0xC52C124F, 0x26F12B97 }, { 0xA62F28AC, 0x86082351 }, + { 0x9937E5E7, 0xEF93632F }, { 0x293A1BE6, 0x3507B052 }, { 0x570A9C70, 0xE72C30AE }, { 0xAE1425E0, 0xD3586041 }, + { 0xD79D4CC4, 0xDE4574B3 }, { 0x40C5685A, 0x92BA2280 }, { 0xDC8C271C, 0xF00B0CA5 }, { 0xF69C5A6E, 0xBE1287F1 }, + { 0xB1E0DC86, 0xF39E317F }, { 0x20EC342D, 0x495D1140 }, { 0x3F18CD4B, 0x699B407E }, { 0x6AD51528, 0xDCA3A9D4 }, + { 0x79896924, 0x0D1D14F2 }, { 0x00000000, 0x00000000 }, { 0xA196C61E, 0x593EB75F }, { 0x0B116BD8, 0x2E4E7816 }, + { 0x58887F8E, 0x6D4AE7B0 }, { 0x872E3E06, 0xE65FD013 }, { 0xD30EC4E2, 0x7A6DDBBB }, { 0xCAAEF1B1, 0xAC97FC89 }, + { 0x1E19DBE1, 0x09CCB33C }, { 0x62EE1864, 0x89F3EAC4 }, { 0xAA87ADC6, 0x7770CF49 }, { 0x6557F6D6, 0x56C57ECA }, + { 0x6D6CFB9A, 0x03953DDA }, { 0x4456E07C, 0x36928D88 }, { 0x959F608D, 0x1EEB8F37 }, { 0x4EAAA923, 0x31D6179C }, + { 0xE5C02662, 0x6FAC3AD7 }, { 0x53991456, 0x43049FA6 }, { 0xC052B8EE, 0xABD3669D }, { 0xA7C20A2B, 0xAF02C153 }, + { 0x3723C007, 0x3CCB036E }, { 0x90E1CA2C, 0x93C9C23D }, { 0x2F6ED7D3, 0xC33BC65E }, { 0x9758249E, 0x4CFF5633 }, + { 0x325D6AA6, 0xB1E94E64 }, { 0x9472420A, 0x37E16D35 }, { 0xBE623F78, 0x79F8E661 }, { 0x02C74413, 0x5214D904 }, + { 0xF0C8965B, 0x482EF1FD }, { 0xEC1609A9, 0x13F69BC5 }, { 0x14E592BE, 0x0E882928 }, { 0x2A107D72, 0x4E198B54 }, + { 0xEBAFE71B, 0xCCC00FCB }, { 0x222B703E, 0x1B49C844 }, { 0xA840E9D5, 0x2564164D }, { 0x1FF4F966, 0x20C6513E }, + { 0x910CE8AB, 0xBAC3203F }, { 0x61C47EF0, 0xF2EDD1C2 }, { 0xACD361F3, 0x814CB945 }, { 0x4A392105, 0x95FEB894 }, + { 0x1622D6AD, 0x5C9CF02C }, { 0xF77178E9, 0x971865F3 }, { 0x9BF0A1F4, 0xBD87BA2B }, { 0x59655D09, 0x444005B2 }, + { 0x247FBC0B, 0xED75BE48 }, { 0x17CFF42A, 0x7596122E }, { 0x85E97A15, 0xB44B0917 }, { 0x2755DA9F, 0x966B854E }, + { 0x49134791, 0xEEE08392 }, { 0x23C652B9, 0x32432A46 }, { 0xAD3E4374, 0xA8465B47 }, { 0x12B15E8B, 0xF8B45F24 }, + { 0x78644BA3, 0x2417F6F0 }, { 0x7FDDA511, 0xFB2162FE }, { 0x9DA46DC1, 0x4BBBCC27 }, { 0xD024A276, 0x0173E0BD }, + { 0xA2BCA08A, 0x22208C59 }, { 0xB836F34D, 0x8FC4906D }, { 0x3A6667EA, 0xE4B90D74 }, { 0x705F46EF, 0x7147B5E0 }, + { 0x1508B039, 0x2782CB2A }, { 0xF45B1E7D, 0xEC065EF5 }, { 0xCFD05B10, 0x21B5B183 }, { 0x60295C77, 0xDBE733C0 }, + { 0x394C017E, 0x9FA73672 }, { 0x86C31C81, 0xCF553211 }, { 0x0D45A7ED, 0xD8720E1A }, { 0x3DDF8958, 0x3B8F997A }, + { 0xEDFB2B2E, 0x3AFC79C7 }, { 0x43EF0ECE, 0xE9A41986 }, { 0x7B4E2D37, 0x5F09CDF6 }, { 0xFA34DF04, 0x4F6A6BE9 }, + { 0x38A123F9, 0xB6ADD470 }, { 0x057EAAA1, 0x8D224D0A }, { 0x5C1BF7A8, 0xC96248B8 }, { 0x309A2EB5, 0xE3FD9760 }, + { 0xA351820D, 0x0B2A6E5B }, { 0xFEA75722, 0xEB42C4E1 }, { 0x9A1D8373, 0x948D5829 }, { 0x64BAD451, 0x7FCF9CC8 }, + { 0xD4B72A50, 0xA55B4FB5 }, { 0xCE3D7997, 0x08BF5381 }, { 0xE42D04E5, 0x46A6D8D5 }, { 0x7E308796, 0xD22B80FC }, + { 0xB57354A0, 0x57B69E77 }, { 0x8097D0B4, 0x3969441D }, { 0xF3E2F0CF, 0x3330CAFB }, { 0xE0BE8CC3, 0xE28E77DD }, + { 0x9C494F46, 0x62B12E25 }, { 0xB9DBD1CA, 0xA6CE726F }, { 0xEED14DBA, 0x41E242C1 }, { 0x7AA30FB0, 0x76032FF4 }, +}; + +__device__ uint2 T22[256] = { + { 0x3ACDE4CC, 0x45B268A9 }, { 0x84549D08, 0xAF7F0BE8 }, { 0xC1468263, 0x048354B3 }, { 0xC80EFED2, 0x925435C2 }, + { 0x7FDFFBA7, 0xEE4E37F2 }, { 0x0C60F14D, 0x167A3392 }, { 0xEA03E584, 0xFB123B52 }, { 0xFDBB9007, 0x4A0CAB53 }, + { 0x0F788A19, 0x9DEAF638 }, { 0x8F0CB32A, 0xCB48EC55 }, { 0xD6FEF7E0, 0xB59DC4B2 }, { 0xF4F3ECB6, 0xDCDBCA22 }, + { 0x549A9C40, 0x11DF5813 }, { 0x568ACED3, 0xE33FDEDF }, { 0x4322E9C3, 0xA0C1C812 }, { 0x58FA6D0D, 0x07A56B81 }, + { 0xB1E1F3DD, 0x77279579 }, { 0x422AC004, 0xD9B18B74 }, { 0xFFABC294, 0xB8EC2D9F }, { 0x2D75914F, 0xF4ACF8A8 }, + { 0xEF2B6878, 0x7BBF69B1 }, { 0x487AC7E1, 0xC4F62FAF }, { 0xC67E5D0C, 0x76CE809C }, { 0x92E4C14C, 0x6711D88F }, + { 0x243DEDFE, 0x627B99D9 }, { 0xDFB68B51, 0x234AA5C3 }, { 0x262DBF6D, 0x909B1F15 }, { 0x4B62BCB5, 0x4F66EA05 }, + { 0x52AA6AE8, 0x1AE2CF5A }, { 0xD0CE0148, 0xBEA053FB }, { 0xE66314C9, 0xED6808C0 }, { 0x15A82710, 0x43FE16CD }, + { 0xA06970F6, 0xCD049231 }, { 0x97CC4CB0, 0xE7BC8A6C }, { 0xFCB3B9C0, 0x337CE835 }, { 0x7CC780F3, 0x65DEF258 }, + { 0x4132BB50, 0x52214EDE }, { 0x90F493DF, 0x95F15E43 }, { 0x5DD2E0F1, 0x87083962 }, { 0xFB8B66AF, 0x41313C1A }, + { 0x51B211BC, 0x91720AF0 }, { 0xD4EEA573, 0x477D427E }, { 0xF6E3BE25, 0x2E3B4CEE }, { 0xEB0BCC43, 0x82627834 }, + { 0x78E724C8, 0x9C03E3DD }, { 0xD9867DF9, 0x2877328A }, { 0xE243B0F2, 0x14B51945 }, { 0xF7EB97E2, 0x574B0F88 }, + { 0x9AA4943A, 0x88B6FA98 }, { 0xCB168586, 0x19C4F068 }, { 0xAF11FAEF, 0x50EE6409 }, { 0xC04EABA4, 0x7DF317D5 }, + { 0x98B4C6A9, 0x7A567C54 }, { 0x4F42188E, 0xB6BBFB80 }, { 0x3BC5CD0B, 0x3CC22BCF }, { 0xAA397713, 0xD04336EA }, + { 0xEC33132C, 0xF02FAC1B }, { 0xF0D3488D, 0x2506DBA7 }, { 0xF2C31A1E, 0xD7E65D6B }, { 0x1FF820F5, 0x5EB9B216 }, + { 0xC46E0F9F, 0x842E0650 }, { 0x9E843001, 0x716BEB1D }, { 0xAB315ED4, 0xA933758C }, { 0xA2792265, 0x3FE414FD }, + { 0x1EF00932, 0x27C9F170 }, { 0x70A771BE, 0x73A4C1CA }, { 0xE76B3D0E, 0x94184BA6 }, { 0x8C14C87E, 0x40D829FF }, + { 0xC77674CB, 0x0FBEC3FA }, { 0x4A6A9572, 0x3616A963 }, { 0xC25EF937, 0x8F139119 }, { 0x5AEA3F9E, 0xF545ED4D }, + { 0x50BA387B, 0xE8024996 }, { 0x0B582E22, 0x6437E7BD }, { 0xE053E261, 0xE6559F89 }, { 0x05288DFC, 0x80AD52E3 }, + { 0xE34B9935, 0x6DC55A23 }, { 0x1AD0AD09, 0xDE14E0F5 }, { 0xA659865E, 0xC6390578 }, { 0x09487CB1, 0x96D76171 }, + { 0x21156002, 0xE2D6CB3A }, { 0x779FAED1, 0x01E915E5 }, { 0x6A77DCB7, 0xADB0213F }, { 0xB9A1A6AB, 0x9880B76E }, + { 0x8644CF9B, 0x5D9F8D24 }, { 0xC5662658, 0xFD5E4536 }, { 0x9BACBDFD, 0xF1C6B9FE }, { 0xBE9979C4, 0xEACD6341 }, + { 0x08405576, 0xEFA72217 }, { 0xD88E543E, 0x510771EC }, { 0x671F043D, 0xC2BA51CB }, { 0x71AF5879, 0x0AD482AC }, + { 0x5CDAC936, 0xFE787A04 }, { 0x8E049AED, 0xB238AF33 }, { 0x4972EE26, 0xBD866CC9 }, { 0xBD810290, 0x615DA6EB }, + { 0x8B2C1711, 0x3295FDD0 }, { 0x73BF0AEA, 0xF8340460 }, { 0x758FFC42, 0xF3099329 }, { 0x7DCFA934, 0x1CAEB13E }, + { 0x1188832B, 0xBA230748 }, { 0x874CE65C, 0x24EFCE42 }, { 0xB0E9DA1A, 0x0E57D61F }, { 0xF99B343C, 0xB3D1BAD6 }, + { 0x893C4582, 0xC0757B1C }, { 0x403A9297, 0x2B510DB8 }, { 0xF1DB614A, 0x5C7698C1 }, { 0xD5E68CB4, 0x3E0D0118 }, + { 0x855CB4CF, 0xD60F488E }, { 0xF3CB33D9, 0xAE961E0D }, { 0x14A00ED7, 0x3A8E55AB }, { 0x623789C1, 0x42170328 }, + { 0x9C946292, 0x838B6DD1 }, { 0xED3B3AEB, 0x895FEF7D }, { 0x4E4A3149, 0xCFCBB8E6 }, { 0x2F65C3DC, 0x064C7E64 }, + { 0x4C5A63DA, 0x3D2B3E2A }, { 0xA9210C47, 0x5BD3F340 }, { 0xA1615931, 0xB474D157 }, { 0x1DE87266, 0xAC5934DA }, + { 0x7AF7765B, 0x6EE36511 }, { 0x16B05C44, 0xC86ED367 }, { 0x201D49C5, 0x9BA6885C }, { 0x88346C45, 0xB905387A }, + { 0xBAB9DDFF, 0x131072C4 }, { 0xA751AF99, 0xBF49461E }, { 0x1CE05BA1, 0xD52977BC }, { 0x6027DB52, 0xB0F785E4 }, + { 0x6E57788C, 0x546D30BA }, { 0x650F56AE, 0x305AD707 }, { 0x612FF295, 0xC987C682 }, { 0xF5FBC571, 0xA5AB8944 }, + { 0x59F244CA, 0x7ED528E7 }, { 0x2C7DB888, 0x8DDCBBCE }, { 0x328DB1BA, 0xAA154ABE }, { 0x93ECE88B, 0x1E619BE9 }, + { 0xE813B717, 0x09F2BD9E }, { 0x285D1CB3, 0x7401AA4B }, { 0x3195CAEE, 0x21858F14 }, { 0x1398D1B8, 0x48C38184 }, + { 0xB2F98889, 0xFCB750D3 }, { 0x8D1CE1B9, 0x39A86A99 }, { 0xE473465A, 0x1F888E0C }, { 0x76978716, 0x78995683 }, + { 0xEE2341BF, 0x02CF2AD7 }, { 0xB3F1A14E, 0x85C713B5 }, { 0x2B4567E7, 0xFF916FE1 }, { 0xB7D10575, 0x7C1A0230 }, + { 0x5ECA9BA5, 0x0C98FCC8 }, { 0xDA9E06AD, 0xA3E7F720 }, { 0xBBB1F438, 0x6A6031A2 }, { 0x7ED7D260, 0x973E7494 }, + { 0x18C0FF9A, 0x2CF46639 }, { 0x68678E24, 0x5F50A7F3 }, { 0xA449D4CD, 0x34D983B4 }, { 0x5592B587, 0x68AF1B75 }, + { 0x2E6DEA1B, 0x7F3C3D02 }, { 0x45121F6B, 0xABFC5F5B }, { 0x29553574, 0x0D71E92D }, { 0x6D4F03D8, 0xDFFDF510 }, + { 0x9F8C19C6, 0x081BA87B }, { 0xAC0981BB, 0xDB7EA1A3 }, { 0x66172DFA, 0xBBCA12AD }, { 0x010829C7, 0x79704366 }, + { 0x7BFF5F9C, 0x17932677 }, { 0x00000000, 0x00000000 }, { 0xC906D715, 0xEB2476A4 }, { 0x0738DF6F, 0x724DD42F }, + { 0x38DDB65F, 0xB752EE65 }, { 0x3DF53BA3, 0x37FFBC86 }, { 0xB5C157E6, 0x8EFA84FC }, { 0x272596AA, 0xE9EB5C73 }, + { 0x2535C439, 0x1B0BDABF }, { 0x2A4D4E20, 0x86E12C87 }, { 0xCE3E087A, 0x9969A28B }, { 0x9D9C4B55, 0xFAFB2EB7 }, + { 0xB6D92CB2, 0x056A4156 }, { 0xDEBEA296, 0x5A3AE6A5 }, { 0xA8292580, 0x22A3B026 }, { 0x36AD1581, 0x53C85B3B }, + { 0x17B87583, 0xB11E9001 }, { 0x3FE56930, 0xC51F3A4A }, { 0xCF3621BD, 0xE019E1ED }, { 0x91FCBA18, 0xEC811D25 }, + { 0x4D524A1D, 0x445B7D4C }, { 0xDCAEF005, 0xA8DA6069 }, { 0x309DE329, 0x58F5CC72 }, { 0x6B7FF570, 0xD4C06259 }, + { 0x39D59F98, 0xCE22AD03 }, { 0x47024DF8, 0x591CD997 }, { 0x03187B54, 0x8B90C5AA }, { 0xC356D0F0, 0xF663D27F }, + { 0x35B56ED5, 0xD8589E91 }, { 0xD3D67A1C, 0x35309651 }, { 0xCD26732E, 0x12F96721 }, { 0x441A36AC, 0xD28C1C3D }, + { 0x64077F69, 0x492A9461 }, { 0x6F5F514B, 0x2D1D73DC }, { 0x0D68D88A, 0x6F0A70F4 }, { 0xCA1EAC41, 0x60B4B30E }, + { 0x3385987D, 0xD36509D8 }, { 0x0630F6A8, 0x0B3D9749 }, { 0x96C46577, 0x9ECCC90A }, { 0xAD01A87C, 0xA20EE2C5 }, + { 0x0E70A3DE, 0xE49AB55E }, { 0x82646BA0, 0xA4429CA1 }, { 0xDB962F6A, 0xDA97B446 }, { 0xD7F6DE27, 0xCCED87D4 }, + { 0x37A53C46, 0x2AB8185D }, { 0xE15BCBA6, 0x9F25DCEF }, { 0xFEA3EB53, 0xC19C6EF9 }, { 0x1BD884CE, 0xA764A393 }, + { 0x817C10F4, 0x2FD2590B }, { 0x80743933, 0x56A21A6D }, { 0x79EF0D0F, 0xE573A0BB }, { 0x95DC1E23, 0x155C0CA0 }, + { 0x94D437E4, 0x6C2C4FC6 }, { 0x23053291, 0x10364DF6 }, { 0x836C4267, 0xDD32DFC7 }, { 0x99BCEF6E, 0x03263F32 }, + { 0xE57B6F9D, 0x66F8CD6A }, { 0x5BE21659, 0x8C35AE2B }, { 0x1290F87F, 0x31B3C2E2 }, { 0xBF915003, 0x93BD2027 }, + { 0x220D1B56, 0x69460E90 }, { 0xAE19D328, 0x299E276F }, { 0x53A2432F, 0x63928C3C }, { 0xE91B9ED0, 0x7082FEF8 }, + { 0x3EED40F7, 0xBC6F792C }, { 0xD2DE53DB, 0x4C40D537 }, { 0x5FC2B262, 0x75E8BFAE }, { 0xA541FD0A, 0x4DA9C0D2 }, + { 0x3CFD1264, 0x4E8FFFE0 }, { 0x696FA7E3, 0x2620E495 }, { 0xB8A98F6C, 0xE1F0F408 }, { 0xDDA6D9C2, 0xD1AA230F }, + { 0xD1C6288F, 0xC7D0109D }, { 0x7487D585, 0x8A79D04F }, { 0xA3710BA2, 0x4694579B }, { 0xFA834F68, 0x38417F7C }, + { 0x0A5007E5, 0x1D47A4DB }, { 0x460A643F, 0x206C9AF1 }, { 0x34BD4712, 0xA128DDF7 }, { 0x72B7232D, 0x81444706 }, + { 0x02105293, 0xF2E086CC }, { 0xBC892B57, 0x182DE58D }, { 0xF8931DFB, 0xCAA1F9B0 }, { 0xCC2E5AE9, 0x6B892447 }, + { 0x0420A43B, 0xF9DD1185 }, { 0x8A243ED6, 0x4BE5BEB6 }, { 0x19C8D65D, 0x5584255F }, { 0x633FA006, 0x3B67404E }, + { 0x6C472A1F, 0xA68DB676 }, { 0xB4C97E21, 0xF78AC79A }, { 0x1080AAEC, 0xC353442E }, { 0x5782E714, 0x9A4F9DB9 }, +}; + +__device__ uint2 T32[256] = { + { 0x2C9B3220, 0x05BA7BC8 }, { 0xF8B65E4F, 0x31A54665 }, { 0x7547F4D4, 0xB1B651F7 }, { 0x7BA46682, 0x8BFA0D85 }, + { 0xA16A98BB, 0x85A96C5A }, { 0x08EB79C9, 0x990FAEF9 }, { 0x47F4A62D, 0xA15E37A2 }, { 0x5D27741E, 0x76857DCD }, + { 0x0A1820BC, 0xF8C50B80 }, { 0x01F7A2B4, 0xBE65DCB2 }, { 0x6F9426E7, 0x666D1B98 }, { 0x53C4E648, 0x4CC921BF }, + { 0x93D9CA42, 0x95410A0F }, { 0x647BA4EF, 0x20CDCCAA }, { 0x890A1871, 0x429A4060 }, { 0x9B32B38B, 0x0C4EA4F6 }, + { 0xDE354CD3, 0xCCDA362D }, { 0x7C5B2FA9, 0x96DC23BC }, { 0xAA851AB3, 0xC309BB68 }, { 0x3648E013, 0xD26131A7 }, + { 0x41FC4DB2, 0x021DC529 }, { 0x704BE48A, 0xCD5ADAB7 }, { 0x84ED71E6, 0xA77965D9 }, { 0x1734BBA4, 0x32386FD6 }, + { 0x38AB7245, 0xE82D6DD5 }, { 0x6177B4B1, 0x5C2147EA }, { 0xCF091CE8, 0x5DA1AB70 }, { 0x72B8BDFF, 0xAC907FCE }, + { 0x972278A8, 0x57C85DFD }, { 0x6B6F940D, 0xA4E44C6A }, { 0x4F1FDFE4, 0x3851995B }, { 0xED71BC9E, 0x62578CCA }, + { 0xC01D2C0A, 0xD9882BB0 }, { 0x113C503B, 0x917B9D5D }, { 0xA87643C6, 0xA2C31E11 }, { 0xA399C1CE, 0xE463C923 }, + { 0x7EA876DC, 0xF71686C5 }, { 0xE096D509, 0x87B4A973 }, { 0x9D3A5814, 0xAF0D567D }, { 0x59DCC6F4, 0xB40C2A3F }, + { 0x95D121DD, 0x3602F884 }, { 0x9836484A, 0xD3E1DD3D }, { 0xA46688E5, 0xF945E71A }, { 0xB2A591F5, 0x7518547E }, + { 0x50C01D89, 0x93665874 }, { 0x658C065B, 0x9EA81018 }, { 0xBC4603A3, 0x4F54080C }, { 0x5137BF3D, 0x2D0384C6 }, + { 0xEC861E2A, 0xDC325078 }, { 0x79573FF7, 0xEA30A8FC }, { 0xCA050CB6, 0x214D2030 }, { 0x8016C30C, 0x65F0322B }, + { 0x1B247087, 0x69BE96DD }, { 0x81E161B8, 0xDB95EE99 }, { 0xD9CA05F8, 0xD1FC1814 }, { 0xCC0DE729, 0x820ED2BB }, + { 0x430F14C7, 0x63D76050 }, { 0xA09D3A0F, 0x3BCCB0E8 }, { 0x573F54A2, 0x8E40764D }, { 0xE16177BD, 0x39D175C1 }, + { 0x734F1F4B, 0x12F5A37C }, { 0x1FDFC26D, 0xAB37C12F }, { 0x395CD0F1, 0x5648B167 }, { 0x37BF42A7, 0x6C04ED15 }, + { 0x14304065, 0xED97161D }, { 0xAB72B807, 0x7D6C67DA }, { 0xBA4EE83C, 0xEC17FA87 }, { 0x0304FBC1, 0xDFAF79CB }, + { 0x71BC463E, 0x733F0605 }, { 0x87E98A27, 0x78D61C12 }, { 0x77B4ADA1, 0xD07CF48E }, { 0x6C90DD26, 0xB9C26253 }, + { 0x60801605, 0xE2449B58 }, { 0xF941FCFB, 0x8FC09AD7 }, { 0x4BE46D0E, 0xFAD8CEA9 }, { 0x0608EB9F, 0xA343F28B }, + { 0x4917347B, 0x9B126BD0 }, { 0xE7699C22, 0x9A92874A }, { 0xC4E69EE0, 0x1B017C42 }, { 0x0EE39256, 0x3A4C5C72 }, + { 0x3EA399DA, 0x4B6E9F5E }, { 0x5AD83D35, 0x6BA353F4 }, { 0x4C1B2425, 0xE7FEE090 }, { 0x2587E95D, 0x22D00983 }, + { 0x0F1430E2, 0x842980C0 }, { 0x861E2893, 0xC6B3C0A0 }, { 0x19D729F2, 0x087433A4 }, { 0xD42D6C6F, 0x341F3DAD }, + { 0xFBB2A58E, 0xEE0A3FAE }, { 0x90DD3183, 0x4AEE73C4 }, { 0xB1A16A34, 0xAAB72DB5 }, { 0x5E238FDF, 0xA92A0406 }, + { 0x686B6FCC, 0x7B4B35A1 }, { 0xF4A6956C, 0x6A23BF6E }, { 0x851AD352, 0x191CB96B }, { 0xD6DE351A, 0x55D598D4 }, + { 0xF2AE7EF3, 0xC9604DE5 }, { 0xA981E172, 0x1CA6C2A3 }, { 0xAD7A5398, 0xDE2F9551 }, { 0x56C8F616, 0x3025AAFF }, + { 0x1E2860D9, 0x15521D9D }, { 0xFA45073A, 0x506FE31C }, { 0x2B647B0B, 0x189C55F1 }, { 0xAE7EA859, 0x0180EC9A }, + { 0x050C105E, 0x7CEC8B40 }, { 0x8BF94104, 0x2350E519 }, { 0x55CC0DD7, 0xEF8AD334 }, { 0x6D677F92, 0x07A7BEE1 }, + { 0x0DE76997, 0xE5E325B9 }, { 0xA26E637A, 0x5A061591 }, { 0x18208B46, 0xB611EF16 }, { 0xB7A981AB, 0x09F4DF3E }, + { 0xE87DACC0, 0x1EBB078A }, { 0xB65E231F, 0xB791038C }, { 0x74B05660, 0x0FD38D45 }, { 0xC1EA8EBE, 0x67EDF702 }, + { 0x831238CD, 0xBA5F4BE0 }, { 0xCEFEBE5C, 0xE3C477C2 }, { 0x354C1BD2, 0x0DCE486C }, { 0x16C31910, 0x8C5DB364 }, + { 0xA7627324, 0x26EA9ED1 }, { 0xEF82E5EB, 0x039D29B3 }, { 0xCBF2AE02, 0x9F28FC82 }, { 0xF05D2786, 0xA8AAE89C }, + { 0x2774B028, 0x431AACFA }, { 0x31B7A938, 0xCF471F9E }, { 0xE3922EC8, 0x581BD0B8 }, { 0x400BEF06, 0xBC78199B }, + { 0xBF42F862, 0x90FB71C7 }, { 0x46030499, 0x1F3BEB10 }, { 0xB55AD8DE, 0x683E7A47 }, { 0xA695D190, 0x988F4263 }, + { 0x6E638453, 0xD808C72A }, { 0xC319D7CB, 0x0627527B }, { 0xD72997AE, 0xEBB04466 }, { 0xE2658C7C, 0xE67E0C0A }, + { 0xB056C880, 0x14D2F107 }, { 0x30400B8C, 0x7122C32C }, { 0xD5DACEDB, 0x8A7AE11F }, { 0xE98A0E74, 0xA0DEDB38 }, + { 0xDCC615A6, 0xAD109354 }, { 0xF655CC19, 0x0BE91A17 }, { 0xB8BDB149, 0x8DDD5FFE }, { 0xAF890AED, 0xBFE53028 }, + { 0xB4AD7A6A, 0xD65BA6F5 }, { 0x2997227E, 0x7956F088 }, { 0x32B352F9, 0x10E86655 }, { 0xDACEFE39, 0x0E5361DF }, + { 0x9FC90161, 0xCEC7F304 }, { 0x677F5F2E, 0xFF62B561 }, { 0xD22587F0, 0x975CCF26 }, { 0x543BAF63, 0x51EF0F86 }, + { 0x10CBF28F, 0x2F1E41EF }, { 0xBBB94A88, 0x52722635 }, { 0x3344F04D, 0xAE8DBAE7 }, { 0x6688FD9A, 0x410769D3 }, + { 0x34BBB966, 0xB3AB94DE }, { 0x8DF1AA9B, 0x80131792 }, { 0xC5113C54, 0xA564A0F0 }, { 0xBDB1A117, 0xF131D4BE }, + { 0xEA8EF5B5, 0x7F71A2F3 }, { 0xC8F655C3, 0x40878549 }, { 0x44F05DEC, 0x7EF14E69 }, { 0xF55137D8, 0xD44663DC }, + { 0x523344FC, 0xF2ACFD0D }, { 0x00000000, 0x00000000 }, { 0x8EF5515A, 0x5FBC6E59 }, { 0xF1AA8532, 0x16CF342E }, + { 0xDB395C8D, 0xB036BD6D }, { 0xDD31B712, 0x13754FE6 }, { 0x2D6C9094, 0xBBDFA77A }, { 0x3A582B30, 0x89E7C8AC }, + { 0xCDFA459D, 0x3C6B0E09 }, { 0xC7E26521, 0xC4AE0589 }, { 0x7F5FD468, 0x49735A77 }, { 0x1D2C9B18, 0xCAFD6456 }, + { 0x2F9FC9E1, 0xDA150203 }, { 0x94268369, 0x88672436 }, { 0x3BAF8984, 0x3782141E }, { 0x24704BE9, 0x9CB5D531 }, + { 0x1AD3D233, 0xD7DB4A6F }, { 0x2A93D9BF, 0xA6F98943 }, { 0x8A0EE3B0, 0x9D3539AB }, { 0x15C7E2D1, 0x53F2CAAF }, + { 0x76430F15, 0x6E19283C }, { 0x6384EDC4, 0x3DEBE293 }, { 0x208BF903, 0x5E3C82C3 }, { 0xB94A13FD, 0x33B8834C }, + { 0x2E686B55, 0x6470DEB1 }, { 0x7A53C436, 0x359FD137 }, { 0x02F35975, 0x61CAA579 }, { 0x82E59A79, 0x043A9752 }, + { 0x2683129C, 0xFD7F7048 }, { 0x699CCD78, 0xC52EE913 }, { 0x7DAC8D1D, 0x28B9FF0E }, { 0x78A09D43, 0x5455744E }, + { 0xB3523341, 0xCB7D88CC }, { 0x4A13CFBA, 0x44BD121B }, { 0xFDBA4E11, 0x4D49CD25 }, { 0x8C06082F, 0x3E76CB20 }, + { 0x2278A076, 0x3FF627BA }, { 0x04FBB2EA, 0xC28957F2 }, { 0xE46D67E3, 0x453DFE81 }, { 0x3DA7621B, 0x94C1E695 }, + { 0xFF491764, 0x2C83685C }, { 0xFC4DECA5, 0xF32C1197 }, { 0x922E68F6, 0x2B24D6BD }, { 0x9AC5113F, 0xB22B7844 }, + { 0xD1217C31, 0x48F3B6ED }, { 0xBEB55AD6, 0x2E9EAD75 }, { 0x5FD42D6B, 0x174FD8B4 }, { 0x1238ABFA, 0x4ED4E496 }, + { 0xFEBEB5D0, 0x92E6B4EE }, { 0x0BEF8208, 0x46A0D732 }, { 0xA5912A51, 0x47203BA8 }, { 0xE69E3E96, 0x24F75BF8 }, + { 0x13CF094E, 0xF0B13824 }, { 0xC901F777, 0xFEE259FB }, { 0x091CDB7D, 0x276A724B }, { 0xEE75475F, 0xBDF8F501 }, + { 0x4DEC8691, 0x599B3C22 }, { 0x99C1EAFE, 0x6D84018F }, { 0x1CDB39AC, 0x7498B8E4 }, { 0x217C5BB7, 0xE0595E71 }, + { 0x3C50C0AF, 0x2AA43A27 }, { 0x3F543B6E, 0xF50B43EC }, { 0x62734F70, 0x838E3E21 }, { 0x4507FF58, 0xC09492DB }, + { 0xDFC2EE67, 0x72BFEA9F }, { 0x9CCDFAA0, 0x11688ACF }, { 0x6A9836B9, 0x1A8190D8 }, { 0xC615C795, 0x7ACBD93B }, + { 0x286080CA, 0xC7332C3A }, { 0x4EE87D50, 0x863445E9 }, { 0xD0D6DE85, 0xF6966A5F }, { 0x96D5DA1C, 0xE9AD814F }, + { 0x9E3EA3D5, 0x70A22FB6 }, { 0x582B6440, 0x0A69F68D }, { 0xC2EE757F, 0xB8428EC9 }, { 0xAC8DF12C, 0x604A49E3 }, + { 0x0C10CB23, 0x5B86F90B }, { 0x8F02F3EE, 0xE1D9B2EB }, { 0xD3D22544, 0x29391394 }, { 0x5CD0D6AA, 0xC8E0A17F }, + { 0xF7A26EAD, 0xB58CC6A5 }, { 0x238F02C2, 0x8193FB08 }, { 0x5B2F9F81, 0xD5C68F46 }, { 0x88FDBAC5, 0xFCFF9CD2 }, + { 0xF359DC47, 0x77059157 }, { 0x07FF492B, 0x1D262E39 }, { 0xE59AC557, 0xFB582233 }, { 0x42F8B673, 0xDDB2BCE2 }, + { 0x48E096CF, 0x2577B762 }, { 0xD83DA74C, 0x6F99C4A6 }, { 0xEB795701, 0xC1147E41 }, { 0x912A9337, 0xF48BAF76 }, +}; + +__device__ uint2 T42[256] = { + { 0x9B2C0A19, 0x3EF29D24 }, { 0xB6F8622F, 0xE9E16322 }, { 0x47757F7A, 0x55369940 }, { 0xA47B0B33, 0x9F4D56D5 }, + { 0x6AA1174C, 0x82256746 }, { 0xEB082FB2, 0xB8F5057D }, { 0xF4475F53, 0xCC48C10B }, { 0x275DEC3A, 0x373088D4 }, + { 0x180AED10, 0x968F4325 }, { 0xF7016151, 0x173D232C }, { 0x946FCC13, 0xAE4ED09F }, { 0xC4539873, 0xFD4B4741 }, + { 0xD9933765, 0x1B5B3F0D }, { 0x7B644052, 0x2FFCB096 }, { 0x0A89840C, 0xE02376D2 }, { 0x329B18D7, 0xA3AE3A70 }, + { 0x35DE8526, 0x419CBD23 }, { 0x5B7C3199, 0xFAFEBF11 }, { 0x85AA9B0D, 0x0397074F }, { 0x4836B970, 0xC58AD4FB }, + { 0xFC4104A8, 0xBEC60BE3 }, { 0x4B708772, 0x1EFF36DC }, { 0xED8453B6, 0x131FDC33 }, { 0x341764D3, 0x0844E33E }, + { 0xAB38CD39, 0x0FF11B6E }, { 0x7761B85A, 0x64351F0A }, { 0x09CFBA0E, 0x3B5694F5 }, { 0xB87245D0, 0x30857084 }, + { 0x2297AE3C, 0x47AFB3BD }, { 0x6F6B554A, 0xF2BA5C2F }, { 0x1F4F70E1, 0x74BDC476 }, { 0x71EDC45E, 0xCFDFC644 }, + { 0x1DC0AF16, 0xE610784C }, { 0x3C113F28, 0x7ACA29D6 }, { 0x76A859AF, 0x2DED4117 }, { 0x99A3D5EE, 0xAC5F211E }, + { 0xA87EF33B, 0xD484F949 }, { 0x96E013E4, 0x3CE36CA5 }, { 0x3A9D432C, 0xD120F098 }, { 0xDC597563, 0x6BC40464 }, + { 0xD1956C9E, 0x69D5F5E5 }, { 0x3698BB24, 0x9AE95F04 }, { 0x66A4EF44, 0xC9ECC8DA }, { 0xA5B2EAC6, 0xD69508C8 }, + { 0xC0503B80, 0xC40C2235 }, { 0x8C652103, 0x38C193BA }, { 0x46BC9E8F, 0x1CEEC75D }, { 0x37515AD1, 0xD3310119 }, + { 0x86ECA50F, 0xD8E2E568 }, { 0x5779C991, 0xB137108D }, { 0x05CA4206, 0x709F3B69 }, { 0x1680CAEF, 0x4FEB5083 }, + { 0x241BD238, 0xEC456AF3 }, { 0xE181ABBE, 0x58D673AF }, { 0xCAD9BF8C, 0x242F54E7 }, { 0x0DCC19FD, 0x0211F181 }, + { 0x0F43C60A, 0x90BC4DBB }, { 0x9DA0761D, 0x9518446A }, { 0x3F57012A, 0xA1BFCBF1 }, { 0x61E172B5, 0x2BDE4F89 }, + { 0x4F732481, 0x27B853A8 }, { 0xDF1F4B61, 0xB0B1E643 }, { 0x5C39AC68, 0x18CC3842 }, { 0xBF37D821, 0xD2B7F7D7 }, + { 0x3014C720, 0x3103864A }, { 0x72ABFA5C, 0x14AA2463 }, { 0x4EBAC574, 0x6E600DB5 }, { 0x0403A3F3, 0x39476574 }, + { 0xBC71E623, 0x09C215F0 }, { 0xE987F045, 0x2A58B947 }, { 0xB477BDD8, 0x7B4CDF18 }, { 0x906C6FE0, 0x9709B5EB }, + { 0x8060D90B, 0x73083C26 }, { 0x41F9037E, 0xFEDC400E }, { 0xE44BE9B8, 0x284948C6 }, { 0x08065BFB, 0x728ECAE8 }, + { 0x17492B1A, 0x06330E9E }, { 0x69E7294E, 0x59508561 }, { 0xE6C4364F, 0xBAE4F4FC }, { 0xE30E7449, 0xCA7BCF95 }, + { 0xA33E96C2, 0x7D7FD186 }, { 0xD85AD690, 0x52836110 }, { 0x1B4CD312, 0x4DFAA102 }, { 0x872544FA, 0x913ABB75 }, + { 0x140F1518, 0xDD46ECB9 }, { 0x1E869114, 0x3D659A6B }, { 0xD719109A, 0xC23F2CAB }, { 0x2DD46836, 0xD713FE06 }, + { 0xB2FBC1DC, 0xD0A60656 }, { 0xDD909496, 0x221C5A79 }, { 0xA1B14935, 0xEFD26DBC }, { 0x235E4FC9, 0x0E77EDA0 }, + { 0x6B68F6B9, 0xCBFD395B }, { 0xA6F4D4C4, 0x0DE0EAEF }, { 0x1A8532E7, 0x0422FF1F }, { 0xDED6AA94, 0xF969B85E }, + { 0xAEF28F3F, 0x7F6E2007 }, { 0x81A938FE, 0x3AD0623B }, { 0x7AADA1A7, 0x6624EE8B }, { 0xC856607B, 0xB682E8DD }, + { 0x281E2A30, 0xA78CC56F }, { 0x45FAA08D, 0xC79B257A }, { 0x642B30B3, 0x5B4174E0 }, { 0x7EAE0254, 0x5F638BFF }, + { 0x0C05F808, 0x4BC9AF9C }, { 0xF98B46AE, 0xCE59308A }, { 0xCC55C388, 0x8FC58DA9 }, { 0x676D0EB1, 0x803496C7 }, + { 0xE70DD7BA, 0xF33CAAE1 }, { 0x6EA2B4BF, 0xBB620232 }, { 0x201871CB, 0xD5020F87 }, { 0xA9B712CE, 0x9D5CA754 }, + { 0x7DE83C56, 0x841669D8 }, { 0x5EB6739F, 0x8A618478 }, { 0xB0741E2B, 0x420BBA6C }, { 0xEAC1CE47, 0xF12D5B60 }, + { 0x1283691C, 0x76AC35F7 }, { 0xFECEDB5F, 0x2C6BB7D9 }, { 0x4C351A83, 0xFCCDB18F }, { 0xC3160582, 0x1F79C012 }, + { 0x62A74CB7, 0xF0ABADAE }, { 0x82EF06FC, 0xE1A5801C }, { 0xF2CB2357, 0x67A21845 }, { 0x5DF04D9D, 0x5114665F }, + { 0x74278658, 0xBF40FD2D }, { 0xB73183DA, 0xA0393D3F }, { 0x92E3B017, 0x05A409D1 }, { 0x0B4065F9, 0xA9FB28CF }, + { 0x42BF3D7C, 0x25A9A229 }, { 0x03463E02, 0xDB75E227 }, { 0x5AB5D06C, 0xB326E10C }, { 0x95A62DE6, 0xE7968E82 }, + { 0x636EAD42, 0xB973F3B3 }, { 0x19C30CE5, 0xDF571D38 }, { 0x29D7CBC5, 0xEE549B72 }, { 0x65E2D146, 0x12992AFD }, + { 0x56B02864, 0xF8EF4E90 }, { 0x4030E28B, 0xB7041E13 }, { 0xDAD50967, 0xC02EDD2A }, { 0x8AE95D07, 0x932B4AF4 }, + { 0xC6DC4784, 0x6FE6FB7B }, { 0x55F61666, 0x239AACB7 }, { 0xBDB807D6, 0x401A4BED }, { 0x89AF6305, 0x485EA8D3 }, + { 0xADB4B13D, 0xA41BC220 }, { 0x9729F211, 0x753B32B8 }, { 0xB3322029, 0x997E584B }, { 0xCEDA1C7F, 0x1D683193 }, + { 0xC99F818E, 0xFF5AB6C0 }, { 0x7F67E3A1, 0x16BBD5E2 }, { 0x25D233CD, 0xA59D34EE }, { 0x3B54A2D9, 0x98F8AE85 }, + { 0xCB105E79, 0x6DF70AFA }, { 0xB9BBA425, 0x795D2E99 }, { 0x44334178, 0x8E437B67 }, { 0x886682F0, 0x0186F6CE }, + { 0xBB347BD2, 0xEBF092A3 }, { 0xF18D1D55, 0xBCD7FA62 }, { 0x11C5571E, 0xADD9D7D0 }, { 0xB1BDFFDE, 0x0BD3E471 }, + { 0x8EEAFEF4, 0xAA6C2F80 }, { 0xF6C880A4, 0x5EE57D31 }, { 0xF044FCA0, 0xF50FA47F }, { 0x51F5B595, 0x1ADDC9C3 }, + { 0x3352F922, 0xEA76646D }, { 0x00000000, 0x00000000 }, { 0xF58EBEA6, 0x85909F16 }, { 0xAAF12CCC, 0x46294573 }, + { 0x39DB7D2E, 0x0A5512BF }, { 0x31DD26D5, 0x78DBD857 }, { 0x6C2D6B48, 0x29CFBE08 }, { 0x583A0F9B, 0x218B5D36 }, + { 0xFACD78AC, 0x152CD2AD }, { 0xE2C795BC, 0x83A39188 }, { 0x5F7F926A, 0xC3B9DA65 }, { 0x2C1D89C3, 0x9ECBA01B }, + { 0x9F2FA9EA, 0x07B5F850 }, { 0x26940DCF, 0x7EE8D6C9 }, { 0xAF3B6ECA, 0x36B67E1A }, { 0x702425AB, 0x86079859 }, + { 0xD31AB369, 0xFB7849DF }, { 0x932A51E2, 0x4C7C57CC }, { 0x0E8A27FF, 0xD96413A6 }, { 0xC715A671, 0x263EA566 }, + { 0x4376DC89, 0x6C71FC34 }, { 0x84637AF8, 0x4A4F5952 }, { 0x8B20BCF2, 0xDAF314E9 }, { 0x4AB96687, 0x572768C1 }, + { 0x682EC8BB, 0x1088DB7C }, { 0x537A6A62, 0x887075F9 }, { 0xF302C2A2, 0x2E7A4658 }, { 0xE582084D, 0x619116DB }, + { 0x8326E709, 0xA87DDE01 }, { 0x9C6997E8, 0xDCC01A77 }, { 0xAC7D50C8, 0xEDC39C3D }, { 0xA078A8C0, 0xA60A33A1 }, + { 0x52B38B97, 0xC1A82BE4 }, { 0x134A88E9, 0x3F746BEA }, { 0xBAFD9A27, 0xA228CCBE }, { 0x068C7C04, 0xABEAD94E }, + { 0x78227E50, 0xF48952B1 }, { 0xFB049959, 0x5CF48CB0 }, { 0x6DE48ABD, 0x6017E015 }, { 0xA73D3531, 0x4438B4F2 }, + { 0x49FF5885, 0x8C528AE6 }, { 0x4DFCFB76, 0xB515EF92 }, { 0x2E925634, 0x0C661C21 }, { 0xC59A7986, 0xB493195C }, + { 0x21D1903E, 0x9CDA519A }, { 0xB5BE5C2D, 0x32948105 }, { 0xD45F2E98, 0x194ACE8C }, { 0x38129CDB, 0x438D4CA2 }, + { 0xBEFE39D4, 0x9B6FA9CA }, { 0xEF0B8C41, 0x81B26009 }, { 0x91A58E15, 0xDED1EBF6 }, { 0x9EE6481F, 0x4E6DA64D }, + { 0xCF13FD8A, 0x54B06F8E }, { 0x01C9E1F5, 0x49D85E1D }, { 0x1C094EE3, 0xAFC82651 }, { 0x75EE67AD, 0xF698A330 }, + { 0xEC4DB243, 0x5AC7822E }, { 0xC199DA75, 0x8DD47C28 }, { 0xDB1CE892, 0x89F68337 }, { 0x7C21DDA3, 0xCDCE37C5 }, + { 0x503C5460, 0x530597DE }, { 0x543FF793, 0x6A42F2AA }, { 0x73621BA9, 0x5D727A7E }, { 0x07459DF1, 0xE2328753 }, + { 0xC2DFE477, 0x56A19E0F }, { 0xCD9C227D, 0xC61DD3B4 }, { 0x986A341B, 0xE5877F03 }, { 0x15C6F4ED, 0x949EB2A4 }, + { 0x60289340, 0x62061194 }, { 0xE84E11B0, 0x6380E75A }, { 0xD6D0F16F, 0x8BE772B6 }, { 0xD596CF6D, 0x50929091 }, + { 0x3E9EE0DF, 0xE86795EC }, { 0x2B581432, 0x7CF92748 }, { 0xEEC26DB4, 0xC86A3E14 }, { 0x8DACC0F6, 0x7119CDA7 }, + { 0x100CB6EB, 0xE40189CD }, { 0x028FDFF7, 0x92ADBC3A }, { 0xD2D3529C, 0xB2A017C2 }, { 0xD05C8D6B, 0x200DABF8 }, + { 0xA2F77737, 0x34A78F9B }, { 0x8F231F01, 0xE3B4719D }, { 0x2F5BB7C1, 0x45BE423C }, { 0xFD88E55D, 0xF71E55FE }, + { 0x59F3EE6E, 0x6853032B }, { 0xFF073AAA, 0x65B3E9C4 }, { 0x9AE5EBEC, 0x772AC339 }, { 0xF842A75B, 0x87816E97 }, + { 0xE0484A4B, 0x110E2DB2 }, { 0x3DD8DEDD, 0x331277CB }, { 0x79EB9FA5, 0xBD510CAC }, { 0x2A91F5C7, 0x35217955 }, +}; + +__device__ uint2 T52[256] = { + { 0x46E06A6D, 0x8AB0A968 }, { 0x4BF0B33A, 0x43C7E80B }, { 0x6B161EE5, 0x08C9B354 }, { 0xEBA990BE, 0x39F1C235 }, + { 0x6606C7B2, 0xC1BEF237 }, { 0x614569AA, 0x2C209233 }, { 0x6FC3289A, 0xEB01523B }, { 0x935ACEDD, 0x946953AB }, + { 0x3E13340E, 0x272838F6 }, { 0xA12BA052, 0x8B0455EC }, { 0x978FF8A2, 0x77A1B2C4 }, { 0x13E54086, 0xA55122CA }, + { 0x62D3F1CD, 0x22761358 }, { 0x08B76CFE, 0xDB8DDFDE }, { 0x9E4A178A, 0x5D1E12C8 }, { 0x03969867, 0x0E56816B }, + { 0x3303ED59, 0xEE5F7995 }, { 0xAB78D71D, 0xAFED748B }, { 0xF93E53EE, 0x6D929F2D }, { 0xBA798C2A, 0xF5D8A8F8 }, + { 0x8E39CF6B, 0xF619B169 }, { 0x749104E2, 0x95DDAF2F }, { 0xE0886427, 0xEC2A9C80 }, { 0x825B95EA, 0xCE5C8FD8 }, + { 0x3AC60271, 0xC4E0D999 }, { 0x173076F9, 0x4699C3A5 }, { 0x50A29F42, 0x3D1B151F }, { 0x2BC75946, 0x9ED505EA }, + { 0xDC7F4B98, 0x34665ACF }, { 0x292342F7, 0x61B1FB53 }, { 0x0E864130, 0xC721C008 }, { 0x96FD7B74, 0x8693CD16 }, + { 0x7136B14B, 0x87273192 }, { 0x63A1721B, 0xD3446C8A }, { 0xA6680E4A, 0x669A35E8 }, { 0x39509A16, 0xCAB658F2 }, + { 0xF42E8AB9, 0xA4E5DE4E }, { 0xE83F08D9, 0x37A7435E }, { 0xE26C7F96, 0x134E6239 }, { 0x2DF67488, 0x82791A3C }, + { 0x8329163C, 0x3F6EF00A }, { 0xFDEB6591, 0x8E5A7E42 }, { 0x7981DDB5, 0x5CAAEE4C }, { 0x5AF1E80D, 0x19F23478 }, + { 0xED98BD70, 0x255DDDE3 }, { 0xA99CCCAC, 0x50898A32 }, { 0xDA4E6656, 0x28CA4519 }, { 0x4CB31D22, 0xAE59880F }, + { 0x37D6DB26, 0x0D9798FA }, { 0xB4FFCD1A, 0x32F968F0 }, { 0x4F258545, 0xA00F0964 }, { 0x5E24DE72, 0xFA3AD517 }, + { 0x5DB24615, 0xF46C547C }, { 0xFF0F7E20, 0x713E80FB }, { 0x73D2AAFA, 0x7843CF2B }, { 0xAEDF62B4, 0xBD17EA36 }, + { 0xD16F92CF, 0xFD111BAC }, { 0xC72D67E0, 0x4ABAA7DB }, { 0xAD49FAD3, 0xB3416B5D }, { 0x4914A88B, 0xBCA316B2 }, + { 0x8AECF914, 0x15D15006 }, { 0xE31EFC40, 0xE27C1DEB }, { 0x9BEDA223, 0x4FE48C75 }, { 0x1B522C78, 0x7EDCFD14 }, + { 0x7C26681C, 0x4E5070F1 }, { 0x5815F3BC, 0xE696CAC1 }, { 0x3BB481A7, 0x35D2A64B }, { 0xFE7DFDF6, 0x800CFF29 }, + { 0xD5BAA4B0, 0x1ED9FAC3 }, { 0x1EF599D1, 0x6C2663A9 }, { 0x34404341, 0x03C11991 }, { 0x69F20554, 0xF7AD4DED }, + { 0xB61BD6AB, 0xCD9D9649 }, { 0xEADB1368, 0xC8C3BDE7 }, { 0xB02AFB65, 0xD131899F }, { 0xE1FAE7F1, 0x1D18E352 }, + { 0xEF7CA6C1, 0xDA39235A }, { 0xA8EE4F7A, 0xA1BBF5E0 }, { 0xCF9A0B1E, 0x91377805 }, { 0x80BF8E5B, 0x31387161 }, + { 0xDB3CE580, 0xD9F83ACB }, { 0xD38B897E, 0x0275E515 }, { 0xF0FBBCC6, 0x472D3F21 }, { 0x868EA395, 0x2D946EB7 }, + { 0x21942E09, 0xBA3C248D }, { 0xBFDE3983, 0xE7223645 }, { 0x02E41BB1, 0xFF64FEB9 }, { 0x0D10D957, 0xC9774163 }, + { 0xB58D4ECC, 0xC3CB1722 }, { 0x9CAE0C3B, 0xA27AEC71 }, { 0xA48C15FB, 0x99FECB51 }, { 0x6D27332B, 0x1465AC82 }, + { 0xD75EBF01, 0xE1BD047A }, { 0x941960C5, 0x79F733AF }, { 0x41A3C475, 0x672EC96C }, { 0x524684F3, 0xC27FEBA6 }, + { 0x75E38734, 0x64EFD0FD }, { 0x0743AE18, 0xED9E6004 }, { 0xB9EF144D, 0xFB8E2993 }, { 0x0C625A81, 0x38453EB1 }, + { 0x42355C12, 0x69784807 }, { 0x14A6EE9E, 0x48CF42CE }, { 0x06312DCE, 0x1CAC1FD6 }, { 0x4792E9BB, 0x7B82D6BA }, + { 0x1F871A07, 0x9D141C7B }, { 0xC11C4A2E, 0x5616B80D }, { 0xF21FA777, 0xB849C198 }, { 0xC8D9A506, 0x7CA91801 }, + { 0x7EC273AD, 0xB1348E48 }, { 0x987B3A44, 0x41B20D1E }, { 0xA3CFBBE3, 0x7460AB55 }, { 0x4576F20A, 0x84E62803 }, + { 0x897A6173, 0x1B87D16D }, { 0xE45D5258, 0x0FE27DEF }, { 0xCA3DBEB7, 0x83CDE6B8 }, { 0xD01D1119, 0x0C23647E }, + { 0xA0592384, 0x7A362A3E }, { 0xF1893F10, 0xB61F40F3 }, { 0x440471DC, 0x75D457D1 }, { 0x237035B8, 0x4558DA34 }, + { 0x87FC2043, 0xDCA61165 }, { 0xC9AB26D0, 0x8D9B67D3 }, { 0xEE0E2517, 0x2B0B5C88 }, { 0x2AB5DA90, 0x6FE77A38 }, + { 0xD9D8FE31, 0x269CC472 }, { 0xFAA8CB89, 0x63C41E46 }, { 0x1642F52F, 0xB7ABBC77 }, { 0x2F126F39, 0x7D1DE485 }, + { 0x24339BA0, 0xA8C6BA30 }, { 0xCEE888C8, 0x600507D7 }, { 0x1A20AFAE, 0x8FEE82C6 }, { 0x26D78011, 0x57A24489 }, + { 0x36A458F0, 0xFCA5E728 }, { 0x8F4B4CBD, 0x072BCEBB }, { 0xF36D24A1, 0x497BBE4A }, { 0xB769557D, 0x3CAFE99B }, + { 0x05A7B5A9, 0x12FA9EBD }, { 0x5B836BDB, 0xE8C04BAA }, { 0xAC3B7905, 0x4273148F }, { 0x2851C121, 0x90838481 }, + { 0x6C55B0FD, 0xE557D350 }, { 0xCB4F3D61, 0x72FF996A }, { 0x64E2DC03, 0x3EDA0C8E }, { 0xE6B949E9, 0xF0868356 }, + { 0xBB0B0FFC, 0x04EAD72A }, { 0x5967706A, 0x17A4B513 }, { 0x04D5367F, 0xE3C8E16F }, { 0x8DAF570C, 0xF84F3002 }, + { 0xBD3A2232, 0x1846C8FC }, { 0xF6CA9108, 0x5B8120F7 }, { 0xECEA3EA6, 0xD46FA231 }, { 0x53340725, 0x334D9474 }, + { 0xC28AD249, 0x58403966 }, { 0x9A9F21F5, 0xBED6F3A7 }, { 0xA5FE962D, 0x68CCB483 }, { 0x57E1315A, 0xD085751B }, + { 0xE52FD18E, 0xFED0023D }, { 0x20E6ADDF, 0x4B0E5B5F }, { 0x6EB1AB4C, 0x1A332DE9 }, { 0x7B65C604, 0xA3CE10F5 }, + { 0xD62C3CD7, 0x108F7BA8 }, { 0x1073D8E1, 0xAB07A3A1 }, { 0x91BED56C, 0x6B0DAD12 }, { 0x3532C097, 0xF2F36643 }, + { 0xB2CEE0D4, 0x2E557726 }, { 0x00000000, 0x00000000 }, { 0xDE9B5029, 0xCB02A476 }, { 0x8B9E7AC2, 0xE4E32FD4 }, + { 0x2C84F75E, 0x734B65EE }, { 0xCD7E10AF, 0x6E5386BC }, { 0xE7CBCA3F, 0x01B4FC84 }, { 0x65905FD5, 0xCFE8735C }, + { 0x0FF4C2E6, 0x3613BFDA }, { 0x31E7F6E8, 0x113B872C }, { 0x55052AEB, 0x2FE18BA2 }, { 0xBC48A1E4, 0xE974B72E }, + { 0xB89D979B, 0x0ABC5641 }, { 0x2202B66E, 0xB46AA5E6 }, { 0xC4BBFF87, 0x44EC26B0 }, { 0x27A503C7, 0xA6903B5B }, + { 0xFC99E647, 0x7F680190 }, { 0xA71A8D9C, 0x97A84A3A }, { 0x6037EA7C, 0xDD12EDE1 }, { 0xDD0DC84E, 0xC554251D }, + { 0x956BE313, 0x88C54C7D }, { 0x48662B5D, 0x4D916960 }, { 0x9909B992, 0xB08072CC }, { 0xC5C97C51, 0xB5DE5962 }, + { 0x19B637C9, 0x81B803AD }, { 0x4A8230EC, 0xB2F597D9 }, { 0x5F565DA4, 0x0B08AAC5 }, { 0x017283D6, 0xF1327FD2 }, + { 0x78F35E63, 0xAD98919E }, { 0x76751F53, 0x6AB95196 }, { 0x0A53774F, 0x24E92167 }, { 0x15D46D48, 0xB9FD3D1C }, + { 0xFBDA485F, 0x92F66194 }, { 0x11015B37, 0x5A35DC73 }, { 0x5477A93D, 0xDED3F470 }, { 0x81CD0D8D, 0xC00A0EB3 }, + { 0xC65FE436, 0xBB88D809 }, { 0xBEACBA55, 0x16104997 }, { 0x5693B28C, 0x21B70AC9 }, { 0x25411876, 0x59F4C5E2 }, + { 0x0B21F499, 0xD5DB5EB5 }, { 0xF55C096F, 0x55D7A19C }, { 0xC3F8519F, 0xA97246B4 }, { 0xA2BD3835, 0x8552D487 }, + { 0x1297C350, 0x54635D18 }, { 0x85183BF2, 0x23C2EFDC }, { 0xCC0C9379, 0x9F61F96E }, { 0x9DDC8FED, 0x534893A3 }, + { 0xAA0A54CB, 0x5EDF0B59 }, { 0x9F38945C, 0xAC2C6D1A }, { 0xD8AA7DE7, 0xD7AEBBA0 }, { 0x09C5EF28, 0x2ABFA00C }, + { 0x3CF72FBF, 0xD84CC64F }, { 0xB15878B3, 0x2003F64D }, { 0xC06EC9F8, 0xA724C7DF }, { 0x68808682, 0x069F323F }, + { 0x51D01C94, 0xCC296ACD }, { 0x5CC0C5C3, 0x055E2BAE }, { 0x1D6301B6, 0x6270E2C2 }, { 0x382219C0, 0x3B842720 }, + { 0x846AB824, 0xD2F0900E }, { 0x7A1745D2, 0x52FC6F27 }, { 0xE94D8B0F, 0xC6953C8C }, { 0x3095753E, 0xE009F8FE }, + { 0x92284D0B, 0x655B2C79 }, { 0x4347DFC4, 0x984A37D5 }, { 0x8808E2A5, 0xEAB5AEBF }, { 0x90CC56BA, 0x9A3FD2C0 }, + { 0xF84CD038, 0x9CA0E0FF }, { 0xAFADE162, 0x4C2595E4 }, { 0xB3BC6302, 0xDF6708F4 }, { 0x7D54EBCA, 0xBF620F23 }, + { 0x1C118260, 0x93429D10 }, { 0x8CDDD4DA, 0x097D4FD0 }, { 0x2E60ECEF, 0x8C2F9B57 }, { 0x18C4B41F, 0x708A7C7F }, + { 0xDFE9D3FF, 0x3A30DBA4 }, { 0x7FB0F07B, 0x4006F19A }, { 0x4DC19EF4, 0x5F6BF7DD }, { 0x32716E8F, 0x1F6D0647 }, + { 0x6A649D33, 0xF9FBCC86 }, { 0x67744464, 0x308C8DE5 }, { 0x72A0292C, 0x8971B0F9 }, { 0x3F61B7D8, 0xD61A4724 }, + { 0xD4C82766, 0xEFEB8511 }, { 0x40D147A3, 0x961CB6BE }, { 0xF7B812DE, 0xAAB35F25 }, { 0x7044329D, 0x76154E40 }, + { 0x4E570693, 0x513D76B6 }, { 0xD2F90AA8, 0xF3479AC7 }, { 0x77079C85, 0x9B8B2E44 }, { 0x3D85AC69, 0x297EB99D }, +}; + +__device__ uint2 T62[256] = { + { 0xFC7D40C3, 0x7E37E62D }, { 0xEE939E5B, 0x776F25A4 }, { 0xDD8FB5AD, 0xE045C850 }, { 0x11FF1952, 0x86ED5BA7 }, + { 0xCF616B35, 0xE91D0BD9 }, { 0x6E408FFB, 0x37E0AB25 }, { 0x31025A7A, 0x9607F6C0 }, { 0x16D23C9D, 0x0B02F5E1 }, + { 0xFB50650C, 0xF3D8486B }, { 0xC40875F5, 0x621CFF27 }, { 0xFA5FD34A, 0x7D40CB71 }, { 0xDAA29062, 0x6DAA6616 }, + { 0x23EC84E2, 0x9F5F3549 }, { 0xC507C3B3, 0xEC847C3D }, { 0x043CE205, 0x025A3668 }, { 0x4DAC0B19, 0xA8BF9E6C }, + { 0xE9BEBB94, 0xFA808BE2 }, { 0x77C74FA3, 0xB5B99C52 }, { 0xF0397BCC, 0x78D9BC95 }, { 0xDBAD2624, 0xE332E50C }, + { 0x9332797E, 0xC74FCE12 }, { 0x2EA709AB, 0x1729ECEB }, { 0x9954D1F8, 0xC2D6B9F6 }, { 0xBAB8551A, 0x5D898CBF }, + { 0x17DD8ADB, 0x859A76FB }, { 0x362F7FB5, 0x1BE85886 }, { 0xF136CD8A, 0xF6413F8F }, { 0xBBB7E35C, 0xD3110FA5 }, + { 0x14CC4D11, 0x0A2FEED5 }, { 0xCD7F1AB9, 0xE83010ED }, { 0x5F42D581, 0xA1E75DE5 }, { 0xC13B21B6, 0xEEDE4A55 }, + { 0xF94E1480, 0xF2F5535F }, { 0x1888761E, 0x0CC1B46D }, { 0x6529913B, 0xBCE15FDB }, { 0x5A7181C2, 0x2D25E897 }, + { 0xE2D7A554, 0x71817F1C }, { 0x5C53124B, 0x2E52C5CB }, { 0xEF9C281D, 0xF9F7A6BE }, { 0x21F2F56E, 0x9E722E7D }, + { 0x81DCA7E6, 0xCE170D9B }, { 0x1CB4941B, 0x0E9B8205 }, { 0x3C49D733, 0x1E712F62 }, { 0x42F9F7DC, 0x21E45CFA }, + { 0x8BBA0F60, 0xCB8E7A7F }, { 0x010FB646, 0x8E98831A }, { 0x8E895B23, 0x474CCF0D }, { 0x4FB27A95, 0xA9928558 }, + { 0x05335443, 0x8CC2B572 }, { 0x84EFF3A5, 0x42D5B8E9 }, { 0x021E718C, 0x012D1B34 }, { 0xAE74180B, 0x57A6626A }, + { 0xE3D81312, 0xFF19FC06 }, { 0x6A7C6DFE, 0x35BA9D4D }, { 0x8F86ED65, 0xC9D44C17 }, { 0xA02E5288, 0x506523E6 }, + { 0x06229389, 0x03772D5C }, { 0x0B691EC0, 0x8B01F4FE }, { 0xED825991, 0xF8DABD8A }, { 0x985B67BE, 0x4C4E3AEC }, + { 0x7FBF96A9, 0xB10DF082 }, { 0xD4F8DAE1, 0x6A69279A }, { 0xD3D5FF2E, 0xE78689DC }, { 0x1FA553D1, 0x812E1A2B }, + { 0xEBA0CA18, 0xFBAD90D6 }, { 0x34310E39, 0x1AC543B2 }, { 0x2CB97827, 0x1604F7DF }, { 0x51189F02, 0xA6241C69 }, + { 0xEAAF7C5E, 0x753513CC }, { 0xC84C4EFA, 0x64F2A59F }, { 0x489F5F5A, 0x247D2B1E }, { 0xAB474C48, 0xDB64D718 }, + { 0xF2270A40, 0x79F4A7A1 }, { 0x2A9BEBAE, 0x1573DA83 }, { 0x68621C72, 0x34978679 }, { 0xA2302304, 0x514838D2 }, + { 0xFD72F685, 0xF0AF6537 }, { 0x3A6B44BA, 0x1D06023E }, { 0xCE6EDD73, 0x678588C3 }, { 0xCC70ACFF, 0x66A893F7 }, + { 0xB5EDA9DF, 0xD4D24E29 }, { 0x70EA6A6C, 0x38563214 }, { 0x0E5A4A83, 0x07C3418C }, { 0x5635BACD, 0x2BCBB22F }, + { 0x0878D90A, 0x04B46CD0 }, { 0x0C443B0F, 0x06EE5AB8 }, { 0x76C8F9E5, 0x3B211F48 }, { 0x12EEDE98, 0x0958C389 }, + { 0xBF8B0159, 0xD14B39CD }, { 0x72F41BE0, 0x397B2920 }, { 0x13E168DE, 0x87C04093 }, { 0x47CAA39F, 0xAD26E988 }, + { 0x9C6785BB, 0x4E140C84 }, { 0xB7F3D853, 0xD5FF551D }, { 0x5D5CA40D, 0xA0CA46D1 }, { 0x87FE346F, 0xCD6020C7 }, + { 0x15C3FB57, 0x84B76DCF }, { 0xA121E4CE, 0xDEFDA0FC }, { 0x96012D3D, 0x4B8D7B60 }, { 0x298A2C64, 0x9AC642AD }, + { 0x10F0AF14, 0x0875D8BD }, { 0x7B8374AC, 0xB357C6EA }, { 0x9A451632, 0x4D6321D8 }, { 0xC719B23F, 0xEDA96709 }, + { 0xF328BC06, 0xF76C24BB }, { 0x912C08F2, 0xC662D526 }, { 0x7892B366, 0x3CE25EC4 }, { 0x6F4F39BD, 0xB978283F }, + { 0x9D6833FD, 0xC08C8F9E }, { 0x9E79F437, 0x4F3917B0 }, { 0xB2C08C10, 0x593DE06F }, { 0xB1D14BDA, 0xD6887841 }, + { 0x32139DB0, 0x19B26EEE }, { 0x75D93E2F, 0xB4948766 }, { 0x1987C058, 0x82593777 }, { 0x3D466175, 0x90E9AC78 }, + { 0xFF6C8709, 0xF1827E03 }, { 0x353EB87F, 0x945DC0A8 }, { 0x8AB5B926, 0x4516F965 }, { 0x7EB020EF, 0x3F957398 }, + { 0x6D514831, 0xB855330B }, { 0x542BCB41, 0x2AE6A91B }, { 0xC6160479, 0x6331E413 }, { 0x80D311A0, 0x408F8E81 }, + { 0xC325503A, 0xEFF35161 }, { 0xBD9570D5, 0xD06622F9 }, { 0x0D4B8D49, 0x8876D9A2 }, { 0x573A0C8B, 0xA5533135 }, + { 0xDF91C421, 0xE168D364 }, { 0xF50A2F8F, 0xF41B09E7 }, { 0x24C1A12D, 0x12B09B0F }, { 0xA9593DC4, 0xDA49CC2C }, + { 0x3E57A6BF, 0x1F5C3456 }, { 0xA8568B82, 0x54D14F36 }, { 0x43F6419A, 0xAF7CDFE0 }, { 0xC943F8BC, 0xEA6A2685 }, + { 0xD7E91D2B, 0xE5DCBFB4 }, { 0x799D0520, 0xB27ADDDE }, { 0xD6E6AB6D, 0x6B443CAE }, { 0xF61BE845, 0x7BAE91C9 }, + { 0x7CAE5163, 0x3EB868AC }, { 0x22E332A4, 0x11C7B653 }, { 0xB9A992D0, 0xD23C1491 }, { 0x0311C7CA, 0x8FB5982E }, + { 0xE0C9D4D8, 0x70AC6428 }, { 0x0F55FCC5, 0x895BC296 }, { 0xEC8DEFD7, 0x76423E90 }, { 0xDE9E7267, 0x6FF0507E }, + { 0x7A8CC2EA, 0x3DCF45F0 }, { 0x941F5CB1, 0x4AA06054 }, { 0xB0DEFD9C, 0x5810FB5B }, { 0xBC9AC693, 0x5EFEA1E3 }, + { 0xDC8003EB, 0x6EDD4B4A }, { 0xE8B10DD2, 0x741808F8 }, { 0x28859A22, 0x145EC1B7 }, { 0x50172944, 0x28BC9F73 }, + { 0x4EBDCCD3, 0x270A0642 }, { 0x331C2BF6, 0x972AEDF4 }, { 0x0A66A886, 0x059977E4 }, { 0x4A812ED6, 0x2550302A }, + { 0xA7037747, 0xDD8A8DA0 }, { 0x970E9B7B, 0xC515F87A }, { 0x601AC578, 0x3023EAA9 }, { 0x73FBADA6, 0xB7E3AA3A }, + { 0x1EAAE597, 0x0FB69931 }, { 0x00000000, 0x00000000 }, { 0x6204B4F4, 0x310EF19D }, { 0x44DB6455, 0x229371A6 }, + { 0x1A960792, 0x0DECAF59 }, { 0xB8A62496, 0x5CA4978B }, { 0x38753536, 0x1C2B190A }, { 0x82CD602C, 0x41A295B5 }, + { 0x6426277D, 0x3279DCC1 }, { 0x9F764271, 0xC1A194AA }, { 0x26DFD0A1, 0x139D803B }, { 0x41E83016, 0xAE51C4D4 }, + { 0xAD65DFC1, 0xD813FA44 }, { 0x45D4D213, 0xAC0BF2BC }, { 0x46C515D9, 0x23BE6A92 }, { 0x923DCF38, 0x49D74D08 }, + { 0x27D066E7, 0x9D050321 }, { 0x5E4D63C7, 0x2F7FDEFF }, { 0x55247D07, 0xA47E2A01 }, { 0x2FA8BFED, 0x99B16FF1 }, + { 0x8C972AAF, 0x4661D439 }, { 0xA33F9542, 0xDFD0BBC8 }, { 0xA51D06CB, 0xDCA79694 }, { 0x7DA1E725, 0xB020EBB6 }, + { 0x696DAA34, 0xBA0F0563 }, { 0xD5F76CA7, 0xE4F1A480 }, { 0x9510EAF7, 0xC438E34E }, { 0x3B64F2FC, 0x939E8124 }, + { 0x072D25CF, 0x8DEFAE46 }, { 0x586FF04E, 0x2C08F3A3 }, { 0xB3CF3A56, 0xD7A56375 }, { 0x40E78650, 0x20C947CE }, + { 0x86F18229, 0x43F8A3DD }, { 0xAC6A6987, 0x568B795E }, { 0x1DBB225D, 0x8003011F }, { 0xF7145E03, 0xF53612D3 }, + { 0x300DEC3C, 0x189F75DA }, { 0x3720C9F3, 0x9570DB9C }, { 0x6B73DBB8, 0xBB221E57 }, { 0xE4F536DD, 0x72F65240 }, + { 0x88ABC8AA, 0x443BE251 }, { 0xD9B357A8, 0xE21FFE38 }, { 0xE7E4F117, 0xFD43CA6E }, { 0x89A47EEC, 0xCAA3614B }, + { 0xE1C6629E, 0xFE34E732 }, { 0x1B99B1D4, 0x83742C43 }, { 0x83C2D66A, 0xCF3A16AF }, { 0x4990E91C, 0xAAE5A804 }, + { 0x4CA3BD5F, 0x26271D76 }, { 0x3F5810F9, 0x91C4B74C }, { 0xF841A2C6, 0x7C6DD045 }, { 0xFE63314F, 0x7F1AFD19 }, + { 0x8D989CE9, 0xC8F95723 }, { 0x5306EE8E, 0xA709075D }, { 0xAA48FA0E, 0x55FC5402 }, { 0x9023BEB4, 0x48FA563C }, + { 0xCA523F76, 0x65DFBEAB }, { 0xD8BCE1EE, 0x6C877D22 }, { 0x85E045E3, 0xCC4D3BF3 }, { 0x6115733E, 0xBEBB69B3 }, + { 0x20FD4328, 0x10EAAD67 }, { 0x71E5DC2A, 0xB6CEB10E }, { 0x6737E0B7, 0xBDCC44EF }, { 0xA412B08D, 0x523F158E }, + { 0x2DB6CE61, 0x989C74C5 }, { 0x2B945DE8, 0x9BEB5999 }, { 0x09776F4C, 0x8A2CEFCA }, { 0x5B7E3784, 0xA3BD6B8D }, + { 0xCB5D8930, 0xEB473DB1 }, { 0x9B4AA074, 0xC3FBA2C2 }, { 0x25CE176B, 0x9C281815 }, { 0xD0C438E4, 0x683311F2 }, + { 0xBE84B71F, 0x5FD3BAD7 }, { 0xE5FA809B, 0xFC6ED15A }, { 0x6C5EFE77, 0x36CDB011 }, { 0x520958C8, 0x29918447 }, + { 0x59604608, 0xA29070B9 }, { 0xA60CC101, 0x53120EBA }, { 0x74D68869, 0x3A0C047C }, { 0xD2DA4968, 0x691E0AC6 }, + { 0xE6EB4751, 0x73DB4974 }, { 0xF40599C9, 0x7A838AFD }, { 0xB4E21F99, 0x5A4ACD33 }, { 0xC03497F0, 0x6046C94F }, + { 0xD1CB8EA2, 0xE6AB92E8 }, { 0x663856F1, 0x3354C7F5 }, { 0xAF7BAE4D, 0xD93EE170 }, { 0xC22AE67C, 0x616BD27B }, + { 0x397A8370, 0x92B39A10 }, { 0x4B8E9890, 0xABC8B330 }, { 0x630B02B2, 0xBF967287 }, { 0xB6FC6E15, 0x5B67D607 }, +}; + +__device__ uint2 T72[256] = { + { 0xCE553FE6, 0xD031C397 }, { 0xB006B525, 0x16BA5B01 }, { 0x296E70C8, 0xA89BADE6 }, { 0x77D3435B, 0x6A1F525D }, + { 0x573DFA0B, 0x6E103570 }, { 0x17FC95AB, 0x660EFB2A }, { 0x97634BF6, 0x76327A9E }, { 0x62458BF5, 0x4BAD9D64 }, + { 0xDBC3F748, 0xF1830CAE }, { 0x669131FF, 0xC5C8F542 }, { 0xDC48B0CB, 0x95044A1C }, { 0x3CF8B866, 0x892962DF }, + { 0xE930C135, 0xB0B9E208 }, { 0x611A767C, 0xA14FB3F0 }, { 0x1C160136, 0x8D2605F2 }, { 0xFECC549E, 0xD6B71922 }, + { 0xA5907D8B, 0x37089438 }, { 0x5803D49C, 0x0B5DA38E }, { 0xEA6F3CBC, 0x5A5BCC9C }, { 0x3B73FFE5, 0xEDAE246D }, + { 0xDE22EDCE, 0xD2B87E0F }, { 0xCA8185EC, 0x5E54ABB1 }, { 0xE80561B9, 0x1DE7F88F }, { 0x0135A08C, 0xAD5E1A87 }, + { 0x65CECC76, 0x2F2ADBD6 }, { 0x82F58358, 0x5780B5A7 }, { 0xEDE47B3F, 0x3EDC8A2E }, { 0x06BEE70F, 0xC9D95C35 }, + { 0x6C4E05EE, 0x83BE111D }, { 0x59367410, 0xA603B909 }, { 0x809FDE5D, 0x103C81B4 }, { 0x7D0C774A, 0x2C69B602 }, + { 0xD5C87953, 0x399080D7 }, { 0x487406B4, 0x09D41E16 }, { 0x26505E5F, 0xCDD63B18 }, { 0x9B0298E8, 0xF99DC2F4 }, + { 0x943CB67F, 0x9CD0540A }, { 0x891F17C5, 0xBCA84B7F }, { 0xB78DF2A6, 0x723D1DB3 }, { 0xE73B4F2E, 0x78AA6E71 }, + { 0xA071670D, 0x1433E699 }, { 0x54620782, 0x84F21BE4 }, { 0xB4D20F2F, 0x98DF3327 }, { 0xD3769E5C, 0xF049DCE2 }, + { 0x9656EB7A, 0xDB6C6019 }, { 0x078B4783, 0x648746B2 }, { 0x8DCBADCF, 0x32CD2359 }, { 0xF0C7DA85, 0x1EA4955B }, + { 0x1B9D46B5, 0xE9A14340 }, { 0xBBEC21B8, 0xFD92A5D9 }, { 0x0E0B8E1B, 0xC8138C79 }, { 0x6D7BA562, 0x2EE00B9A }, + { 0x93B7F1FC, 0xF85712B8 }, { 0x0BEA949D, 0xEB28FED8 }, { 0x8A40EA4C, 0x564A65EB }, { 0x474A2823, 0x6C9988E8 }, + { 0x121D8F2D, 0x4535898B }, { 0x31ACCBF4, 0xABD8C032 }, { 0xB9867CBD, 0xBA2E91CA }, { 0xEF8E263A, 0x7960BE3D }, + { 0x602FD6F0, 0x0C11A977 }, { 0x16C93527, 0xCB50E1AD }, { 0x035FFD89, 0xEAE22E94 }, { 0x5DE2CE1A, 0x2866D12F }, + { 0xAB9BF390, 0xFF1B1841 }, { 0x8CFE0D43, 0x9F9339DE }, { 0xC48A0BF7, 0x964727C8 }, { 0xAAAE531C, 0x524502C6 }, + { 0xAC10B413, 0x9B9C5EF3 }, { 0x42AB32A5, 0x4FA2FA49 }, { 0xE551122B, 0x3F165A62 }, { 0x76E6E3D7, 0xC74148DA }, + { 0xE464B2A7, 0x924840E5 }, { 0xD69784DA, 0xD372AE43 }, { 0x05E11A86, 0x233B72A1 }, { 0x4941A638, 0xA48A0491 }, + { 0xC9DE7865, 0xB4B68525 }, { 0xA6CF8002, 0xDDEABAAC }, { 0x50B6BD88, 0x0A9773C2 }, { 0x5EBD3393, 0xC284FFBB }, + { 0x2C8F6A4E, 0x8BA0DF47 }, { 0x4D951C32, 0x2AEF6CB7 }, { 0x2A318D41, 0x42798372 }, { 0xBF389BB2, 0x73F7CDFF }, + { 0x382C026C, 0x074C0AF9 }, { 0x243A035A, 0x8A6A0F0B }, { 0x5F88931F, 0x6FDAE53C }, { 0x7E538AC3, 0xC68B9896 }, + { 0x1AA8E639, 0x44FF59C7 }, { 0x439E9229, 0xE2FCE0CE }, { 0x79D8CD40, 0xA20CDE24 }, { 0xC8EBD8E9, 0x19E89FA2 }, + { 0xF398270C, 0xF446BBCF }, { 0x2284E455, 0x43B3533E }, { 0x8E945046, 0xD82F0DCD }, { 0xB26CE820, 0x51066F12 }, + { 0x6BC5426D, 0xE73957AF }, { 0x40C16FA0, 0x081ECE5A }, { 0xC5BFAB7B, 0x3B193D4F }, { 0xDF174D42, 0x7FE66488 }, + { 0x705804D8, 0x0E9814EF }, { 0x7C39D7C6, 0x8137AC85 }, { 0xE185A821, 0xB1733244 }, { 0x6F11F867, 0x695C3F89 }, + { 0xE3EFF524, 0xF6CF0657 }, { 0xD02963D5, 0x1AABF276 }, { 0x75B91E5E, 0x2DA3664E }, { 0x1077D228, 0x0289BD98 }, + { 0xF413608F, 0x90C1FD7D }, { 0xFD93A917, 0x3C5537B6 }, { 0x3919A2E0, 0xAA12107E }, { 0x30996B78, 0x0686DAB5 }, + { 0x9EE3826E, 0xDAA6B055 }, { 0x56085A87, 0xC34E2FF7 }, { 0x4FFF4137, 0x6D5358A4 }, { 0xB35948AC, 0xFC587595 }, + { 0xC7D5F67E, 0x7CA5095C }, { 0x8B754AC0, 0xFB147F6C }, { 0x91DDACF9, 0xBFEB26AB }, { 0x67A49173, 0x6896EFC5 }, + { 0x1E7C5C33, 0xCA9A31E1 }, { 0xB13315A9, 0xBBE44186 }, { 0x689ABFE4, 0x0DDB793B }, { 0xA7FA208E, 0x70B4A02B }, + { 0x7307F951, 0xE47A3A7B }, { 0x14A36822, 0x8CECD5BE }, { 0x23B144D9, 0xEEED49B9 }, { 0xB8B3DC31, 0x17708B4D }, + { 0x2765FED3, 0x6088219F }, { 0xF1F27A09, 0xB3FA8FDC }, { 0xFCA6099B, 0x910B2D31 }, { 0x78ED6DCC, 0x0F52C4A3 }, + { 0xBAD98134, 0x50CCBF5E }, { 0x7F662A4F, 0x6BD58211 }, { 0xD4FDD9DF, 0x94CE9A50 }, { 0x45207526, 0x2B25BCFB }, + { 0x1F49FCBF, 0x67C42B66 }, { 0x723259DD, 0x492420FC }, { 0x18C2BB3C, 0x03436DD4 }, { 0xF872B391, 0x1F6E4517 }, + { 0x69AF1F68, 0xA08563BC }, { 0xEEBB86B6, 0xD43EA4BA }, { 0x08B56914, 0x01CAD04C }, { 0x0980C998, 0xAC94CACB }, + { 0x9A373864, 0x54C3D873 }, { 0x2DBACAC2, 0x26FEC5C0 }, { 0xBE0D3B3E, 0xDEA9D778 }, { 0x20EEB950, 0x040F672D }, + { 0x7BB29045, 0xE5B0EA37 }, { 0xCBB42560, 0xF30AB136 }, { 0x37122CFB, 0x62019C07 }, { 0x13282FA1, 0xE86B930C }, + { 0x2EE5374B, 0xCC1CEB54 }, { 0xA21B3A08, 0x538FD28A }, { 0xD89C0AC1, 0x1B61223A }, { 0xAD25149F, 0x36C24474 }, + { 0xF74C9D06, 0x7A23D3E9 }, { 0x9968C5ED, 0xBE21F6E7 }, { 0x36278C77, 0xCF5F8680 }, { 0xEB5A9C30, 0xF705D61B }, + { 0x52DCE08D, 0x4D2B47D1 }, { 0xC234ECF8, 0x5F9E7BFD }, { 0x3DCD18EA, 0x24777858 }, { 0x4415D5AA, 0x867BA67C }, + { 0x5A698999, 0x4CE1979D }, { 0x00000000, 0x00000000 }, { 0x33C696F1, 0xEC64F421 }, { 0xC16B1171, 0xB57C5569 }, + { 0x467F88AF, 0xC1C7926F }, { 0x0F3E2E97, 0x654D96FE }, { 0xA8C40E19, 0x15F936D5 }, { 0xA9F1AE95, 0xB8A72C52 }, + { 0x21DB19DC, 0xA9517DAA }, { 0xFA18EE94, 0x58D27104 }, { 0xF2AD8780, 0x5918A148 }, { 0xDAF657C4, 0x5CDD1629 }, + { 0x64FB6CFA, 0x8274C151 }, { 0xC6E056F2, 0xD1FB13DB }, { 0xCF609F6A, 0x7D6FD910 }, { 0xD9A9AA4D, 0xB63F38BD }, + { 0xF526C003, 0x3D9FE7FA }, { 0x871499DE, 0x74BBC706 }, { 0xB6B8522A, 0xDF630734 }, { 0xCD0AC26F, 0x3AD3ED03 }, + { 0x83C023D4, 0xFADEAF20 }, { 0x4ECAE1BB, 0xC00D4223 }, { 0x5CD76E96, 0x8538CBA8 }, { 0x6E2458EB, 0xC402250E }, + { 0x026A5D05, 0x47BC3413 }, { 0x114272A4, 0xAFD7A71F }, { 0xCC3F62E3, 0x978DF784 }, { 0xA144C781, 0xB96DFC1E }, + { 0x1596C8AE, 0x21B2CF39 }, { 0x950916F3, 0x318E4E8D }, { 0x3E92E563, 0xCE9556CC }, { 0xDD7D1047, 0x385A509B }, + { 0xB5E7AFA3, 0x358129A0 }, { 0x63702B79, 0xE6F387E3 }, { 0x53E94001, 0xE0755D56 }, { 0xFFF9F412, 0x7BE903A5 }, + { 0x90E80C75, 0x12B53C2C }, { 0x857EC4DB, 0x3307F315 }, { 0x0C61D31E, 0x8FAFB86A }, { 0x86213952, 0xD9E5DD81 }, + { 0x9FD622E2, 0x77F8AAD2 }, { 0x357871FE, 0x25BDA814 }, { 0x8FA1F0CA, 0x7571174A }, { 0x985D6561, 0x137FEC60 }, + { 0x9DBC7FE7, 0x30449EC1 }, { 0x41F4CF2C, 0xA540D4DD }, { 0xAE7AE916, 0xDC206AE0 }, { 0xE2DA55A8, 0x5B911CD0 }, + { 0xF947131D, 0xB2305F90 }, { 0xBD52C6B7, 0x344BF9EC }, { 0xD2433ED0, 0x5D17C665 }, { 0xC05EB1FD, 0x18224FEE }, + { 0x844B6457, 0x9E59E992 }, { 0xA4A5DD07, 0x9A568EBF }, { 0x716DA454, 0xA3C60E68 }, { 0xD7A22456, 0x7E2CB4C4 }, + { 0x4CA0BCBE, 0x87B17630 }, { 0x32F3367D, 0x413AEEA6 }, { 0xBC67663B, 0x9915E36B }, { 0x3A465F69, 0x40F03EEA }, + { 0xE0B008AD, 0x1C2D28C3 }, { 0x4A1E5BB1, 0x4E682A05 }, { 0x285BD044, 0x05C5B761 }, { 0x5B5C2915, 0xE1BF8D1A }, + { 0xC3014C74, 0xF2C0617A }, { 0xD11CC359, 0xB7F5E8F1 }, { 0x3FA745EF, 0x63CB4C4B }, { 0x9C89DF6B, 0x9D1A8446 }, + { 0x4B2BFB3D, 0xE3363082 }, { 0xE60EEFA2, 0xD5F474F6 }, { 0xFB2D4E18, 0xF58C6B83 }, { 0x0ADF3411, 0x4676E45F }, + { 0x1D23A1BA, 0x20781F75 }, { 0x81AA7ED1, 0xBD629B33 }, { 0x19F71BB0, 0xAE1D7753 }, { 0xA32E9A84, 0xFED1C80D }, + { 0x92825170, 0x5509083F }, { 0x5557A70E, 0x29AC0163 }, { 0x51831D04, 0xA7C96945 }, { 0x04D4BA0A, 0x8E656826 }, + { 0x882AB749, 0x11F651F8 }, { 0xF6793D8A, 0xD77DC96E }, { 0x2B042DCD, 0xEF2799F5 }, { 0x7A8730C9, 0x48EEF0B0 }, + { 0x0D547392, 0x22F1A2ED }, { 0x2FD097C7, 0x6142F1D3 }, { 0x6AF0E2E1, 0x4A674D28 }, { 0x748CBED2, 0x80FD7CC9 }, + { 0xAF4F499A, 0x717E7067 }, { 0xECD1DBB3, 0x938290A9 }, { 0x344DD172, 0x88E3B293 }, { 0x250FA3D6, 0x2734158C }, +}; + +// KeySchedule +__constant__ const uint64_t CC[12][8] = { +{ 0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71, 0x0169679291e07c4b, 0x15d360a4082a42a2, 0x234d74cc36747605, 0x0745a6f2596580dd }, +{ 0x1a2f9da98ab5a36f, 0xd7b5700f469de34f, 0x982b230a72eafef3, 0x3101b5160f5ed561, 0x5899d6126b17b59a, 0xcaa70adbc261b55c, 0x56cdcbd71ba2dd55, 0xb79bb121700479e6 }, +{ 0xc72fce2bacdc74f5, 0x35843d6a28fc390a, 0x8b1f9c525f5ef106, 0x7b7b29b11475eaf2, 0xb19e3590e40fe2d3, 0x09db6260373ac9c1, 0x31db7a8643f4b6c2, 0xb20aba0af5961e99 }, +{ 0xd26615e8b3df1fef, 0xdde4715da0e148f9, 0x7d3c5c337e858e48, 0x3f355e68ad1c729d, 0x75d603ed822cd7a9, 0xbe0352933313b7d8, 0xf137e893a1ea5334, 0x2ed1e384bcbe0c22 }, +{ 0x994747adac6bea4b, 0x6323a96c0c413f9a, 0x4a1086161f1c157f, 0xbdff0f80d7359e35, 0xa3f53a254717cdbf, 0x161a2723b700ffdf, 0xf563eaa97ea2567a, 0x57fe6c7cfd581760 }, +{ 0xd9d33a1daeae4fae, 0xc039307a3bc3a46f, 0x6ca44251f9c4662d, 0xc68ef09ab49a7f18, 0xb4b79a1cb7a6facf, 0xb6c6bec2661ff20a, 0x354f903672c571bf, 0x6e7d64467a4068fa }, +{ 0xecc5aaee160ec7f4, 0x540924bffe86ac51, 0xc987bfe6c7c69e39, 0xc9937a19333e47d3, 0x372c822dc5ab9209, 0x04054a2883694706, 0xf34a3ca24c451735, 0x93d4143a4d568688 }, +{ 0xa7c9934d425b1f9b, 0x41416e0c02aae703, 0x1ede369c71f8b74e, 0x9ac4db4d3b44b489, 0x90069b92cb2b89f4, 0x2fc4a5d12b8dd169, 0xd9a8515935c2ac36, 0x1ee702bfd40d7fa4 }, +{ 0x9b223116545a8f37, 0xde5f16ecd89a4c94, 0x244289251b3a7d3a, 0x84090de0b755d93c, 0xb1ceb2db0b440a80, 0x549c07a69a8a2b7b, 0x602a1fcb92dc380e, 0xdb5a238351446172 }, +{ 0x526f0580a6debeab, 0xf3f3e4b248e52a38, 0xdb788aff1ce74189, 0x0361331b8ae1ff1f, 0x4b3369af0267e79f, 0xf452763b306c1e7a, 0xc3b63b15d1fa9836, 0xed9c4598fbc7b474 }, +{ 0xfb89c8efd09ecd7b, 0x94fe5a63cdc60230, 0x6107abebbb6bfad8, 0x7966841421800120, 0xcab948eaef711d8a, 0x986e477d1dcdbaef, 0x5dd86fc04a59a2de, 0x1b2df381cda4ca6b }, +{ 0xba3116f167e78e37, 0x7ab14904b08013d2, 0x771ddfbc323ca4cd, 0x9b9f2130d41220f8, 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48 } +}; + +__constant__ const uint64_t precomputed_values[12][8] = { + 0x8FD72F640708B0D0, 0x0DE874C7EBC3F213, 0xE92EEF3AD202E9E0, 0xC1E9DA0708013DA7, 0x9727DAB2F014BE88, 0x103051A02BCD6935, 0x33EC7E1DBD28F736, 0x1ECF460CF78AD1F4, + 0x0B2D9F89C775449D, 0x6B6EEFC6DAB7E8B0, 0xF1A0D31667F6EC44, 0x2A71132D5E108166, 0x0E9357C2EC87931A, 0xC99F5C1B4A01612D, 0x7E60B16E637D4EE2, 0xA9FCB827F9BA6D81, + 0x231FECA5AB3D285C, 0x70C6E1483C838C3B, 0x9C21C3C40CE4E2DA, 0x2FA796BD5688E573, 0x04C0E3FF55809FDF, 0x5FF978BFB8E3CDC8, 0xC54A19D6A3D07033, 0x0FCA83FDDE872478, + 0xBDF9312726339F10, 0x51A5BA1793BC9C56, 0xC4428DA14F96D2D4, 0xEC925222374EAB1F, 0x79477893747DD92F, 0xC495E19A46886304, 0x9C23F893BA7CFA36, 0x0C47268881FC5FEB, + 0xCF117966029B2CB3, 0x07179ABE77088A8F, 0x671EF4CC2650E257, 0x7474B8B170DAB5C6, 0x4224FEBECF35113E, 0x993D156C675C5537, 0x2DEE3A5782C39B45, 0xE7C586F2990DD385, + 0x8608FD95B1C1138A, 0x8BB0847D9E9849AC, 0x5E76623F4F0EB0C7, 0x34C2BDBAFC5060CE, 0xE9E814475907826C, 0x22C9ED94D6AAC7C9, 0xE6B75E28171EB0D6, 0xF1329E5534E60215, + 0x86BB4814B1C3CE52, 0xE8F226C9FBDDD017, 0xCEDED67991CB3087, 0x76C33E32FDBFACA5, 0xDBB13BE1A9F7474C, 0x3D0273470342C356, 0x8E7246C51CF07F61, 0xAC8C125DDEF8DF71, + 0x6D73E747795B8CF3, 0x4E4AA65EA0072050, 0xA14A1582CB43C2B9, 0x748EF2B7BB63B938, 0x126789534410D7D4, 0xD4D48FF40301D791, 0xC67DFBE315C41FC0, 0x35E7A1A1AF88601C, + 0x9BD33EA0FAB34007, 0xF51B7CDBE3D67D25, 0xD3ABDA0CE4186E6B, 0x8E61DDADCBCE1706, 0x58994565B41BE6A5, 0x7A87ABC1240CD31D, 0xFAFE6C28487968D0, 0x15B368609FF9EEA7, + 0xAE33263CCF115818, 0x93B2DBE9CADFCFC8, 0x0A91952BF91B0147, 0x458E67CA5F1ED73A, 0x94C2E5F288F074E3, 0x377895E85C69E996, 0xF11A4456AAB37B10, 0x163131934816821A, + 0xD07E4A2366BF469D, 0x5EF1A3D220213B6C, 0x3C5BB78971D8ED0F, 0x0DE05E6B9006F2D2, 0xC58CFB00B8EAA1C9, 0xEFCDB54D1F250B76, 0xFD135634FA527042, 0x4CEE791290516407, + 0xD800B9264010790F, 0x974C4823E2B668D7, 0xA605A4B385C5E361, 0x3F6C92DA5A56D8D2, 0x82B9D67C12EF8277, 0x0AB6B4582561BF90, 0x46954FD98FC2CBA3, 0x70BE45CB21B6760D +}; diff --git a/x11/cuda_streebog.cu b/x11/cuda_streebog.cu index 672f9cf..228c691 100644 --- a/x11/cuda_streebog.cu +++ b/x11/cuda_streebog.cu @@ -808,7 +808,9 @@ void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash) __constant__ uint64_t target64[4]; -void streebog_set_target(const uint32_t* ptarget){ +__host__ +void streebog_set_target(const uint32_t* ptarget) +{ cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice); }