Browse Source

skunk: merged kernel to use when possible

for now, only used for maxwell/pascal with cuda 7.5 or 8.0
master
Tanguy Pruvot 7 years ago
parent
commit
9a7d74eac4
  1. 3
      Makefile.am
  2. 1
      ccminer.cpp
  3. 7
      ccminer.vcxproj
  4. 13
      ccminer.vcxproj.filters
  5. 690
      skunk/cuda_skunk.cu
  6. 371
      skunk/cuda_skunk_streebog.cu
  7. 385
      skunk/skein_header.h
  8. 43
      skunk/skunk.cu
  9. 567
      skunk/streebog_arrays.cuh
  10. 4
      x11/cuda_streebog.cu

3
Makefile.am

@ -55,7 +55,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/nist5.cu \ quark/nist5.cu \
quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \ quark/quarkcoin.cu quark/cuda_quark_compactionTest.cu \
neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \ neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \
pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp skunk.cu zr5.cu \ pentablake.cu skein.cu cuda_skeincoin.cu skein2.cpp zr5.cu \
skunk/skunk.cu skunk/cuda_skunk.cu skunk/cuda_skunk_streebog.cu \
sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \ sha256/sha256d.cu sha256/cuda_sha256d.cu sha256/sha256t.cu sha256/cuda_sha256t.cu \
sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \ sia/sia.cu sia/sia-rpc.cpp sph/blake2b.c \
sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \

1
ccminer.cpp

@ -2226,6 +2226,7 @@ static void *miner_thread(void *userdata)
case ALGO_JHA: case ALGO_JHA:
case ALGO_LYRA2v2: case ALGO_LYRA2v2:
case ALGO_S3: case ALGO_S3:
case ALGO_SKUNK:
case ALGO_TIMETRAVEL: case ALGO_TIMETRAVEL:
case ALGO_BITCORE: case ALGO_BITCORE:
case ALGO_X11EVO: case ALGO_X11EVO:

7
ccminer.vcxproj

@ -532,7 +532,12 @@
<CudaCompile Include="cuda_skeincoin.cu"> <CudaCompile Include="cuda_skeincoin.cu">
<MaxRegCount>48</MaxRegCount> <MaxRegCount>48</MaxRegCount>
</CudaCompile> </CudaCompile>
<CudaCompile Include="skunk.cu" /> <CudaCompile Include="skunk\skunk.cu" />
<CudaCompile Include="skunk\cuda_skunk.cu">
<CodeGeneration>compute_50,sm_50;compute_52,sm_52</CodeGeneration>
<MaxRegCount>64</MaxRegCount>
</CudaCompile>
<CudaCompile Include="skunk\cuda_skunk_streebog.cu" />
<CudaCompile Include="tribus.cu" /> <CudaCompile Include="tribus.cu" />
<ClInclude Include="x11\cuda_x11_aes.cuh" /> <ClInclude Include="x11\cuda_x11_aes.cuh" />
<CudaCompile Include="x11\cuda_x11_cubehash512.cu" /> <CudaCompile Include="x11\cuda_x11_cubehash512.cu" />

13
ccminer.vcxproj.filters

@ -46,6 +46,9 @@
<Filter Include="Source Files\CUDA\qubit"> <Filter Include="Source Files\CUDA\qubit">
<UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier> <UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier>
</Filter> </Filter>
<Filter Include="Source Files\CUDA\skunk">
<UniqueIdentifier>{23ed23a2-8e7a-65a2-b051-8aa63047d352}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\CUDA\x11"> <Filter Include="Source Files\CUDA\x11">
<UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier> <UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
</Filter> </Filter>
@ -754,8 +757,14 @@
<CudaCompile Include="pentablake.cu"> <CudaCompile Include="pentablake.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="skunk.cu"> <CudaCompile Include="skunk\skunk.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA\skunk</Filter>
</CudaCompile>
<CudaCompile Include="skunk\cuda_skunk.cu">
<Filter>Source Files\CUDA\skunk</Filter>
</CudaCompile>
<CudaCompile Include="skunk\cuda_skunk_streebog.cu">
<Filter>Source Files\CUDA\skunk</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="tribus.cu"> <CudaCompile Include="tribus.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>

690
skunk/cuda_skunk.cu

@ -0,0 +1,690 @@
/**
* skein + cube + fugue (to fix)
* Based on krnlx work
*/
#include <miner.h>
#include <cuda_vectors.h>
#include "skunk/skein_header.h"
#include <cuda_vector_uint2x4.h>
#define TPB 512
/* ************************ */
static __constant__ uint2 c_buffer[120]; // padded message (80 bytes + 72 bytes midstate + align)
#define SWAP(a,b) { uint32_t u = a; a = b; b = u; }
__device__ __forceinline__
static void rrounds(uint32_t *x){
#pragma unroll 2
for (int r = 0; r < 16; r++) {
/* "add x_0jklm into x_1jklmn modulo 2^32 rotate x_0jklm upwards by 7 bits" */
x[16] = x[16] + x[ 0]; x[ 0] = ROTL32(x[ 0], 7);x[17] = x[17] + x[ 1];x[ 1] = ROTL32(x[ 1], 7);
x[18] = x[18] + x[ 2]; x[ 2] = ROTL32(x[ 2], 7);x[19] = x[19] + x[ 3];x[ 3] = ROTL32(x[ 3], 7);
x[20] = x[20] + x[ 4]; x[ 4] = ROTL32(x[ 4], 7);x[21] = x[21] + x[ 5];x[ 5] = ROTL32(x[ 5], 7);
x[22] = x[22] + x[ 6]; x[ 6] = ROTL32(x[ 6], 7);x[23] = x[23] + x[ 7];x[ 7] = ROTL32(x[ 7], 7);
x[24] = x[24] + x[ 8]; x[ 8] = ROTL32(x[ 8], 7);x[25] = x[25] + x[ 9];x[ 9] = ROTL32(x[ 9], 7);
x[26] = x[26] + x[10]; x[10] = ROTL32(x[10], 7);x[27] = x[27] + x[11];x[11] = ROTL32(x[11], 7);
x[28] = x[28] + x[12]; x[12] = ROTL32(x[12], 7);x[29] = x[29] + x[13];x[13] = ROTL32(x[13], 7);
x[30] = x[30] + x[14]; x[14] = ROTL32(x[14], 7);x[31] = x[31] + x[15];x[15] = ROTL32(x[15], 7);
/* "swap x_00klm with x_01klm" */
SWAP(x[ 0], x[ 8]);x[ 0] ^= x[16];x[ 8] ^= x[24];SWAP(x[ 1], x[ 9]);x[ 1] ^= x[17];x[ 9] ^= x[25];
SWAP(x[ 2], x[10]);x[ 2] ^= x[18];x[10] ^= x[26];SWAP(x[ 3], x[11]);x[ 3] ^= x[19];x[11] ^= x[27];
SWAP(x[ 4], x[12]);x[ 4] ^= x[20];x[12] ^= x[28];SWAP(x[ 5], x[13]);x[ 5] ^= x[21];x[13] ^= x[29];
SWAP(x[ 6], x[14]);x[ 6] ^= x[22];x[14] ^= x[30];SWAP(x[ 7], x[15]);x[ 7] ^= x[23];x[15] ^= x[31];
/* "swap x_1jk0m with x_1jk1m" */
SWAP(x[16], x[18]); SWAP(x[17], x[19]); SWAP(x[20], x[22]); SWAP(x[21], x[23]);SWAP(x[24], x[26]); SWAP(x[25], x[27]); SWAP(x[28], x[30]); SWAP(x[29], x[31]);
/* "add x_0jklm into x_1jklm modulo 2^32 rotate x_0jklm upwards by 11 bits" */
x[16] = x[16] + x[ 0]; x[ 0] = ROTL32(x[ 0],11);x[17] = x[17] + x[ 1];x[ 1] = ROTL32(x[ 1],11);
x[18] = x[18] + x[ 2]; x[ 2] = ROTL32(x[ 2],11);x[19] = x[19] + x[ 3];x[ 3] = ROTL32(x[ 3],11);
x[20] = x[20] + x[ 4]; x[ 4] = ROTL32(x[ 4],11);x[21] = x[21] + x[ 5];x[ 5] = ROTL32(x[ 5],11);
x[22] = x[22] + x[ 6]; x[ 6] = ROTL32(x[ 6],11);x[23] = x[23] + x[ 7];x[ 7] = ROTL32(x[ 7],11);
x[24] = x[24] + x[ 8]; x[ 8] = ROTL32(x[ 8],11);x[25] = x[25] + x[ 9];x[ 9] = ROTL32(x[ 9],11);
x[26] = x[26] + x[10]; x[10] = ROTL32(x[10],11);x[27] = x[27] + x[11];x[11] = ROTL32(x[11],11);
x[28] = x[28] + x[12]; x[12] = ROTL32(x[12],11);x[29] = x[29] + x[13];x[13] = ROTL32(x[13],11);
x[30] = x[30] + x[14]; x[14] = ROTL32(x[14],11);x[31] = x[31] + x[15];x[15] = ROTL32(x[15],11);
/* "swap x_0j0lm with x_0j1lm" */
SWAP(x[ 0], x[ 4]); x[ 0] ^= x[16]; x[ 4] ^= x[20]; SWAP(x[ 1], x[ 5]); x[ 1] ^= x[17]; x[ 5] ^= x[21];
SWAP(x[ 2], x[ 6]); x[ 2] ^= x[18]; x[ 6] ^= x[22]; SWAP(x[ 3], x[ 7]); x[ 3] ^= x[19]; x[ 7] ^= x[23];
SWAP(x[ 8], x[12]); x[ 8] ^= x[24]; x[12] ^= x[28]; SWAP(x[ 9], x[13]); x[ 9] ^= x[25]; x[13] ^= x[29];
SWAP(x[10], x[14]); x[10] ^= x[26]; x[14] ^= x[30]; SWAP(x[11], x[15]); x[11] ^= x[27]; x[15] ^= x[31];
/* "swap x_1jkl0 with x_1jkl1" */
SWAP(x[16], x[17]); SWAP(x[18], x[19]); SWAP(x[20], x[21]); SWAP(x[22], x[23]);SWAP(x[24], x[25]); SWAP(x[26], x[27]); SWAP(x[28], x[29]); SWAP(x[30], x[31]);
}
}
// fugue
static __constant__ const uint32_t c_S[16] = {
0x8807a57e, 0xe616af75, 0xc5d3e4db, 0xac9ab027,
0xd915f117, 0xb6eecc54, 0x06e8020b, 0x4a92efd1,
0xaac6e2c9, 0xddb21398, 0xcae65838, 0x437f203f,
0x25ea78e7, 0x951fddd6, 0xda6ed11d, 0xe13e3567
};
static __device__ uint32_t mixtab0[256] = {
0x63633297, 0x7c7c6feb, 0x77775ec7, 0x7b7b7af7, 0xf2f2e8e5, 0x6b6b0ab7, 0x6f6f16a7, 0xc5c56d39,
0x303090c0, 0x01010704, 0x67672e87, 0x2b2bd1ac, 0xfefeccd5, 0xd7d71371, 0xabab7c9a, 0x767659c3,
0xcaca4005, 0x8282a33e, 0xc9c94909, 0x7d7d68ef, 0xfafad0c5, 0x5959947f, 0x4747ce07, 0xf0f0e6ed,
0xadad6e82, 0xd4d41a7d, 0xa2a243be, 0xafaf608a, 0x9c9cf946, 0xa4a451a6, 0x727245d3, 0xc0c0762d,
0xb7b728ea, 0xfdfdc5d9, 0x9393d47a, 0x2626f298, 0x363682d8, 0x3f3fbdfc, 0xf7f7f3f1, 0xcccc521d,
0x34348cd0, 0xa5a556a2, 0xe5e58db9, 0xf1f1e1e9, 0x71714cdf, 0xd8d83e4d, 0x313197c4, 0x15156b54,
0x04041c10, 0xc7c76331, 0x2323e98c, 0xc3c37f21, 0x18184860, 0x9696cf6e, 0x05051b14, 0x9a9aeb5e,
0x0707151c, 0x12127e48, 0x8080ad36, 0xe2e298a5, 0xebeba781, 0x2727f59c, 0xb2b233fe, 0x757550cf,
0x09093f24, 0x8383a43a, 0x2c2cc4b0, 0x1a1a4668, 0x1b1b416c, 0x6e6e11a3, 0x5a5a9d73, 0xa0a04db6,
0x5252a553, 0x3b3ba1ec, 0xd6d61475, 0xb3b334fa, 0x2929dfa4, 0xe3e39fa1, 0x2f2fcdbc, 0x8484b126,
0x5353a257, 0xd1d10169, 0x00000000, 0xededb599, 0x2020e080, 0xfcfcc2dd, 0xb1b13af2, 0x5b5b9a77,
0x6a6a0db3, 0xcbcb4701, 0xbebe17ce, 0x3939afe4, 0x4a4aed33, 0x4c4cff2b, 0x5858937b, 0xcfcf5b11,
0xd0d0066d, 0xefefbb91, 0xaaaa7b9e, 0xfbfbd7c1, 0x4343d217, 0x4d4df82f, 0x333399cc, 0x8585b622,
0x4545c00f, 0xf9f9d9c9, 0x02020e08, 0x7f7f66e7, 0x5050ab5b, 0x3c3cb4f0, 0x9f9ff04a, 0xa8a87596,
0x5151ac5f, 0xa3a344ba, 0x4040db1b, 0x8f8f800a, 0x9292d37e, 0x9d9dfe42, 0x3838a8e0, 0xf5f5fdf9,
0xbcbc19c6, 0xb6b62fee, 0xdada3045, 0x2121e784, 0x10107040, 0xffffcbd1, 0xf3f3efe1, 0xd2d20865,
0xcdcd5519, 0x0c0c2430, 0x1313794c, 0xececb29d, 0x5f5f8667, 0x9797c86a, 0x4444c70b, 0x1717655c,
0xc4c46a3d, 0xa7a758aa, 0x7e7e61e3, 0x3d3db3f4, 0x6464278b, 0x5d5d886f, 0x19194f64, 0x737342d7,
0x60603b9b, 0x8181aa32, 0x4f4ff627, 0xdcdc225d, 0x2222ee88, 0x2a2ad6a8, 0x9090dd76, 0x88889516,
0x4646c903, 0xeeeebc95, 0xb8b805d6, 0x14146c50, 0xdede2c55, 0x5e5e8163, 0x0b0b312c, 0xdbdb3741,
0xe0e096ad, 0x32329ec8, 0x3a3aa6e8, 0x0a0a3628, 0x4949e43f, 0x06061218, 0x2424fc90, 0x5c5c8f6b,
0xc2c27825, 0xd3d30f61, 0xacac6986, 0x62623593, 0x9191da72, 0x9595c662, 0xe4e48abd, 0x797974ff,
0xe7e783b1, 0xc8c84e0d, 0x373785dc, 0x6d6d18af, 0x8d8d8e02, 0xd5d51d79, 0x4e4ef123, 0xa9a97292,
0x6c6c1fab, 0x5656b943, 0xf4f4fafd, 0xeaeaa085, 0x6565208f, 0x7a7a7df3, 0xaeae678e, 0x08083820,
0xbaba0bde, 0x787873fb, 0x2525fb94, 0x2e2ecab8, 0x1c1c5470, 0xa6a65fae, 0xb4b421e6, 0xc6c66435,
0xe8e8ae8d, 0xdddd2559, 0x747457cb, 0x1f1f5d7c, 0x4b4bea37, 0xbdbd1ec2, 0x8b8b9c1a, 0x8a8a9b1e,
0x70704bdb, 0x3e3ebaf8, 0xb5b526e2, 0x66662983, 0x4848e33b, 0x0303090c, 0xf6f6f4f5, 0x0e0e2a38,
0x61613c9f, 0x35358bd4, 0x5757be47, 0xb9b902d2, 0x8686bf2e, 0xc1c17129, 0x1d1d5374, 0x9e9ef74e,
0xe1e191a9, 0xf8f8decd, 0x9898e556, 0x11117744, 0x696904bf, 0xd9d93949, 0x8e8e870e, 0x9494c166,
0x9b9bec5a, 0x1e1e5a78, 0x8787b82a, 0xe9e9a989, 0xcece5c15, 0x5555b04f, 0x2828d8a0, 0xdfdf2b51,
0x8c8c8906, 0xa1a14ab2, 0x89899212, 0x0d0d2334, 0xbfbf10ca, 0xe6e684b5, 0x4242d513, 0x686803bb,
0x4141dc1f, 0x9999e252, 0x2d2dc3b4, 0x0f0f2d3c, 0xb0b03df6, 0x5454b74b, 0xbbbb0cda, 0x16166258
};
__device__ __forceinline__
uint32_t ROL8X(const uint32_t a){
return __byte_perm(a, 0, 0x2103);
}
__device__ __forceinline__
uint32_t ROL16X(const uint32_t a){
return __byte_perm(a, 0, 0x1032);
}
__device__ __forceinline__
uint32_t ROR8X(const uint32_t a){
return __byte_perm(a, 0, 0x0321);
}
#define mixtab0(x) shared[0][x]
#define mixtab1(x) shared[1][x]
#define mixtab2(x) shared[2][x]
#define mixtab3(x) shared[3][x]
#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30) { \
x22 ^= x00; \
x00 = (q); \
x08 ^= (q); \
x01 ^= x24; \
x04 ^= x27; \
x07 ^= x30; \
}
#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20) { \
x00 ^= x04; \
x01 ^= x05; \
x02 ^= x06; \
x18 ^= x04; \
x19 ^= x05; \
x20 ^= x06; \
}
__device__ __forceinline__
static void SMIX(const uint32_t shared[4][256], uint32_t &x0,uint32_t &x1,uint32_t &x2,uint32_t &x3){
uint32_t c0 = mixtab0(__byte_perm(x0,0,0x4443));
uint32_t r1 = mixtab1(__byte_perm(x0,0,0x4442));
uint32_t r2 = mixtab2(__byte_perm(x0,0,0x4441));
uint32_t r3 = mixtab3(__byte_perm(x0,0,0x4440));
c0 = c0 ^ r1 ^ r2 ^ r3;
uint32_t r0 = mixtab0(__byte_perm(x1,0,0x4443));
uint32_t c1 = r0 ^ mixtab1(__byte_perm(x1,0,0x4442));
uint32_t tmp = mixtab2(__byte_perm(x1,0,0x4441));
c1 ^= tmp;
r2 ^= tmp;
tmp = mixtab3(__byte_perm(x1,0,0x4440));
c1 ^= tmp;
r3 ^= tmp;
uint32_t c2 = mixtab0(__byte_perm(x2,0,0x4443));
r0 ^= c2;
tmp = mixtab1(__byte_perm(x2,0,0x4442));
c2 ^= tmp;
r1 ^= tmp;
tmp = mixtab2(__byte_perm(x2,0,0x4441));
c2 ^= tmp;
tmp = mixtab3(__byte_perm(x2,0,0x4440));
c2 ^= tmp;
r3 ^= tmp;
uint32_t c3 = mixtab0(__byte_perm(x3,0,0x4443));
r0 ^= c3;
tmp = mixtab1(__byte_perm(x3,0,0x4442));
c3 ^= tmp;
r1 ^= tmp;
tmp = mixtab2(__byte_perm(x3,0,0x4441));
c3 ^= tmp;
r2 ^= tmp;
tmp = mixtab3(__byte_perm(x3,0,0x4440));
c3 ^= tmp;
x0 = ((c0 ^ (r0 << 0)) & 0xFF000000) | ((c1 ^ (r1 << 0)) & 0x00FF0000) | ((c2 ^ (r2 << 0)) & 0x0000FF00) | ((c3 ^ (r3 << 0)) & 0x000000FF);
x1 = ((c1 ^ (r0 << 8)) & 0xFF000000) | ((c2 ^ (r1 << 8)) & 0x00FF0000) | ((c3 ^ (r2 << 8)) & 0x0000FF00) | ((c0 ^ (r3 >>24)) & 0x000000FF);
x2 = ((c2 ^ (r0 <<16)) & 0xFF000000) | ((c3 ^ (r1 <<16)) & 0x00FF0000) | ((c0 ^ (r2 >>16)) & 0x0000FF00) | ((c1 ^ (r3 >>16)) & 0x000000FF);
x3 = ((c3 ^ (r0 <<24)) & 0xFF000000) | ((c0 ^ (r1 >> 8)) & 0x00FF0000) | ((c1 ^ (r2 >> 8)) & 0x0000FF00) | ((c2 ^ (r3 >> 8)) & 0x000000FF);
}
__device__
static void SMIX_LDG(const uint32_t shared[4][256], uint32_t &x0,uint32_t &x1,uint32_t &x2,uint32_t &x3){
uint32_t c0 = __ldg(&mixtab0[__byte_perm(x0,0,0x4443)]);
uint32_t r1 = mixtab1(__byte_perm(x0,0,0x4442));
uint32_t r2 = mixtab2(__byte_perm(x0,0,0x4441));
uint32_t r3 = mixtab3(__byte_perm(x0,0,0x4440));
c0 = c0 ^ r1 ^ r2 ^ r3;
uint32_t r0 = __ldg(&mixtab0[__byte_perm(x1,0,0x4443)]);
uint32_t c1 = r0 ^ mixtab1(__byte_perm(x1,0,0x4442));
uint32_t tmp = mixtab2(__byte_perm(x1,0,0x4441));
c1 ^= tmp;
r2 ^= tmp;
tmp = mixtab3(__byte_perm(x1,0,0x4440));
c1 ^= tmp;
r3 ^= tmp;
uint32_t c2 = __ldg(&mixtab0[__byte_perm(x2,0,0x4443)]);
r0 ^= c2;
tmp = mixtab1(__byte_perm(x2,0,0x4442));
c2 ^= tmp;
r1 ^= tmp;
tmp = mixtab2(__byte_perm(x2,0,0x4441));
c2 ^= tmp;
tmp = mixtab3(__byte_perm(x2,0,0x4440));
c2 ^= tmp;
r3 ^= tmp;
uint32_t c3 = __ldg(&mixtab0[__byte_perm(x3,0,0x4443)]);
r0 ^= c3;
tmp = mixtab1(__byte_perm(x3,0,0x4442));
c3 ^= tmp;
r1 ^= tmp;
tmp = mixtab2(__byte_perm(x3,0,0x4441));
c3 ^= tmp;
r2 ^= tmp;
tmp = ROL8X(__ldg(&mixtab0[__byte_perm(x3,0,0x4440)]));
c3 ^= tmp;
x0 = ((c0 ^ (r0 << 0)) & 0xFF000000) | ((c1 ^ (r1 << 0)) & 0x00FF0000) | ((c2 ^ (r2 << 0)) & 0x0000FF00) | ((c3 ^ (r3 << 0)) & 0x000000FF);
x1 = ((c1 ^ (r0 << 8)) & 0xFF000000) | ((c2 ^ (r1 << 8)) & 0x00FF0000) | ((c3 ^ (r2 << 8)) & 0x0000FF00) | ((c0 ^ (r3 >>24)) & 0x000000FF);
x2 = ((c2 ^ (r0 <<16)) & 0xFF000000) | ((c3 ^ (r1 <<16)) & 0x00FF0000) | ((c0 ^ (r2 >>16)) & 0x0000FF00) | ((c1 ^ (r3 >>16)) & 0x000000FF);
x3 = ((c3 ^ (r0 <<24)) & 0xFF000000) | ((c0 ^ (r1 >> 8)) & 0x00FF0000) | ((c1 ^ (r2 >> 8)) & 0x0000FF00) | ((c2 ^ (r3 >> 8)) & 0x000000FF);
}
#define mROR3 { \
B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \
S[35] = S[32]; S[34] = S[31]; S[33] = S[30]; S[32] = S[29]; S[31] = S[28]; S[30] = S[27]; S[29] = S[26]; S[28] = S[25]; S[27] = S[24]; \
S[26] = S[23]; S[25] = S[22]; S[24] = S[21]; S[23] = S[20]; S[22] = S[19]; S[21] = S[18]; S[20] = S[17]; S[19] = S[16]; S[18] = S[15]; \
S[17] = S[14]; S[16] = S[13]; S[15] = S[12]; S[14] = S[11]; S[13] = S[10]; S[12] = S[ 9]; S[11] = S[ 8]; S[10] = S[ 7]; S[ 9] = S[ 6]; \
S[ 8] = S[ 5]; S[ 7] = S[ 4]; S[ 6] = S[ 3]; S[ 5] = S[ 2]; S[ 4] = S[ 1]; S[ 3] = S[ 0]; S[ 2] = B[ 8]; S[ 1] = B[ 7]; S[ 0] = B[ 6]; \
}
#define mROR8 { \
B[ 1] = S[28], B[ 2] = S[29], B[ 3] = S[30], B[ 4] = S[31], B[ 5] = S[32], B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \
S[35] = S[27]; S[34] = S[26]; S[33] = S[25]; S[32] = S[24]; S[31] = S[23]; S[30] = S[22]; S[29] = S[21]; S[28] = S[20]; S[27] = S[19]; \
S[26] = S[18]; S[25] = S[17]; S[24] = S[16]; S[23] = S[15]; S[22] = S[14]; S[21] = S[13]; S[20] = S[12]; S[19] = S[11]; S[18] = S[10]; \
S[17] = S[ 9]; S[16] = S[ 8]; S[15] = S[ 7]; S[14] = S[ 6]; S[13] = S[ 5]; S[12] = S[ 4]; S[11] = S[ 3]; S[10] = S[ 2]; S[ 9] = S[ 1]; \
S[ 8] = S[ 0]; S[ 7] = B[ 8]; S[ 6] = B[ 7]; S[ 5] = B[ 6]; S[ 4] = B[ 5]; S[ 3] = B[ 4]; S[ 2] = B[ 3]; S[ 1] = B[ 2]; S[ 0] = B[ 1]; \
}
#define mROR9 { \
B[ 0] = S[27], B[ 1] = S[28], B[ 2] = S[29], B[ 3] = S[30], B[ 4] = S[31], B[ 5] = S[32], B[ 6] = S[33], B[ 7] = S[34], B[ 8] = S[35]; \
S[35] = S[26]; S[34] = S[25]; S[33] = S[24]; S[32] = S[23]; S[31] = S[22]; S[30] = S[21]; S[29] = S[20]; S[28] = S[19]; S[27] = S[18]; \
S[26] = S[17]; S[25] = S[16]; S[24] = S[15]; S[23] = S[14]; S[22] = S[13]; S[21] = S[12]; S[20] = S[11]; S[19] = S[10]; S[18] = S[ 9]; \
S[17] = S[ 8]; S[16] = S[ 7]; S[15] = S[ 6]; S[14] = S[ 5]; S[13] = S[ 4]; S[12] = S[ 3]; S[11] = S[ 2]; S[10] = S[ 1]; S[ 9] = S[ 0]; \
S[ 8] = B[ 8]; S[ 7] = B[ 7]; S[ 6] = B[ 6]; S[ 5] = B[ 5]; S[ 4] = B[ 4]; S[ 3] = B[ 3]; S[ 2] = B[ 2]; S[ 1] = B[ 1]; S[ 0] = B[ 0]; \
}
#define FUGUE512_3(x, y, z) { \
TIX4(x, S[ 0], S[ 1], S[ 4], S[ 7], S[ 8], S[22], S[24], S[27], S[30]); \
CMIX36(S[33], S[34], S[35], S[ 1], S[ 2], S[ 3], S[15], S[16], S[17]); \
SMIX_LDG(shared, S[33], S[34], S[35], S[ 0]); \
CMIX36(S[30], S[31], S[32], S[34], S[35], S[ 0], S[12], S[13], S[14]); \
SMIX_LDG(shared, S[30], S[31], S[32], S[33]); \
CMIX36(S[27], S[28], S[29], S[31], S[32], S[33], S[ 9], S[10], S[11]); \
SMIX(shared, S[27], S[28], S[29], S[30]); \
CMIX36(S[24], S[25], S[26], S[28], S[29], S[30], S[ 6], S[ 7], S[ 8]); \
SMIX_LDG(shared, S[24], S[25], S[26], S[27]); \
\
TIX4(y, S[24], S[25], S[28], S[31], S[32], S[10], S[12], S[15], S[18]); \
CMIX36(S[21], S[22], S[23], S[25], S[26], S[27], S[ 3], S[ 4], S[ 5]); \
SMIX(shared, S[21], S[22], S[23], S[24]); \
CMIX36(S[18], S[19], S[20], S[22], S[23], S[24], S[ 0], S[ 1], S[ 2]); \
SMIX_LDG(shared, S[18], S[19], S[20], S[21]); \
CMIX36(S[15], S[16], S[17], S[19], S[20], S[21], S[33], S[34], S[35]); \
SMIX_LDG(shared, S[15], S[16], S[17], S[18]); \
CMIX36(S[12], S[13], S[14], S[16], S[17], S[18], S[30], S[31], S[32]); \
SMIX(shared, S[12], S[13], S[14], S[15]); \
\
TIX4(z, S[12], S[13], S[16], S[19], S[20], S[34], S[ 0], S[ 3], S[ 6]); \
CMIX36(S[ 9], S[10], S[11], S[13], S[14], S[15], S[27], S[28], S[29]); \
SMIX_LDG(shared, S[ 9], S[10], S[11], S[12]); \
CMIX36(S[ 6], S[ 7], S[ 8], S[10], S[11], S[12], S[24], S[25], S[26]); \
SMIX_LDG(shared, S[ 6], S[ 7], S[ 8], S[ 9]); \
CMIX36(S[ 3], S[ 4], S[ 5], S[ 7], S[ 8], S[ 9], S[21], S[22], S[23]); \
SMIX_LDG(shared, S[ 3], S[ 4], S[ 5], S[ 6]); \
CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]); \
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]); \
}
__global__
__launch_bounds__(TPB, 2)
void skunk_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t *output64)
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
__shared__ uint32_t shared[4][256];
if(threadIdx.x<256) {
const uint32_t tmp = __ldg(&mixtab0[threadIdx.x]);
shared[0][threadIdx.x] = tmp;
shared[1][threadIdx.x] = ROR8X(tmp);
shared[2][threadIdx.x] = ROL16X(tmp);
shared[3][threadIdx.x] = ROL8X(tmp);
}
__syncthreads();
if (thread < threads)
{
// Skein
uint2 h[9];
uint2 t0, t1, t2;
uint32_t nonce = cuda_swab32(startNounce + thread);
uint2 nonce2 = make_uint2(c_buffer[0].x, nonce);
uint2 p[8];
p[1] = nonce2;
h[0] = c_buffer[ 1];
h[1] = c_buffer[ 2];
h[2] = c_buffer[ 3];
h[3] = c_buffer[ 4];
h[4] = c_buffer[ 5];
h[5] = c_buffer[ 6];
h[6] = c_buffer[ 7];
h[7] = c_buffer[ 8];
h[8] = c_buffer[ 9];
t0 = vectorize(0x50ull);
t1 = vectorize(0xB000000000000000ull);
t2 = t0^t1;
p[ 1]=nonce2 + h[1]; p[ 0]= c_buffer[10] + p[ 1];
p[ 2]=c_buffer[11];
p[ 3]=c_buffer[12];
p[ 4]=c_buffer[13];
p[ 5]=c_buffer[14];
p[ 6]=c_buffer[15];
p[ 7]=c_buffer[16];
// TFBIGMIX8e();
p[1] = ROL2(p[1], 46) ^ p[0];
p[2] += p[1];
p[0] += p[3];
p[1] = ROL2(p[1], 33) ^ p[2];
p[3] = c_buffer[17] ^ p[0];
p[4] += p[1];
p[6] += p[3];
p[0] += p[5];
p[2] += p[7];
p[1] = ROL2(p[1], 17) ^ p[4];
p[3] = ROL2(p[3], 49) ^ p[6];
p[5] = c_buffer[18] ^ p[0];
p[7] = c_buffer[19] ^ p[2];
p[6] += p[1];
p[0] += p[7];
p[2] += p[5];
p[4] += p[3];
p[1] = ROL2(p[1], 44) ^ p[6];
p[7] = ROL2(p[7], 9) ^ p[0];
p[5] = ROL2(p[5], 54) ^ p[2];
p[3] = ROR8(p[3]) ^ p[4];
p[ 0]+=h[1]; p[ 1]+=h[2]; p[ 2]+=h[3]; p[ 3]+=h[4]; p[ 4]+=h[5]; p[ 5]+=c_buffer[20]; p[ 7]+=c_buffer[21]; p[ 6]+=c_buffer[22];
TFBIGMIX8o();
p[ 0]+=h[2]; p[ 1]+=h[3]; p[ 2]+=h[4]; p[ 3]+=h[5]; p[ 4]+=h[6]; p[ 5]+=c_buffer[22]; p[ 7]+=c_buffer[23]; p[ 6]+=c_buffer[24];
TFBIGMIX8e();
p[ 0]+=h[3]; p[ 1]+=h[4]; p[ 2]+=h[5]; p[ 3]+=h[6]; p[ 4]+=h[7]; p[ 5]+=c_buffer[24]; p[ 7]+=c_buffer[25]; p[ 6]+=c_buffer[26];
TFBIGMIX8o();
p[ 0]+=h[4]; p[ 1]+=h[5]; p[ 2]+=h[6]; p[ 3]+=h[7]; p[ 4]+=h[8]; p[ 5]+=c_buffer[26]; p[ 7]+=c_buffer[27]; p[ 6]+=c_buffer[28];
TFBIGMIX8e();
p[ 0]+=h[5]; p[ 1]+=h[6]; p[ 2]+=h[7]; p[ 3]+=h[8]; p[ 4]+=h[0]; p[ 5]+=c_buffer[28]; p[ 7]+=c_buffer[29]; p[ 6]+=c_buffer[30];
TFBIGMIX8o();
p[ 0]+=h[6]; p[ 1]+=h[7]; p[ 2]+=h[8]; p[ 3]+=h[0]; p[ 4]+=h[1]; p[ 5]+=c_buffer[30]; p[ 7]+=c_buffer[31]; p[ 6]+=c_buffer[32];
TFBIGMIX8e();
p[ 0]+=h[7]; p[ 1]+=h[8]; p[ 2]+=h[0]; p[ 3]+=h[1]; p[ 4]+=h[2]; p[ 5]+=c_buffer[32]; p[ 7]+=c_buffer[33]; p[ 6]+=c_buffer[34];
TFBIGMIX8o();
p[ 0]+=h[8]; p[ 1]+=h[0]; p[ 2]+=h[1]; p[ 3]+=h[2]; p[ 4]+=h[3]; p[ 5]+=c_buffer[34]; p[ 7]+=c_buffer[35]; p[ 6]+=c_buffer[36];
TFBIGMIX8e();
p[ 0]+=h[0]; p[ 1]+=h[1]; p[ 2]+=h[2]; p[ 3]+=h[3]; p[ 4]+=h[4]; p[ 5]+=c_buffer[36]; p[ 7]+=c_buffer[37]; p[ 6]+=c_buffer[38];
TFBIGMIX8o();
p[ 0]+=h[1]; p[ 1]+=h[2]; p[ 2]+=h[3]; p[ 3]+=h[4]; p[ 4]+=h[5]; p[ 5]+=c_buffer[38]; p[ 7]+=c_buffer[39]; p[ 6]+=c_buffer[40];
TFBIGMIX8e();
p[ 0]+=h[2]; p[ 1]+=h[3]; p[ 2]+=h[4]; p[ 3]+=h[5]; p[ 4]+=h[6]; p[ 5]+=c_buffer[40]; p[ 7]+=c_buffer[41]; p[ 6]+=c_buffer[42];
TFBIGMIX8o();
p[ 0]+=h[3]; p[ 1]+=h[4]; p[ 2]+=h[5]; p[ 3]+=h[6]; p[ 4]+=h[7]; p[ 5]+=c_buffer[42]; p[ 7]+=c_buffer[43]; p[ 6]+=c_buffer[44];
TFBIGMIX8e();
p[ 0]+=h[4]; p[ 1]+=h[5]; p[ 2]+=h[6]; p[ 3]+=h[7]; p[ 4]+=h[8]; p[ 5]+=c_buffer[44]; p[ 7]+=c_buffer[45]; p[ 6]+=c_buffer[46];
TFBIGMIX8o();
p[ 0]+=h[5]; p[ 1]+=h[6]; p[ 2]+=h[7]; p[ 3]+=h[8]; p[ 4]+=h[0]; p[ 5]+=c_buffer[46]; p[ 7]+=c_buffer[47]; p[ 6]+=c_buffer[48];
TFBIGMIX8e();
p[ 0]+=h[6]; p[ 1]+=h[7]; p[ 2]+=h[8]; p[ 3]+=h[0]; p[ 4]+=h[1]; p[ 5]+=c_buffer[48]; p[ 7]+=c_buffer[49]; p[ 6]+=c_buffer[50];
TFBIGMIX8o();
p[ 0]+=h[7]; p[ 1]+=h[8]; p[ 2]+=h[0]; p[ 3]+=h[1]; p[ 4]+=h[2]; p[ 5]+=c_buffer[50]; p[ 7]+=c_buffer[51]; p[ 6]+=c_buffer[52];
TFBIGMIX8e();
p[ 0]+=h[8]; p[ 1]+=h[0]; p[ 2]+=h[1]; p[ 3]+=h[2]; p[ 4]+=h[3]; p[ 5]+=c_buffer[52]; p[ 7]+=c_buffer[53]; p[ 6]+=c_buffer[54];
TFBIGMIX8o();
p[ 0]+=h[0]; p[ 1]+=h[1]; p[ 2]+=h[2]; p[ 3]+=h[3]; p[ 4]+=h[4]; p[ 5]+=c_buffer[54]; p[ 7]+=c_buffer[55]; p[ 6]+=c_buffer[56];
p[0]^= c_buffer[57];
p[1]^= nonce2;
t0 = vectorize(8); // extra
t1 = vectorize(0xFF00000000000000ull); // etype
t2 = t0^t1;
h[0] = p[ 0];
h[1] = p[ 1];
h[2] = p[ 2];
h[3] = p[ 3];
h[4] = p[ 4];
h[5] = p[ 5];
h[6] = p[ 6];
h[7] = p[ 7];
h[8] = h[0] ^ h[1] ^ h[2] ^ h[3] ^ h[4] ^ h[5] ^ h[6] ^ h[7] ^ vectorize(0x1BD11BDAA9FC1A22);
p[ 0] = p[ 1] = p[ 2] = p[ 3] = p[ 4] =p[ 5] =p[ 6] = p[ 7] = vectorize(0);
#define h0 h[0]
#define h1 h[1]
#define h2 h[2]
#define h3 h[3]
#define h4 h[4]
#define h5 h[5]
#define h6 h[6]
#define h7 h[7]
#define h8 h[8]
TFBIG_4e_UI2(0);
TFBIG_4o_UI2(1);
TFBIG_4e_UI2(2);
TFBIG_4o_UI2(3);
TFBIG_4e_UI2(4);
TFBIG_4o_UI2(5);
TFBIG_4e_UI2(6);
TFBIG_4o_UI2(7);
TFBIG_4e_UI2(8);
TFBIG_4o_UI2(9);
TFBIG_4e_UI2(10);
TFBIG_4o_UI2(11);
TFBIG_4e_UI2(12);
TFBIG_4o_UI2(13);
TFBIG_4e_UI2(14);
TFBIG_4o_UI2(15);
TFBIG_4e_UI2(16);
TFBIG_4o_UI2(17);
TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, 18);
//cubehash
uint32_t x[32] = {
0x2AEA2A61, 0x50F494D4, 0x2D538B8B, 0x4167D83E,
0x3FEE2313, 0xC701CF8C, 0xCC39968E, 0x50AC5695,
0x4D42C787, 0xA647A8B3, 0x97CF0BEF, 0x825B4537,
0xEEF864D2, 0xF22090C4, 0xD0E5CD33, 0xA23911AE,
0xFCD398D9, 0x148FE485, 0x1B017BEF, 0xB6444532,
0x6A536159, 0x2FF5781C, 0x91FA7934, 0x0DBADEA9,
0xD65C8A2B, 0xA5A70E75, 0xB1C62456, 0xBC796576,
0x1921C8F7, 0xE7989AF1, 0x7795D246, 0xD43E3B44
};
// *(uint2x4*)&x[ 0] ^= *((uint2x4*)&p[0]);
#pragma unroll 4
for(int i=0;i<4;i++){
x[i*2] ^= p[i].x;
x[i*2+1] ^= p[i].y;
}
rrounds(x);
// *(uint2x4*)&x[ 0] ^= *((uint2x4*)&p[4]);
#pragma unroll 4
for(int i=0;i<4;i++){
x[i*2] ^= p[i+4].x;
x[i*2+1] ^= p[i+4].y;
}
rrounds(x);
// Padding Block
x[ 0] ^= 0x80;
rrounds(x);
// Final(x, (BitSequence*)Hash);
x[31] ^= 1;
/* "the state is then transformed invertibly through 10r identical rounds" */
#pragma unroll 10
for (int i = 0;i < 10;++i)
rrounds(x);
uint32_t Hash[16];
#pragma unroll 16
for(int i = 0; i < 16; i++)
Hash[i] = cuda_swab32(x[i]);
uint32_t S[36];
uint32_t B[ 9];
S[ 0] = S[ 1] = S[ 2] = S[ 3] = S[ 4] = S[ 5] = S[ 6] = S[ 7] = S[ 8] = S[ 9] = S[10] = S[11] = S[12] = S[13] = S[14] = S[15] = S[16] = S[17] = S[18] = S[19] = 0;
*(uint2x4*)&S[20] = *(uint2x4*)&c_S[ 0];
*(uint2x4*)&S[28] = *(uint2x4*)&c_S[ 8];
FUGUE512_3(Hash[0x0], Hash[0x1], Hash[0x2]);
FUGUE512_3(Hash[0x3], Hash[0x4], Hash[0x5]);
FUGUE512_3(Hash[0x6], Hash[0x7], Hash[0x8]);
FUGUE512_3(Hash[0x9], Hash[0xA], Hash[0xB]);
FUGUE512_3(Hash[0xC], Hash[0xD], Hash[0xE]);
FUGUE512_3(Hash[0xF], 0U, 512U);
//#pragma unroll 16
for (uint32_t i = 0; i < 32; i+=2){
mROR3;
CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]);
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
mROR3;
CMIX36(S[ 0], S[ 1], S[ 2], S[ 4], S[ 5], S[ 6], S[18], S[19], S[20]);
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
}
//#pragma unroll 13
for (uint32_t i = 0; i < 13; i ++) {
S[ 4] ^= S[ 0]; S[ 9] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0];
mROR9;
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0];
mROR9;
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[19] ^= S[ 0]; S[27] ^= S[ 0];
mROR9;
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
S[ 4] ^= S[ 0]; S[10] ^= S[ 0]; S[19] ^= S[ 0]; S[28] ^= S[ 0];
mROR8;
SMIX_LDG(shared, S[ 0], S[ 1], S[ 2], S[ 3]);
}
S[ 4] ^= S[ 0]; S[ 9] ^= S[ 0]; S[18] ^= S[ 0]; S[27] ^= S[ 0];
S[ 0] = cuda_swab32(S[ 1]); S[ 1] = cuda_swab32(S[ 2]); S[ 2] = cuda_swab32(S[ 3]); S[ 3] = cuda_swab32(S[ 4]);
S[ 4] = cuda_swab32(S[ 9]); S[ 5] = cuda_swab32(S[10]); S[ 6] = cuda_swab32(S[11]); S[ 7] = cuda_swab32(S[12]);
S[ 8] = cuda_swab32(S[18]); S[ 9] = cuda_swab32(S[19]); S[10] = cuda_swab32(S[20]); S[11] = cuda_swab32(S[21]);
S[12] = cuda_swab32(S[27]); S[13] = cuda_swab32(S[28]); S[14] = cuda_swab32(S[29]); S[15] = cuda_swab32(S[30]);
uint64_t *outpHash = &output64[thread<<3];
*(uint2x4*)&outpHash[ 0] = *(uint2x4*)&S[ 0];
*(uint2x4*)&outpHash[ 4] = *(uint2x4*)&S[ 8];
}
}
__host__
void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *g_hash)
{
const dim3 grid((threads + TPB - 1) / TPB);
const dim3 block(TPB);
uint64_t *d_hash = (uint64_t*) g_hash;
skunk_gpu_hash_80 <<< grid, block >>> (threads, startNounce, d_hash);
MyStreamSynchronize(NULL, 1, thr_id);
}
__host__
void skunk_setBlock_80(int thr_id, void *pdata)
{
uint64_t message[20];
memcpy(&message[0], pdata, 80);
uint64_t p[8];
uint64_t h[9];
uint64_t t0, t1, t2;
h[0] = 0x4903ADFF749C51CEull;
h[1] = 0x0D95DE399746DF03ull;
h[2] = 0x8FD1934127C79BCEull;
h[3] = 0x9A255629FF352CB1ull;
h[4] = 0x5DB62599DF6CA7B0ull;
h[5] = 0xEABE394CA9D5C3F4ull;
h[6] = 0x991112C71A75B523ull;
h[7] = 0xAE18A40B660FCC33ull;
// h[8] = h[0] ^ h[1] ^ h[2] ^ h[3] ^ h[4] ^ h[5] ^ h[6] ^ h[7] ^ SPH_C64(0x1BD11BDAA9FC1A22);
h[8] = 0xcab2076d98173ec4ULL;
t0 = 64; // ptr
t1 = 0x7000000000000000ull;
t2 = 0x7000000000000040ull;
memcpy(&p[0], &message[0], 64);
TFBIG_4e_PRE(0);
TFBIG_4o_PRE(1);
TFBIG_4e_PRE(2);
TFBIG_4o_PRE(3);
TFBIG_4e_PRE(4);
TFBIG_4o_PRE(5);
TFBIG_4e_PRE(6);
TFBIG_4o_PRE(7);
TFBIG_4e_PRE(8);
TFBIG_4o_PRE(9);
TFBIG_4e_PRE(10);
TFBIG_4o_PRE(11);
TFBIG_4e_PRE(12);
TFBIG_4o_PRE(13);
TFBIG_4e_PRE(14);
TFBIG_4o_PRE(15);
TFBIG_4e_PRE(16);
TFBIG_4o_PRE(17);
TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, 18);
message[10] = message[0] ^ p[0];
message[11] = message[1] ^ p[1];
message[12] = message[2] ^ p[2];
message[13] = message[3] ^ p[3];
message[14] = message[4] ^ p[4];
message[15] = message[5] ^ p[5];
message[16] = message[6] ^ p[6];
message[17] = message[7] ^ p[7];
message[18] = t2;
uint64_t buffer[128];
// buffer[ 0] = message[ 8];
buffer[ 0] = message[ 9];
h[0] = buffer[ 1] = message[10];
h[1] = buffer[ 2] = message[11];
h[2] = buffer[ 3] = message[12];
h[3] = buffer[ 4] = message[13];
h[4] = buffer[ 5] = message[14];
h[5] = buffer[ 6] = message[15];
h[6] = buffer[ 7] = message[16];
h[7] = buffer[ 8] = message[17];
h[8] = buffer[ 9] = h[0]^h[1]^h[2]^h[3]^h[4]^h[5]^h[6]^h[7]^0x1BD11BDAA9FC1A22ULL;
t0 = 0x50ull;
t1 = 0xB000000000000000ull;
t2 = t0^t1;
p[0] = message[ 8] + h[0];
p[2] = h[2]; p[3] = h[3]; p[4] = h[4];
p[5] = h[5] + t0;
p[6] = h[6] + t1;
p[7] = h[7];
p[2] += p[3];
p[4] += p[5]; p[6] += p[7];
p[3] = ROTL64(p[3], 36) ^ p[2]; p[5] = ROTL64(p[5], 19) ^ p[4]; p[7] = ROTL64(p[7], 37) ^ p[6];
p[4] += p[7]; p[6] += p[5];
p[7] = ROTL64(p[7], 27) ^ p[4];
p[5] = ROTL64(p[5], 14) ^ p[6];
buffer[10] = p[ 0];
buffer[11] = p[ 2];
buffer[12] = p[ 3];
buffer[13] = p[ 4];
buffer[14] = p[ 5];
buffer[15] = p[ 6];
buffer[16] = p[ 7];
buffer[17] = ROTL64(p[3], 42);
buffer[18] = ROTL64(p[5], 36);
buffer[19] = ROTL64(p[7], 39);
buffer[20] = h[6]+t1;
buffer[21] = h[8]+1;
buffer[22] = h[7]+t2;
buffer[23] = h[0]+2;
buffer[24] = h[8]+t0;
buffer[25] = h[1]+3;
buffer[26] = h[0]+t1;
buffer[27] = h[2]+4;
buffer[28] = h[1]+t2;
buffer[29] = h[3]+5;
buffer[30] = h[2]+t0;
buffer[31] = h[4]+6;
buffer[32] = h[3]+t1;
buffer[33] = h[5]+7;
buffer[34] = h[4]+t2;
buffer[35] = h[6]+8;
buffer[36] = h[5]+t0;
buffer[37] = h[7]+9;
buffer[38] = h[6]+t1;
buffer[39] = h[8]+10;
buffer[40] = h[7]+t2;
buffer[41] = h[0]+11;
buffer[42] = h[8]+t0;
buffer[43] = h[1]+12;
buffer[44] = h[0]+t1;
buffer[45] = h[2]+13;
buffer[46] = h[1]+t2;
buffer[47] = h[3]+14;
buffer[48] = h[2]+t0;
buffer[49] = h[4]+15;
buffer[50] = h[3]+t1;
buffer[51] = h[5]+16;
buffer[52] = h[4]+t2;
buffer[53] = h[6]+17;
buffer[54] = h[5]+t0;
buffer[55] = h[7]+18;
buffer[56] = h[6]+t1;
buffer[57] = message[ 8];
cudaMemcpyToSymbol(c_buffer, buffer, sizeof(c_buffer), 0, cudaMemcpyHostToDevice);
CUDA_LOG_ERROR();
}
__host__
void skunk_cpu_init(int thr_id, uint32_t threads)
{
cuda_get_arch(thr_id);
}

371
skunk/cuda_skunk_streebog.cu

@ -0,0 +1,371 @@
/*
* Streebog GOST R 34.10-2012 CUDA implementation.
*
* https://tools.ietf.org/html/rfc6986
* https://en.wikipedia.org/wiki/Streebog
*
* ==========================(LICENSE BEGIN)============================
*
* @author Tanguy Pruvot - 2015
* @author Alexis Provos - 2016
*/
// Further improved with shared memory partial utilization
// Tested under CUDA7.5 toolkit for cp 5.0/5.2
#include <miner.h>
#include <cuda_helper.h>
#include <cuda_vectors.h>
#include <cuda_vector_uint2x4.h>
#include "skunk/streebog_arrays.cuh"
//#define FULL_UNROLL
__device__ __forceinline__
static void GOST_FS(const uint2 shared[8][256],const uint2 *const __restrict__ state,uint2* return_state)
{
return_state[0] = __ldg(&T02[__byte_perm(state[7].x,0,0x44440)])
^ shared[1][__byte_perm(state[6].x,0,0x44440)]
^ shared[2][__byte_perm(state[5].x,0,0x44440)]
^ shared[3][__byte_perm(state[4].x,0,0x44440)]
^ shared[4][__byte_perm(state[3].x,0,0x44440)]
^ shared[5][__byte_perm(state[2].x,0,0x44440)]
^ shared[6][__byte_perm(state[1].x,0,0x44440)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44440)]);
return_state[1] = __ldg(&T02[__byte_perm(state[7].x,0,0x44441)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44441)])
^ shared[2][__byte_perm(state[5].x,0,0x44441)]
^ shared[3][__byte_perm(state[4].x,0,0x44441)]
^ shared[4][__byte_perm(state[3].x,0,0x44441)]
^ shared[5][__byte_perm(state[2].x,0,0x44441)]
^ shared[6][__byte_perm(state[1].x,0,0x44441)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44441)]);
return_state[2] = __ldg(&T02[__byte_perm(state[7].x,0,0x44442)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44442)])
^ shared[2][__byte_perm(state[5].x,0,0x44442)]
^ shared[3][__byte_perm(state[4].x,0,0x44442)]
^ shared[4][__byte_perm(state[3].x,0,0x44442)]
^ shared[5][__byte_perm(state[2].x,0,0x44442)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44442)])
^ shared[6][__byte_perm(state[1].x,0,0x44442)];
return_state[3] = __ldg(&T02[__byte_perm(state[7].x,0,0x44443)])
^ shared[1][__byte_perm(state[6].x,0,0x44443)]
^ shared[2][__byte_perm(state[5].x,0,0x44443)]
^ shared[3][__byte_perm(state[4].x,0,0x44443)]
^ __ldg(&T42[__byte_perm(state[3].x,0,0x44443)])
^ shared[5][__byte_perm(state[2].x,0,0x44443)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44443)])
^ shared[6][__byte_perm(state[1].x,0,0x44443)];
return_state[4] = __ldg(&T02[__byte_perm(state[7].y,0,0x44440)])
^ shared[1][__byte_perm(state[6].y,0,0x44440)]
^ __ldg(&T22[__byte_perm(state[5].y,0,0x44440)])
^ shared[3][__byte_perm(state[4].y,0,0x44440)]
^ shared[4][__byte_perm(state[3].y,0,0x44440)]
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44440)])
^ shared[5][__byte_perm(state[2].y,0,0x44440)]
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44440)]);
return_state[5] = __ldg(&T02[__byte_perm(state[7].y,0,0x44441)])
^ shared[2][__byte_perm(state[5].y,0,0x44441)]
^ __ldg(&T12[__byte_perm(state[6].y,0,0x44441)])
^ shared[3][__byte_perm(state[4].y,0,0x44441)]
^ shared[4][__byte_perm(state[3].y,0,0x44441)]
^ shared[5][__byte_perm(state[2].y,0,0x44441)]
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44441)])
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44441)]);
return_state[6] = __ldg(&T02[__byte_perm(state[7].y,0,0x44442)])
^ shared[1][__byte_perm(state[6].y,0,0x44442)]
^ shared[2][__byte_perm(state[5].y,0,0x44442)]
^ shared[3][__byte_perm(state[4].y,0,0x44442)]
^ shared[4][__byte_perm(state[3].y,0,0x44442)]
^ shared[5][__byte_perm(state[2].y,0,0x44442)]
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44442)])
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44442)]);
return_state[7] = __ldg(&T02[__byte_perm(state[7].y,0,0x44443)])
^ __ldg(&T12[__byte_perm(state[6].y,0,0x44443)])
^ shared[2][__byte_perm(state[5].y,0,0x44443)]
^ shared[3][__byte_perm(state[4].y,0,0x44443)]
^ shared[4][__byte_perm(state[3].y,0,0x44443)]
^ shared[5][__byte_perm(state[2].y,0,0x44443)]
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44443)])
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44443)]);
}
__device__ __forceinline__
static void GOST_FS_LDG(const uint2 shared[8][256],const uint2 *const __restrict__ state,uint2* return_state)
{
return_state[0] = __ldg(&T02[__byte_perm(state[7].x,0,0x44440)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44440)])
^ shared[2][__byte_perm(state[5].x,0,0x44440)]
^ shared[3][__byte_perm(state[4].x,0,0x44440)]
^ shared[4][__byte_perm(state[3].x,0,0x44440)]
^ shared[5][__byte_perm(state[2].x,0,0x44440)]
^ shared[6][__byte_perm(state[1].x,0,0x44440)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44440)]);
return_state[1] = __ldg(&T02[__byte_perm(state[7].x,0,0x44441)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44441)])
^ shared[2][__byte_perm(state[5].x,0,0x44441)]
^ shared[3][__byte_perm(state[4].x,0,0x44441)]
^ shared[4][__byte_perm(state[3].x,0,0x44441)]
^ shared[5][__byte_perm(state[2].x,0,0x44441)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44441)])
^ shared[6][__byte_perm(state[1].x,0,0x44441)];
return_state[2] = __ldg(&T02[__byte_perm(state[7].x,0,0x44442)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44442)])
^ shared[2][__byte_perm(state[5].x,0,0x44442)]
^ shared[3][__byte_perm(state[4].x,0,0x44442)]
^ shared[4][__byte_perm(state[3].x,0,0x44442)]
^ shared[5][__byte_perm(state[2].x,0,0x44442)]
^ shared[6][__byte_perm(state[1].x,0,0x44442)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44442)]);
return_state[3] = __ldg(&T02[__byte_perm(state[7].x,0,0x44443)])
^ __ldg(&T12[__byte_perm(state[6].x,0,0x44443)])
^ shared[2][__byte_perm(state[5].x,0,0x44443)]
^ shared[3][__byte_perm(state[4].x,0,0x44443)]
^ shared[4][__byte_perm(state[3].x,0,0x44443)]
^ shared[5][__byte_perm(state[2].x,0,0x44443)]
^ shared[6][__byte_perm(state[1].x,0,0x44443)]
^ __ldg(&T72[__byte_perm(state[0].x,0,0x44443)]);
return_state[4] = __ldg(&T02[__byte_perm(state[7].y,0,0x44440)])
^ shared[1][__byte_perm(state[6].y,0,0x44440)]
^ __ldg(&T22[__byte_perm(state[5].y,0,0x44440)])
^ shared[3][__byte_perm(state[4].y,0,0x44440)]
^ shared[4][__byte_perm(state[3].y,0,0x44440)]
^ shared[5][__byte_perm(state[2].y,0,0x44440)]
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44440)])
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44440)]);
return_state[5] = __ldg(&T02[__byte_perm(state[7].y,0,0x44441)])
^ __ldg(&T12[__byte_perm(state[6].y,0,0x44441)])
^ shared[2][__byte_perm(state[5].y,0,0x44441)]
^ shared[3][__byte_perm(state[4].y,0,0x44441)]
^ shared[4][__byte_perm(state[3].y,0,0x44441)]
^ shared[5][__byte_perm(state[2].y,0,0x44441)]
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44441)])
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44441)]);
return_state[6] = __ldg(&T02[__byte_perm(state[7].y,0,0x44442)])
^ __ldg(&T12[__byte_perm(state[6].y,0,0x44442)])
^ __ldg(&T22[__byte_perm(state[5].y,0,0x44442)])
^ shared[3][__byte_perm(state[4].y,0,0x44442)]
^ shared[4][__byte_perm(state[3].y,0,0x44442)]
^ shared[5][__byte_perm(state[2].y,0,0x44442)]
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44442)])
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44442)]);
return_state[7] = __ldg(&T02[__byte_perm(state[7].y,0,0x44443)])
^ shared[1][__byte_perm(state[6].y,0,0x44443)]
^ __ldg(&T22[__byte_perm(state[5].y,0,0x44443)])
^ shared[3][__byte_perm(state[4].y,0,0x44443)]
^ shared[4][__byte_perm(state[3].y,0,0x44443)]
^ shared[5][__byte_perm(state[2].y,0,0x44443)]
^ __ldg(&T72[__byte_perm(state[0].y,0,0x44443)])
^ __ldg(&T62[__byte_perm(state[1].y,0,0x44443)]);
}
__device__ __forceinline__
static void GOST_E12(const uint2 shared[8][256],uint2 *const __restrict__ K, uint2 *const __restrict__ state)
{
uint2 t[ 8];
//#pragma unroll 12
for(int i=0; i<12; i++){
GOST_FS(shared,state, t);
#pragma unroll 8
for(int j=0;j<8;j++)
K[ j] ^= *(uint2*)&CC[i][j];
#pragma unroll 8
for(int j=0;j<8;j++)
state[ j] = t[ j];
GOST_FS_LDG(shared,K, t);
#pragma unroll 8
for(int j=0;j<8;j++)
state[ j]^= t[ j];
#pragma unroll 8
for(int j=0;j<8;j++)
K[ j] = t[ j];
}
}
__constant__ uint64_t target64[4];
__host__
void skunk_set_target(uint32_t* ptarget)
{
cudaMemcpyToSymbol(target64, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
#define TPB 256
__global__
__launch_bounds__(TPB, 2)
void skunk_streebog_gpu_final_64(uint64_t *g_hash, uint32_t* resNonce)
{
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
uint2 buf[8], t[8], temp[8], K0[8], hash[8];
__shared__ uint2 shared[8][256];
shared[0][threadIdx.x] = __ldg(&T02[threadIdx.x]);
shared[1][threadIdx.x] = __ldg(&T12[threadIdx.x]);
shared[2][threadIdx.x] = __ldg(&T22[threadIdx.x]);
shared[3][threadIdx.x] = __ldg(&T32[threadIdx.x]);
shared[4][threadIdx.x] = __ldg(&T42[threadIdx.x]);
shared[5][threadIdx.x] = __ldg(&T52[threadIdx.x]);
shared[6][threadIdx.x] = __ldg(&T62[threadIdx.x]);
shared[7][threadIdx.x] = __ldg(&T72[threadIdx.x]);
// if (thread < threads)
// {
uint64_t* inout = &g_hash[thread<<3];
*(uint2x4*)&hash[0] = __ldg4((uint2x4*)&inout[0]);
*(uint2x4*)&hash[4] = __ldg4((uint2x4*)&inout[4]);
__threadfence_block();
K0[0] = vectorize(0x74a5d4ce2efc83b3);
#pragma unroll 8
for(uint32_t i=0;i<8;i++){
buf[ i] = hash[ i] ^ K0[ 0];
}
//#pragma unroll 12
for(int i=0; i<12; i++){
GOST_FS(shared, buf, temp);
#pragma unroll 8
for(uint32_t j=0;j<8;j++){
buf[ j] = temp[ j] ^ *(uint2*)&precomputed_values[i][j];
}
}
#pragma unroll 8
for(int j=0;j<8;j++){
buf[ j]^= hash[ j];
}
#pragma unroll 8
for(int j=0;j<8;j++){
K0[ j] = buf[ j];
}
K0[7].y ^= 0x00020000;
GOST_FS(shared, K0, t);
#pragma unroll 8
for(uint32_t i=0;i<8;i++)
K0[ i] = t[ i];
t[7].y ^= 0x01000000;
GOST_E12(shared, K0, t);
#pragma unroll 8
for(int j=0;j<8;j++)
buf[ j] ^= t[ j];
buf[7].y ^= 0x01000000;
GOST_FS(shared, buf,K0);
buf[7].y ^= 0x00020000;
#pragma unroll 8
for(uint32_t j=0;j<8;j++)
t[ j] = K0[ j];
t[7].y ^= 0x00020000;
GOST_E12(shared, K0, t);
#pragma unroll 8
for(uint32_t j=0;j<8;j++)
buf[ j] ^= t[ j];
GOST_FS(shared, buf,K0); // K = F(h)
hash[7]+= vectorize(0x0100000000000000);
#pragma unroll 8
for(uint32_t j=0;j<8;j++)
t[ j] = K0[ j] ^ hash[ j];
// #pragma unroll
for(uint32_t i=0; i<10; i++){
GOST_FS(shared, t, temp);
#pragma unroll 8
for(uint32_t j=0;j<8;j++){
t[ j] = temp[ j];
K0[ j] = K0[ j] ^ *(uint2*)&CC[ i][ j];
}
GOST_FS(shared, K0, temp);
#pragma unroll 8
for(uint32_t j=0;j<8;j++){
K0[ j] = temp[ j];
t[ j]^= temp[ j];
}
}
GOST_FS(shared, t, temp);
#pragma unroll 8
for(uint32_t j=0;j<8;j++){
t[ j] = temp[ j];
K0[ j] = K0[ j] ^ *(uint2*)&CC[10][ j];
}
GOST_FS(shared, K0, temp);
#pragma unroll 8
for(int i=7;i>=0;i--){
t[i].x = t[i].x ^ temp[i].x;
temp[i].x = temp[i].x ^ ((uint32_t*)&CC[11])[i<<1];
}
uint2 last[2];
#define T0(x) shared[0][x]
#define T1(x) shared[1][x]
#define T2(x) shared[2][x]
#define T3(x) shared[3][x]
#define T4(x) shared[4][x]
#define T5(x) shared[5][x]
#define T6(x) shared[6][x]
#define T7(x) shared[7][x]
last[ 0] = T0(__byte_perm(t[7].x,0,0x44443)) ^ T1(__byte_perm(t[6].x,0,0x44443))
^ T2(__byte_perm(t[5].x,0,0x44443)) ^ T3(__byte_perm(t[4].x,0,0x44443))
^ T4(__byte_perm(t[3].x,0,0x44443)) ^ T5(__byte_perm(t[2].x,0,0x44443))
^ T6(__byte_perm(t[1].x,0,0x44443)) ^ T7(__byte_perm(t[0].x,0,0x44443));
last[ 1] = T0(__byte_perm(temp[7].x,0,0x44443)) ^ T1(__byte_perm(temp[6].x,0,0x44443))
^ T2(__byte_perm(temp[5].x,0,0x44443)) ^ T3(__byte_perm(temp[4].x,0,0x44443))
^ T4(__byte_perm(temp[3].x,0,0x44443)) ^ T5(__byte_perm(temp[2].x,0,0x44443))
^ T6(__byte_perm(temp[1].x,0,0x44443)) ^ T7(__byte_perm(temp[0].x,0,0x44443));
if(devectorize(buf[3] ^ hash[3] ^ last[ 0] ^ last[ 1]) <= target64[3]){
uint32_t tmp = atomicExch(&resNonce[0], thread);
if (tmp != UINT32_MAX)
resNonce[1] = tmp;
}
}
__host__
void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce)
{
dim3 grid((threads + TPB-1) / TPB);
dim3 block(TPB);
skunk_streebog_gpu_final_64 <<< grid, block >>> ((uint64_t*)d_hash, d_resNonce);
}

385
skunk/skein_header.h

@ -0,0 +1,385 @@
/* Elementary defines for SKEIN */
/*
* M9_ ## s ## _ ## i evaluates to s+i mod 9 (0 <= s <= 18, 0 <= i <= 7).
*/
#define M9_0_0 0
#define M9_0_1 1
#define M9_0_2 2
#define M9_0_3 3
#define M9_0_4 4
#define M9_0_5 5
#define M9_0_6 6
#define M9_0_7 7
#define M9_1_0 1
#define M9_1_1 2
#define M9_1_2 3
#define M9_1_3 4
#define M9_1_4 5
#define M9_1_5 6
#define M9_1_6 7
#define M9_1_7 8
#define M9_2_0 2
#define M9_2_1 3
#define M9_2_2 4
#define M9_2_3 5
#define M9_2_4 6
#define M9_2_5 7
#define M9_2_6 8
#define M9_2_7 0
#define M9_3_0 3
#define M9_3_1 4
#define M9_3_2 5
#define M9_3_3 6
#define M9_3_4 7
#define M9_3_5 8
#define M9_3_6 0
#define M9_3_7 1
#define M9_4_0 4
#define M9_4_1 5
#define M9_4_2 6
#define M9_4_3 7
#define M9_4_4 8
#define M9_4_5 0
#define M9_4_6 1
#define M9_4_7 2
#define M9_5_0 5
#define M9_5_1 6
#define M9_5_2 7
#define M9_5_3 8
#define M9_5_4 0
#define M9_5_5 1
#define M9_5_6 2
#define M9_5_7 3
#define M9_6_0 6
#define M9_6_1 7
#define M9_6_2 8
#define M9_6_3 0
#define M9_6_4 1
#define M9_6_5 2
#define M9_6_6 3
#define M9_6_7 4
#define M9_7_0 7
#define M9_7_1 8
#define M9_7_2 0
#define M9_7_3 1
#define M9_7_4 2
#define M9_7_5 3
#define M9_7_6 4
#define M9_7_7 5
#define M9_8_0 8
#define M9_8_1 0
#define M9_8_2 1
#define M9_8_3 2
#define M9_8_4 3
#define M9_8_5 4
#define M9_8_6 5
#define M9_8_7 6
#define M9_9_0 0
#define M9_9_1 1
#define M9_9_2 2
#define M9_9_3 3
#define M9_9_4 4
#define M9_9_5 5
#define M9_9_6 6
#define M9_9_7 7
#define M9_10_0 1
#define M9_10_1 2
#define M9_10_2 3
#define M9_10_3 4
#define M9_10_4 5
#define M9_10_5 6
#define M9_10_6 7
#define M9_10_7 8
#define M9_11_0 2
#define M9_11_1 3
#define M9_11_2 4
#define M9_11_3 5
#define M9_11_4 6
#define M9_11_5 7
#define M9_11_6 8
#define M9_11_7 0
#define M9_12_0 3
#define M9_12_1 4
#define M9_12_2 5
#define M9_12_3 6
#define M9_12_4 7
#define M9_12_5 8
#define M9_12_6 0
#define M9_12_7 1
#define M9_13_0 4
#define M9_13_1 5
#define M9_13_2 6
#define M9_13_3 7
#define M9_13_4 8
#define M9_13_5 0
#define M9_13_6 1
#define M9_13_7 2
#define M9_14_0 5
#define M9_14_1 6
#define M9_14_2 7
#define M9_14_3 8
#define M9_14_4 0
#define M9_14_5 1
#define M9_14_6 2
#define M9_14_7 3
#define M9_15_0 6
#define M9_15_1 7
#define M9_15_2 8
#define M9_15_3 0
#define M9_15_4 1
#define M9_15_5 2
#define M9_15_6 3
#define M9_15_7 4
#define M9_16_0 7
#define M9_16_1 8
#define M9_16_2 0
#define M9_16_3 1
#define M9_16_4 2
#define M9_16_5 3
#define M9_16_6 4
#define M9_16_7 5
#define M9_17_0 8
#define M9_17_1 0
#define M9_17_2 1
#define M9_17_3 2
#define M9_17_4 3
#define M9_17_5 4
#define M9_17_6 5
#define M9_17_7 6
#define M9_18_0 0
#define M9_18_1 1
#define M9_18_2 2
#define M9_18_3 3
#define M9_18_4 4
#define M9_18_5 5
#define M9_18_6 6
#define M9_18_7 7
/*
* M3_ ## s ## _ ## i evaluates to s+i mod 3 (0 <= s <= 18, 0 <= i <= 1).
*/
#define M3_0_0 0
#define M3_0_1 1
#define M3_1_0 1
#define M3_1_1 2
#define M3_2_0 2
#define M3_2_1 0
#define M3_3_0 0
#define M3_3_1 1
#define M3_4_0 1
#define M3_4_1 2
#define M3_5_0 2
#define M3_5_1 0
#define M3_6_0 0
#define M3_6_1 1
#define M3_7_0 1
#define M3_7_1 2
#define M3_8_0 2
#define M3_8_1 0
#define M3_9_0 0
#define M3_9_1 1
#define M3_10_0 1
#define M3_10_1 2
#define M3_11_0 2
#define M3_11_1 0
#define M3_12_0 0
#define M3_12_1 1
#define M3_13_0 1
#define M3_13_1 2
#define M3_14_0 2
#define M3_14_1 0
#define M3_15_0 0
#define M3_15_1 1
#define M3_16_0 1
#define M3_16_1 2
#define M3_17_0 2
#define M3_17_1 0
#define M3_18_0 0
#define M3_18_1 1
#define XCAT(x, y) XCAT_(x, y)
#define XCAT_(x, y) x ## y
#define SKBI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i))
#define SKBT(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
#define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \
w0 = (w0 + SKBI(k, s, 0)); \
w1 = (w1 + SKBI(k, s, 1)); \
w2 = (w2 + SKBI(k, s, 2)); \
w3 = (w3 + SKBI(k, s, 3)); \
w4 = (w4 + SKBI(k, s, 4)); \
w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
w7 = (w7 + SKBI(k, s, 7) + make_uint2(s,0); \
}
#define TFBIG_MIX(x0, x1, rc) { \
x0 = x0 + x1; \
x1 = ROL2(x1, rc) ^ x0; \
}
#define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \
TFBIG_MIX(w0, w1, rc0); \
TFBIG_MIX(w2, w3, rc1); \
TFBIG_MIX(w4, w5, rc2); \
TFBIG_MIX(w6, w7, rc3); \
}
#define TFBIG_4e(s) { \
TFBIG_ADDKEY(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \
TFBIG_MIX8(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \
TFBIG_MIX8(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \
TFBIG_MIX8(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \
}
#define TFBIG_4o(s) { \
TFBIG_ADDKEY(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \
TFBIG_MIX8(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \
TFBIG_MIX8(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \
TFBIG_MIX8(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \
}
#define TFBIG_KINIT_UI2(k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2) { \
k8 = ((k0 ^ k1) ^ (k2 ^ k3)) ^ ((k4 ^ k5) ^ (k6 ^ k7)) \
^ vectorize(0x1BD11BDAA9FC1A22); \
t2 = t0 ^ t1; \
}
#define TFBIG_ADDKEY_UI2(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \
w0 = (w0 + SKBI(k, s, 0)); \
w1 = (w1 + SKBI(k, s, 1)); \
w2 = (w2 + SKBI(k, s, 2)); \
w3 = (w3 + SKBI(k, s, 3)); \
w4 = (w4 + SKBI(k, s, 4)); \
w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
w7 = (w7 + SKBI(k, s, 7) + vectorize(s)); \
}
#define TFBIG_ADDKEY_PRE(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) { \
w0 = (w0 + SKBI(k, s, 0)); \
w1 = (w1 + SKBI(k, s, 1)); \
w2 = (w2 + SKBI(k, s, 2)); \
w3 = (w3 + SKBI(k, s, 3)); \
w4 = (w4 + SKBI(k, s, 4)); \
w5 = (w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
w6 = (w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
w7 = (w7 + SKBI(k, s, 7) + (s)); \
}
#define TFBIG_MIX_UI2(x0, x1, rc) { \
x0 = x0 + x1; \
x1 = ROL2(x1, rc) ^ x0; \
}
#define TFBIG_MIX_PRE(x0, x1, rc) { \
x0 = x0 + x1; \
x1 = ROTL64(x1, rc) ^ x0; \
}
#define TFBIG_MIX8_UI2(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \
TFBIG_MIX_UI2(w0, w1, rc0); \
TFBIG_MIX_UI2(w2, w3, rc1); \
TFBIG_MIX_UI2(w4, w5, rc2); \
TFBIG_MIX_UI2(w6, w7, rc3); \
}
#define TFBIG_MIX8_PRE(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) { \
TFBIG_MIX_PRE(w0, w1, rc0); \
TFBIG_MIX_PRE(w2, w3, rc1); \
TFBIG_MIX_PRE(w4, w5, rc2); \
TFBIG_MIX_PRE(w6, w7, rc3); \
}
#define TFBIG_4e_UI2(s) { \
TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \
TFBIG_MIX8_UI2(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \
TFBIG_MIX8_UI2(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \
TFBIG_MIX8_UI2(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \
}
#define TFBIG_4e_PRE(s) { \
TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 46, 36, 19, 37); \
TFBIG_MIX8_PRE(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 33, 27, 14, 42); \
TFBIG_MIX8_PRE(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 17, 49, 36, 39); \
TFBIG_MIX8_PRE(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 44, 9, 54, 56); \
}
#define TFBIG_4o_UI2(s) { \
TFBIG_ADDKEY_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8_UI2(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \
TFBIG_MIX8_UI2(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \
TFBIG_MIX8_UI2(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \
TFBIG_MIX8_UI2(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \
}
#define TFBIG_4o_PRE(s) { \
TFBIG_ADDKEY_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], h, t, s); \
TFBIG_MIX8_PRE(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], 39, 30, 34, 24); \
TFBIG_MIX8_PRE(p[2], p[1], p[4], p[7], p[6], p[5], p[0], p[3], 13, 50, 10, 17); \
TFBIG_MIX8_PRE(p[4], p[1], p[6], p[3], p[0], p[5], p[2], p[7], 25, 29, 39, 43); \
TFBIG_MIX8_PRE(p[6], p[1], p[0], p[7], p[2], p[5], p[4], p[3], 8, 35, 56, 22); \
}
#define TFBIGMIX8e(){\
p[ 0]+=p[ 1];p[ 2]+=p[ 3];p[ 4]+=p[ 5];p[ 6]+=p[ 7];p[ 1]=ROL2(p[ 1],46) ^ p[ 0];p[ 3]=ROL2(p[ 3],36) ^ p[ 2];p[ 5]=ROL2(p[ 5],19) ^ p[ 4];p[ 7]=ROL2(p[ 7],37) ^ p[ 6];\
p[ 2]+=p[ 1];p[ 4]+=p[ 7];p[ 6]+=p[ 5];p[ 0]+=p[ 3];p[ 1]=ROL2(p[ 1],33) ^ p[ 2];p[ 7]=ROL2(p[ 7],27) ^ p[ 4];p[ 5]=ROL2(p[ 5],14) ^ p[ 6];p[ 3]=ROL2(p[ 3],42) ^ p[ 0];\
p[ 4]+=p[ 1];p[ 6]+=p[ 3];p[ 0]+=p[ 5];p[ 2]+=p[ 7];p[ 1]=ROL2(p[ 1],17) ^ p[ 4];p[ 3]=ROL2(p[ 3],49) ^ p[ 6];p[ 5]=ROL2(p[ 5],36) ^ p[ 0];p[ 7]=ROL2(p[ 7],39) ^ p[ 2];\
p[ 6]+=p[ 1];p[ 0]+=p[ 7];p[ 2]+=p[ 5];p[ 4]+=p[ 3];p[ 1]=ROL2(p[ 1],44) ^ p[ 6];p[ 7]=ROL2(p[ 7], 9) ^ p[ 0];p[ 5]=ROL2(p[ 5],54) ^ p[ 2];p[ 3]=ROR8(p[ 3]) ^ p[ 4];\
}
#define TFBIGMIX8o(){\
p[ 0]+=p[ 1];p[ 2]+=p[ 3];p[ 4]+=p[ 5];p[ 6]+=p[ 7];p[ 1]=ROL2(p[ 1],39) ^ p[ 0];p[ 3]=ROL2(p[ 3],30) ^ p[ 2];p[ 5]=ROL2(p[ 5],34) ^ p[ 4];p[ 7]=ROL24(p[ 7]) ^ p[ 6];\
p[ 2]+=p[ 1];p[ 4]+=p[ 7];p[ 6]+=p[ 5];p[ 0]+=p[ 3];p[ 1]=ROL2(p[ 1],13) ^ p[ 2];p[ 7]=ROL2(p[ 7],50) ^ p[ 4];p[ 5]=ROL2(p[ 5],10) ^ p[ 6];p[ 3]=ROL2(p[ 3],17) ^ p[ 0];\
p[ 4]+=p[ 1];p[ 6]+=p[ 3];p[ 0]+=p[ 5];p[ 2]+=p[ 7];p[ 1]=ROL2(p[ 1],25) ^ p[ 4];p[ 3]=ROL2(p[ 3],29) ^ p[ 6];p[ 5]=ROL2(p[ 5],39) ^ p[ 0];p[ 7]=ROL2(p[ 7],43) ^ p[ 2];\
p[ 6]+=p[ 1];p[ 0]+=p[ 7];p[ 2]+=p[ 5];p[ 4]+=p[ 3];p[ 1]=ROL8(p[ 1]) ^ p[ 6];p[ 7]=ROL2(p[ 7],35) ^ p[ 0];p[ 5]=ROR8(p[ 5]) ^ p[ 2];p[ 3]=ROL2(p[ 3],22) ^ p[ 4];\
}
#define addwBuff(x0,x1,x2,x3,x4){\
p[ 0]+=h[x0];\
p[ 1]+=h[x1];\
p[ 2]+=h[x2];\
p[ 3]+=h[x3];\
p[ 4]+=h[x4];\
p[ 5]+=c_buffer[i++];\
p[ 7]+=c_buffer[i++];\
p[ 6]+=c_buffer[i];\
}
#define addwCon(x0,x1,x2,x3,x4,x5,x6,x7,y0,y1,y2){\
p[ 0]+= h[x0];\
p[ 1]+= h[x1];\
p[ 2]+= h[x2];\
p[ 3]+= h[x3];\
p[ 4]+= h[x4];\
p[ 5]+= h[x5] + c_t[y0];\
p[ 6]+= h[x6] + c_t[y1];\
p[ 7]+= h[x7] + c_add[y2];\
}

43
skunk.cu → skunk/skunk.cu

@ -2,7 +2,7 @@
* Skunk Algo for Signatum * Skunk Algo for Signatum
* (skein, cube, fugue, gost streebog) * (skein, cube, fugue, gost streebog)
* *
* tpruvot@github 06 2017 - GPLv3 * tpruvot@github 08 2017 - GPLv3
*/ */
extern "C" { extern "C" {
#include "sph/sph_skein.h" #include "sph/sph_skein.h"
@ -14,20 +14,25 @@ extern "C" {
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
//#define WANT_COMPAT_KERNEL
// compatibility kernels
extern void skein512_cpu_setBlock_80(void *pdata); extern void skein512_cpu_setBlock_80(void *pdata);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap); extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id); extern void x13_fugue512_cpu_free(int thr_id);
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce); extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void streebog_set_target(const uint32_t* ptarget); extern void streebog_set_target(const uint32_t* ptarget);
// krnlx merged kernel (for high-end cards only)
extern void skunk_cpu_init(int thr_id, uint32_t threads);
extern void skunk_set_target(uint32_t* ptarget);
extern void skunk_setBlock_80(int thr_id, void *pdata);
extern void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
@ -65,6 +70,7 @@ extern "C" void skunk_hash(void *output, const void *input)
} }
static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
static bool use_compat_kernels[MAX_GPUS] = { 0 };
extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{ {
@ -74,7 +80,8 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] > 500) ? 18 : 17; int intensity = (device_sm[device_map[thr_id]] > 500) ? 18 : 17;
if (strstr(device_name[dev_id], "GTX 10")) intensity = 19; if (strstr(device_name[dev_id], "GTX 10")) intensity = 20;
if (strstr(device_name[dev_id], "GTX 1080")) intensity = 21;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); //if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -92,8 +99,8 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
} }
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
quark_skein512_cpu_init(thr_id, throughput); skunk_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput); use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500 || CUDART_VERSION < 7500 || CUDART_VERSION > 8000);
x13_fugue512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
@ -107,18 +114,26 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
for (int k=0; k < 20; k++) for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
skein512_cpu_setBlock_80(endiandata);
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
if (use_compat_kernels[thr_id]) {
skein512_cpu_setBlock_80(endiandata);
streebog_set_target(ptarget); streebog_set_target(ptarget);
} else {
skunk_setBlock_80(thr_id, endiandata);
skunk_set_target(ptarget);
}
do { do {
int order = 0; int order = 0;
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++; if (use_compat_kernels[thr_id]) {
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]); streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
} else {
skunk_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
}
cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost); cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
@ -158,6 +173,7 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
else if (vhash[7] > Htarg) { else if (vhash[7] > Htarg) {
gpu_increment_reject(thr_id); gpu_increment_reject(thr_id);
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
gpulog(LOG_WARNING, thr_id, "result does not validate on CPU!");
pdata[19] = startNounce + h_resNonce[0] + 1; pdata[19] = startNounce + h_resNonce[0] + 1;
continue; continue;
} }
@ -185,6 +201,7 @@ extern "C" void free_skunk(int thr_id)
cudaThreadSynchronize(); cudaThreadSynchronize();
x13_fugue512_cpu_free(thr_id); x13_fugue512_cpu_free(thr_id);
cudaFree(d_hash[thr_id]); cudaFree(d_hash[thr_id]);
cudaFree(d_resNonce[thr_id]); cudaFree(d_resNonce[thr_id]);

567
skunk/streebog_arrays.cuh

@ -0,0 +1,567 @@
// Tables for function F
__device__ uint2 T02[256] = {
{ 0x5B711FD0, 0xE6F87E5C }, { 0x0924FA16, 0x25837780 }, { 0x852EA4A8, 0xC849E07E }, { 0x8F06C16A, 0x5B4686A1 },
{ 0xD77B416E, 0x0B32E9A2 }, { 0x67815C66, 0xABDA37A4 }, { 0x1A686676, 0xF61796A8 }, { 0x6391954B, 0xF5DC0B70 },
{ 0xB7E64BF1, 0x4862F38D }, { 0x68BD85C5, 0xFF5C629A }, { 0xFCD75795, 0xCB827DA6 }, { 0x69B9F089, 0x66D36DAF },
{ 0x483D83B0, 0x356C9F74 }, { 0x238C99A1, 0x7CBCECB1 }, { 0x31C4708D, 0x36A702AC }, { 0x2FBCDFD6, 0x9EB6A8D0 },
{ 0xE5B3AE37, 0x8B19FA51 }, { 0x8A127D0B, 0x9CCFB540 }, { 0x08208F5A, 0xBC0C78B5 }, { 0x2288ECED, 0xE533E384 },
{ 0x77C15FD2, 0xCEC2C7D3 }, { 0x505D0F5E, 0xEC7817B6 }, { 0x8336871D, 0xB94CC2C0 }, { 0xCB0B04AD, 0x8C205DB4 },
{ 0x28A0892F, 0x763C855B }, { 0xF6FF3257, 0x588D1B79 }, { 0x4311933E, 0x3FECF69E }, { 0x803A18C9, 0x0FC0D39F },
{ 0xF5F3AD83, 0xEE010A26 }, { 0x411979A6, 0x10EFE8F4 }, { 0x7DE93A10, 0x5DCDA10C }, { 0x1248E92C, 0x4A1BEE1D },
{ 0x21847339, 0x53BFF2DB }, { 0xA6A23D09, 0xB4F50CCF }, { 0xD84798CD, 0x5FB4BC9C }, { 0x071C56F9, 0xE88A2D8B },
{ 0x5A756A9C, 0x7F777169 }, { 0xA0BA1EBC, 0xC5F02E71 }, { 0x4215E672, 0xA663F9AB }, { 0xDE5FBB78, 0x2EB19E22 },
{ 0x2594BA14, 0x0DB9CE0F }, { 0x97664D84, 0x82520E63 }, { 0x0208EA98, 0x2F031E6A }, { 0xA1BE6BF0, 0x5C7F2144 },
{ 0xD16362DB, 0x7A37CB1C }, { 0x4B311C64, 0x83E08E2B }, { 0xAB960E32, 0xCF70479B }, { 0xB9DEE71E, 0x856BA986 },
{ 0x7AF56CE9, 0xB5478C87 }, { 0x5F61D6FD, 0xB8FE4288 }, { 0x966238C8, 0x1BDD0156 }, { 0x3EF8A92E, 0x62215792 },
{ 0x114476F8, 0xFC97FF42 }, { 0x56452CEB, 0x9D7D3508 }, { 0xE0A71256, 0x4C90C9B0 }, { 0xFBCB016C, 0x2308502D },
{ 0xA7A64845, 0x2D7A03FA }, { 0xBFC6C4AB, 0xF46E8B38 }, { 0xD477DEBA, 0xBDBEF8FD }, { 0xC8079B79, 0x3AAC4CEB },
{ 0xE8879D0C, 0xF09CB105 }, { 0xAC8A58CB, 0x27FA6A10 }, { 0x401D0CEA, 0x8960E7C1 }, { 0x4A356928, 0x1A6F811E },
{ 0x73D196FF, 0x90C4FB07 }, { 0x609D0A9F, 0x43501A2F }, { 0xC63F3796, 0xF7A516E0 }, { 0xB8DA9252, 0x1CE4A6B3 },
{ 0x38E08A9B, 0x1324752C }, { 0x3BEC154F, 0xA5A86473 }, { 0x5549B33F, 0x2BF12457 }, { 0x440DC5C7, 0xD766DB15 },
{ 0x9E42B792, 0xA7D179E3 }, { 0x61997FD3, 0xDADF151A }, { 0xC0271423, 0x86A0345E }, { 0x6DA939A4, 0x38D5517B },
{ 0x104003B4, 0x6518F077 }, { 0xA5AEA2DD, 0x02791D90 }, { 0x9C4A5D0A, 0x88D26789 }, { 0x0A2865C2, 0x930F66DF },
{ 0x4509B08B, 0x4EE9D420 }, { 0x6685292A, 0x32553891 }, { 0xC533A842, 0x412907BF }, { 0x544DC673, 0xB27E2B62 },
{ 0x6295E007, 0x6C530445 }, { 0x5351908A, 0x5AF406E9 }, { 0xC123616F, 0x1F2F3B6B }, { 0x5255E5C6, 0xC37B09DC },
{ 0xB1FE6844, 0x3967D133 }, { 0xF0E711E2, 0x298839C7 }, { 0x1964F9A2, 0x409B87F7 }, { 0xDB4B0719, 0xE938ADC3 },
{ 0xF9C3EBF4, 0x0C0B4E47 }, { 0xD36B8843, 0x5534D576 }, { 0xEB8B02D8, 0x4610A05A }, { 0x8232F251, 0x20C3CDF5 },
{ 0xBEC2B1E7, 0x6DE1840D }, { 0xB0FA1D08, 0xA0E8DE06 }, { 0x0D34333B, 0x7B854B54 }, { 0xBCCA5B7F, 0x42E29A67 },
{ 0xC437DD0E, 0xD8A6088A }, { 0xD943ED81, 0xC63BB3A9 }, { 0x5E65A3B1, 0x21714DBD }, { 0xB5EEA169, 0x6761EDE7 },
{ 0xD573ABF6, 0x2431F7C8 }, { 0xE1A3671A, 0xD51FC685 }, { 0x0410C92D, 0x5E063CD4 }, { 0x2CB04002, 0x283AB98F },
{ 0xB2F2F790, 0x8FEBC06C }, { 0x6FA1D33C, 0x17D64F11 }, { 0xA99EE4AA, 0xE07359F1 }, { 0x74CDC006, 0x784ED68C },
{ 0xC73B42DA, 0x6E2A19D5 }, { 0x1C7045C3, 0x8712B416 }, { 0xED93216D, 0x371582E4 }, { 0x4939F6FC, 0xACE39041 },
{ 0x86223B7C, 0x7EC5F121 }, { 0x2BAC16FB, 0xC0B09404 }, { 0x9A527EBF, 0xF9D74537 }, { 0xA3B68168, 0x737C3F2E },
{ 0xBAD278CA, 0x33E7B8D9 }, { 0xC22FFEBB, 0xA9A32A34 }, { 0xFEDFBD0D, 0xE48163CC }, { 0x6EA5A670, 0x8E594024 },
{ 0x842AD1E4, 0x51C6EF4B }, { 0x279C508C, 0x22BAD065 }, { 0x18608CEE, 0xD91488C2 }, { 0x1F7CDA17, 0x319EA549 },
{ 0x134C9C60, 0xD394E128 }, { 0x72D5E3B3, 0x094BF432 }, { 0xA4AAD791, 0x9BF612A5 }, { 0xD26FFD0F, 0xCCBBDA43 },
{ 0x946AD250, 0x34DE1F3C }, { 0x995EE16B, 0x4F5B5468 }, { 0xEA8F7794, 0xDF9FAF6F }, { 0x70DD092B, 0x2648EA58 },
{ 0x71D97C67, 0xBFC7E56D }, { 0x4F21D549, 0xDDE6B2FF }, { 0x3AE86003, 0x3C276B46 }, { 0xAF86C71F, 0x91767B4F },
{ 0x35D4B9A0, 0x68A13E78 }, { 0x030C9FD4, 0xB68C115F }, { 0x16582001, 0x141DD2C9 }, { 0xDD5324AC, 0x983D8F7D },
{ 0xCC175254, 0x64AA703F }, { 0x8E02B426, 0xC2C98994 }, { 0x9F46C2DE, 0x3E5E76D6 }, { 0x587D8004, 0x50746F03 },
{ 0x9272F1E5, 0x45DB3D82 }, { 0x9B560BF3, 0x60584A02 }, { 0x3FFCDC62, 0xFBAE58A7 }, { 0x6CAD4CE8, 0xA15A5E4E },
{ 0xCE1FB8CC, 0x4BA96E55 }, { 0xAE82B253, 0x08F9747A }, { 0xF7FB471B, 0xC102144C }, { 0xF3EB8E36, 0x9F042898 },
{ 0xF2EFFB7A, 0x068B27AD }, { 0x8C0A5EBE, 0xEDCA97FE }, { 0xF4F7D8CF, 0x778E0513 }, { 0xC32B8BF7, 0x302C2501 },
{ 0x175C554D, 0x8D92DDFC }, { 0x46052F5F, 0xF865C57F }, { 0xA2B2F424, 0xEAF3301B }, { 0xBBD60D86, 0xAA68B7EC },
{ 0x0104754C, 0x998F0F35 }, { 0x00000000, 0x00000000 }, { 0x34D0CCEC, 0xF12E314D }, { 0x061823B5, 0x710522BE },
{ 0x30C005C1, 0xAF280D99 }, { 0x5D693C65, 0x97FD5CE2 }, { 0x33CC9A15, 0x19A41CC6 }, { 0xF8C79EB8, 0x95844172 },
{ 0x937684A9, 0xDC5432B7 }, { 0x2490CF58, 0x9436C13A }, { 0x32C8EF59, 0x802B13F3 }, { 0x7CED4F5C, 0xC442AE39 },
{ 0xE3AB8D82, 0xFA1CD8EF }, { 0x4D293FD1, 0xF2E5AC95 }, { 0x907A1B7D, 0x6AD823E8 }, { 0x3CF043B6, 0x4D2249F8 },
{ 0x79F9F33D, 0x03CB9DD8 }, { 0x36D82674, 0xDE2D2F27 }, { 0x891EE2DF, 0x2A43A41F }, { 0x1B6C133A, 0x6F98999D },
{ 0x3DF436FA, 0xD4AD46CD }, { 0x269825C0, 0xBB35DF50 }, { 0x813E6D85, 0x964FDCAA }, { 0x7EE5A5C4, 0xEB41B053 },
{ 0x8B160847, 0x0540BA75 }, { 0xE7BB44AF, 0xA41AE43B }, { 0xD0671797, 0xE3B8C429 }, { 0xEE9FBEB9, 0x819993BB },
{ 0xEC975421, 0xAE9A8DD1 }, { 0x917E6E31, 0xF3572CDD }, { 0xE2AFF8CE, 0x6393D7DA }, { 0x37DC5338, 0x47A22012 },
{ 0xC903EE35, 0xA32343DE }, { 0xA89A91E6, 0x79FC56C4 }, { 0xDC5751E0, 0x01B28048 }, { 0xE4B7DB7B, 0x1296F564 },
{ 0x51597A12, 0x75F71883 }, { 0xBDCE2E33, 0xDB6D9552 }, { 0x1D74308F, 0x1E9DBB23 }, { 0xFDD322D9, 0x520D7293 },
{ 0x0C304677, 0xE20A4461 }, { 0xB4EAD425, 0xFEEEE2D2 }, { 0x20800675, 0xCA30FDEE }, { 0x47015A13, 0x61EACA4A },
{ 0x87264E30, 0xE74AFE14 }, { 0x7BF119A5, 0x2CC883B2 }, { 0xB3F682DC, 0x1664CF59 }, { 0x1E78AF5B, 0xA811AA7C },
{ 0x648DC3B2, 0x1D5626FB }, { 0xDF5BCE34, 0xB73E9117 }, { 0x6AB56F5D, 0xD05F7CF0 }, { 0xCD132718, 0xFD257F0A },
{ 0x76C52A9E, 0x574DC8E6 }, { 0x2EB8AA9A, 0x0739A7E5 }, { 0x0F3CD9A3, 0x5486553E }, { 0xAA927B7E, 0x56FF48AE },
{ 0xAD8E2D87, 0xBE756525 }, { 0xFFDBC841, 0x7D0E6CF9 }, { 0x1450CA99, 0x3B1ECCA3 }, { 0xE983E840, 0x6913BE30 },
{ 0x956EA71C, 0xAD511009 }, { 0x2DB4354E, 0xB1B5B6BA }, { 0x4E25A005, 0x4469BDCA }, { 0xCA0F71E1, 0x15AF5281 },
{ 0x8D0E2BF2, 0x744598CB }, { 0x2AA863B7, 0x593F9B31 }, { 0x29A4FC63, 0xEFB38A6E }, { 0x4C2D4A9D, 0x6B6AA3A0 },
{ 0xE6BF31E3, 0x3D95EB0E }, { 0x1554BFD5, 0xA291C396 }, { 0xEF9BCBF5, 0x18169C8E }, { 0x9D4E2846, 0x115D68BC },
{ 0xFACF7420, 0xBA875F18 }, { 0xB6E23EBD, 0xD1EDFCB8 }, { 0xF1E364AE, 0xB00736F2 }, { 0x6589B6FE, 0x84D929CE },
{ 0xDA4F7255, 0x70B7A2F6 }, { 0x5C6D4929, 0x0E7253D7 }, { 0x574159A7, 0x04F23A3D }, { 0x0B2C108E, 0x0A8069EA },
{ 0x6BB11A11, 0x49D073C5 }, { 0x39E4FFD7, 0x8AAB7A19 }, { 0x0E38ACEF, 0xCD095A0B }, { 0x5979F548, 0xC9FB6036 },
{ 0xD67F3422, 0x92BDE697 }, { 0x0514BC61, 0xC78933E1 }, { 0x75C9B54A, 0xE1C1D9B9 }, { 0xCF1BCD80, 0xD2266160 },
{ 0x78FD8671, 0x9A4492ED }, { 0x881A9793, 0xB3CCAB2A }, { 0x7FE1D088, 0x72CEBF66 }, { 0x985A9427, 0xD6D45B5D },
};
__device__ uint2 T12[256] = {
{ 0x8C3F55DE, 0xC811A805 }, { 0x96B50619, 0x65F5B431 }, { 0xD6706E43, 0xF74F96B1 }, { 0xCB43D336, 0x859D1E8B },
{ 0xCCFA3D84, 0x5AAB8A85 }, { 0xC295FCFD, 0xF9C7BF99 }, { 0xDE4B630F, 0xA21FD5A1 }, { 0x3B8B456D, 0xCDB3EF76 },
{ 0x7CF7C385, 0x803F59F8 }, { 0x5F31913C, 0xB27C73BE }, { 0x33B04821, 0x98E3AC66 }, { 0x26B8F818, 0xBF61674C },
{ 0xC4C130C8, 0x0FFBC995 }, { 0x10761A98, 0xAAA08620 }, { 0x210116AA, 0x6057F342 }, { 0x0654CC35, 0xF63C760C },
{ 0x667D9042, 0x2DDB45CC }, { 0x4BD40382, 0xBCF45A96 }, { 0xEF3C6F3D, 0x68E8A0C3 }, { 0x69FF73BC, 0xA7BD92D2 },
{ 0x01ED2287, 0x290AE202 }, { 0xE885818F, 0xB7DE34CD }, { 0xDD61059B, 0xD901EEA7 }, { 0x19A03553, 0xD6FA2732 },
{ 0x74CCCEC9, 0xD56F1AE8 }, { 0x2E83F554, 0xEA31245C }, { 0xA07BE499, 0x7034555D }, { 0x56E7BEF7, 0xCE26D2AC },
{ 0xA5054E38, 0xFD161857 }, { 0x527436D1, 0x6A0E7DA4 }, { 0x1CDE9FF2, 0x5BD86A38 }, { 0x31770C32, 0xCAF77562 },
{ 0xE279C8D0, 0xB09AAED9 }, { 0xC60674DB, 0x5DEF1091 }, { 0x515E5045, 0x111046A2 }, { 0x729802FC, 0x23536CE4 },
{ 0xF5B63CFA, 0xC50CBCF7 }, { 0xCD171F03, 0x73A16887 }, { 0xD9F28DBD, 0x7D2941AF }, { 0x5A4F3B9D, 0x3F5E3EB4 },
{ 0x1B677140, 0x84EEFE36 }, { 0xE7076271, 0x3DB8E3D3 }, { 0xF20FD248, 0x1A3A28F9 }, { 0xB49E7627, 0x7EBC7C75 },
{ 0xC7EB565C, 0x74E5F293 }, { 0x4F478BA4, 0x18DCF59E }, { 0xA9ADCB52, 0x0C6EF44F }, { 0x98DAC760, 0xC699812D },
{ 0x6E469D0E, 0x788B06DC }, { 0x7521EC4E, 0xFC65F8EA }, { 0x9E8E0B55, 0x30A5F721 }, { 0xBCA57B6B, 0x2BEC3F65 },
{ 0xBAF1B75E, 0xDDD04969 }, { 0xE394EA57, 0x99904CDB }, { 0xE6EA40F6, 0x14B201D1 }, { 0x41284ADD, 0xBBB0C082 },
{ 0xBF8F1DFF, 0x50F20463 }, { 0x93CBACB8, 0xE8D7F93B }, { 0x477C86E8, 0x4D8CB68E }, { 0x92268E3F, 0xC1DD1B39 },
{ 0x09D62FCB, 0x7C5AA112 }, { 0xDB35C9AE, 0x2F3D98AB }, { 0x2BFD5FF5, 0x67136956 }, { 0x36CEE280, 0x15C1E16C },
{ 0xF8F39B17, 0x1D7EB2ED }, { 0xB00DFE01, 0xDA94D37D }, { 0x760B8ADA, 0x877BC3EC }, { 0xE153AE44, 0xCB8495DF },
{ 0xB7B410B3, 0x05A24773 }, { 0x3C32ABDF, 0x12857B78 }, { 0x6812513B, 0x8EB770D0 }, { 0xD2E3E665, 0x536739B9 },
{ 0x71B26468, 0x584D57E2 }, { 0xC9849725, 0xD789C78F }, { 0x7D1AE102, 0xA935BBFA }, { 0xDFA64188, 0x8B1537A3 },
{ 0xC378DE7A, 0xD0CD5D9B }, { 0x4D80CFB7, 0x4AC82C9A }, { 0x83BDB620, 0x42777F1B }, { 0x1D33BD75, 0x72D2883A },
{ 0xAB6A8F41, 0x5E7A2D4B }, { 0xBB1C95D9, 0xF4DAAB6B }, { 0xFD8D31B6, 0x905CFFE7 }, { 0x119B381F, 0x83AA6422 },
{ 0x42022C49, 0xC0AEFB84 }, { 0x63033AE3, 0xA0F908C6 }, { 0x04938826, 0xA428AF08 }, { 0x1A8A53C7, 0xADE41C34 },
{ 0x77E6A85D, 0xAE7121EE }, { 0x25929E8C, 0xC47F5C4A }, { 0x55CDD863, 0xB538E9AA }, { 0xDAD8EB29, 0x06377AA9 },
{ 0xB3279895, 0xA18AE87B }, { 0x35E48414, 0x6EDFDA6A }, { 0x825094A7, 0x6B7D9D19 }, { 0xA4E86CBF, 0xD41CFA55 },
{ 0xEA42C59C, 0xE5CAEDC9 }, { 0x0E6FC179, 0xA36C351C }, { 0x6FABBF89, 0x5181E4DE }, { 0x184D17D4, 0xFFF0C530 },
{ 0x84045892, 0x9D41EB15 }, { 0x28D73961, 0x1C0D5250 }, { 0x0CA8856A, 0xF178EC18 }, { 0x8EF811CD, 0x9A057101 },
{ 0x3EF5EFCC, 0x4091A27C }, { 0x9F6329D2, 0x19AF1523 }, { 0xF91EB990, 0x347450EF }, { 0x8DD27759, 0xE11B4A07 },
{ 0xFC601331, 0xB9561DE5 }, { 0x2DA993C0, 0x912F1F5A }, { 0x5BA2191A, 0x1654DCB6 }, { 0x8A6B99EB, 0x3E2DDE09 },
{ 0x0F82E3FE, 0x8A66D71E }, { 0xD55A08D7, 0x8C51ADB7 }, { 0x8941FF7F, 0x4533E50F }, { 0xBD4859EC, 0x02E6DD67 },
{ 0x5DF6D52F, 0xE068AABA }, { 0xFF4A75A5, 0xC24826E3 }, { 0x88ACDDF8, 0x6C39070D }, { 0x4691A46F, 0x6486548C },
{ 0x135C7C0C, 0xD1BEBD26 }, { 0x8F15334A, 0xB30F9303 }, { 0xC1BF9A69, 0x82D9849F }, { 0x5420FAE4, 0x9C320BA8 },
{ 0xAFF90767, 0xFA528243 }, { 0xE968A308, 0x9ED4D6CF }, { 0x2C44B147, 0xB825FD58 }, { 0x5EDCB3BB, 0x9B7691BC },
{ 0x48FE6516, 0xC7EA6190 }, { 0x817AF233, 0x1063A61F }, { 0x3409A693, 0x47D53868 }, { 0x4C6DED30, 0x63C2CE98 },
{ 0x6C81D91D, 0x2A9FDFD8 }, { 0x032A6694, 0x7B1E3B06 }, { 0xFBD9FD83, 0x666089EB }, { 0x7375207B, 0x0A598EE6 },
{ 0x0AFC495F, 0x07449A14 }, { 0xB6593234, 0x2CA8A571 }, { 0x45BBC2FB, 0x1F986F8A }, { 0x50B372C2, 0x381AA4A0 },
{ 0xD81FAF3A, 0x5423A3AD }, { 0x8B86BB6C, 0x17273C0B }, { 0xC869B5A2, 0xFE83258D }, { 0xD1C980F1, 0x287902BF },
{ 0x6B3837AF, 0xF5A94BD6 }, { 0xB2CABA12, 0x88800A79 }, { 0x083B0D4C, 0x55504310 }, { 0x07B9EEB2, 0xDF36940E },
{ 0x6790B2C5, 0x04D1A7CE }, { 0xF125B4DC, 0x612413FF }, { 0xC52C124F, 0x26F12B97 }, { 0xA62F28AC, 0x86082351 },
{ 0x9937E5E7, 0xEF93632F }, { 0x293A1BE6, 0x3507B052 }, { 0x570A9C70, 0xE72C30AE }, { 0xAE1425E0, 0xD3586041 },
{ 0xD79D4CC4, 0xDE4574B3 }, { 0x40C5685A, 0x92BA2280 }, { 0xDC8C271C, 0xF00B0CA5 }, { 0xF69C5A6E, 0xBE1287F1 },
{ 0xB1E0DC86, 0xF39E317F }, { 0x20EC342D, 0x495D1140 }, { 0x3F18CD4B, 0x699B407E }, { 0x6AD51528, 0xDCA3A9D4 },
{ 0x79896924, 0x0D1D14F2 }, { 0x00000000, 0x00000000 }, { 0xA196C61E, 0x593EB75F }, { 0x0B116BD8, 0x2E4E7816 },
{ 0x58887F8E, 0x6D4AE7B0 }, { 0x872E3E06, 0xE65FD013 }, { 0xD30EC4E2, 0x7A6DDBBB }, { 0xCAAEF1B1, 0xAC97FC89 },
{ 0x1E19DBE1, 0x09CCB33C }, { 0x62EE1864, 0x89F3EAC4 }, { 0xAA87ADC6, 0x7770CF49 }, { 0x6557F6D6, 0x56C57ECA },
{ 0x6D6CFB9A, 0x03953DDA }, { 0x4456E07C, 0x36928D88 }, { 0x959F608D, 0x1EEB8F37 }, { 0x4EAAA923, 0x31D6179C },
{ 0xE5C02662, 0x6FAC3AD7 }, { 0x53991456, 0x43049FA6 }, { 0xC052B8EE, 0xABD3669D }, { 0xA7C20A2B, 0xAF02C153 },
{ 0x3723C007, 0x3CCB036E }, { 0x90E1CA2C, 0x93C9C23D }, { 0x2F6ED7D3, 0xC33BC65E }, { 0x9758249E, 0x4CFF5633 },
{ 0x325D6AA6, 0xB1E94E64 }, { 0x9472420A, 0x37E16D35 }, { 0xBE623F78, 0x79F8E661 }, { 0x02C74413, 0x5214D904 },
{ 0xF0C8965B, 0x482EF1FD }, { 0xEC1609A9, 0x13F69BC5 }, { 0x14E592BE, 0x0E882928 }, { 0x2A107D72, 0x4E198B54 },
{ 0xEBAFE71B, 0xCCC00FCB }, { 0x222B703E, 0x1B49C844 }, { 0xA840E9D5, 0x2564164D }, { 0x1FF4F966, 0x20C6513E },
{ 0x910CE8AB, 0xBAC3203F }, { 0x61C47EF0, 0xF2EDD1C2 }, { 0xACD361F3, 0x814CB945 }, { 0x4A392105, 0x95FEB894 },
{ 0x1622D6AD, 0x5C9CF02C }, { 0xF77178E9, 0x971865F3 }, { 0x9BF0A1F4, 0xBD87BA2B }, { 0x59655D09, 0x444005B2 },
{ 0x247FBC0B, 0xED75BE48 }, { 0x17CFF42A, 0x7596122E }, { 0x85E97A15, 0xB44B0917 }, { 0x2755DA9F, 0x966B854E },
{ 0x49134791, 0xEEE08392 }, { 0x23C652B9, 0x32432A46 }, { 0xAD3E4374, 0xA8465B47 }, { 0x12B15E8B, 0xF8B45F24 },
{ 0x78644BA3, 0x2417F6F0 }, { 0x7FDDA511, 0xFB2162FE }, { 0x9DA46DC1, 0x4BBBCC27 }, { 0xD024A276, 0x0173E0BD },
{ 0xA2BCA08A, 0x22208C59 }, { 0xB836F34D, 0x8FC4906D }, { 0x3A6667EA, 0xE4B90D74 }, { 0x705F46EF, 0x7147B5E0 },
{ 0x1508B039, 0x2782CB2A }, { 0xF45B1E7D, 0xEC065EF5 }, { 0xCFD05B10, 0x21B5B183 }, { 0x60295C77, 0xDBE733C0 },
{ 0x394C017E, 0x9FA73672 }, { 0x86C31C81, 0xCF553211 }, { 0x0D45A7ED, 0xD8720E1A }, { 0x3DDF8958, 0x3B8F997A },
{ 0xEDFB2B2E, 0x3AFC79C7 }, { 0x43EF0ECE, 0xE9A41986 }, { 0x7B4E2D37, 0x5F09CDF6 }, { 0xFA34DF04, 0x4F6A6BE9 },
{ 0x38A123F9, 0xB6ADD470 }, { 0x057EAAA1, 0x8D224D0A }, { 0x5C1BF7A8, 0xC96248B8 }, { 0x309A2EB5, 0xE3FD9760 },
{ 0xA351820D, 0x0B2A6E5B }, { 0xFEA75722, 0xEB42C4E1 }, { 0x9A1D8373, 0x948D5829 }, { 0x64BAD451, 0x7FCF9CC8 },
{ 0xD4B72A50, 0xA55B4FB5 }, { 0xCE3D7997, 0x08BF5381 }, { 0xE42D04E5, 0x46A6D8D5 }, { 0x7E308796, 0xD22B80FC },
{ 0xB57354A0, 0x57B69E77 }, { 0x8097D0B4, 0x3969441D }, { 0xF3E2F0CF, 0x3330CAFB }, { 0xE0BE8CC3, 0xE28E77DD },
{ 0x9C494F46, 0x62B12E25 }, { 0xB9DBD1CA, 0xA6CE726F }, { 0xEED14DBA, 0x41E242C1 }, { 0x7AA30FB0, 0x76032FF4 },
};
__device__ uint2 T22[256] = {
{ 0x3ACDE4CC, 0x45B268A9 }, { 0x84549D08, 0xAF7F0BE8 }, { 0xC1468263, 0x048354B3 }, { 0xC80EFED2, 0x925435C2 },
{ 0x7FDFFBA7, 0xEE4E37F2 }, { 0x0C60F14D, 0x167A3392 }, { 0xEA03E584, 0xFB123B52 }, { 0xFDBB9007, 0x4A0CAB53 },
{ 0x0F788A19, 0x9DEAF638 }, { 0x8F0CB32A, 0xCB48EC55 }, { 0xD6FEF7E0, 0xB59DC4B2 }, { 0xF4F3ECB6, 0xDCDBCA22 },
{ 0x549A9C40, 0x11DF5813 }, { 0x568ACED3, 0xE33FDEDF }, { 0x4322E9C3, 0xA0C1C812 }, { 0x58FA6D0D, 0x07A56B81 },
{ 0xB1E1F3DD, 0x77279579 }, { 0x422AC004, 0xD9B18B74 }, { 0xFFABC294, 0xB8EC2D9F }, { 0x2D75914F, 0xF4ACF8A8 },
{ 0xEF2B6878, 0x7BBF69B1 }, { 0x487AC7E1, 0xC4F62FAF }, { 0xC67E5D0C, 0x76CE809C }, { 0x92E4C14C, 0x6711D88F },
{ 0x243DEDFE, 0x627B99D9 }, { 0xDFB68B51, 0x234AA5C3 }, { 0x262DBF6D, 0x909B1F15 }, { 0x4B62BCB5, 0x4F66EA05 },
{ 0x52AA6AE8, 0x1AE2CF5A }, { 0xD0CE0148, 0xBEA053FB }, { 0xE66314C9, 0xED6808C0 }, { 0x15A82710, 0x43FE16CD },
{ 0xA06970F6, 0xCD049231 }, { 0x97CC4CB0, 0xE7BC8A6C }, { 0xFCB3B9C0, 0x337CE835 }, { 0x7CC780F3, 0x65DEF258 },
{ 0x4132BB50, 0x52214EDE }, { 0x90F493DF, 0x95F15E43 }, { 0x5DD2E0F1, 0x87083962 }, { 0xFB8B66AF, 0x41313C1A },
{ 0x51B211BC, 0x91720AF0 }, { 0xD4EEA573, 0x477D427E }, { 0xF6E3BE25, 0x2E3B4CEE }, { 0xEB0BCC43, 0x82627834 },
{ 0x78E724C8, 0x9C03E3DD }, { 0xD9867DF9, 0x2877328A }, { 0xE243B0F2, 0x14B51945 }, { 0xF7EB97E2, 0x574B0F88 },
{ 0x9AA4943A, 0x88B6FA98 }, { 0xCB168586, 0x19C4F068 }, { 0xAF11FAEF, 0x50EE6409 }, { 0xC04EABA4, 0x7DF317D5 },
{ 0x98B4C6A9, 0x7A567C54 }, { 0x4F42188E, 0xB6BBFB80 }, { 0x3BC5CD0B, 0x3CC22BCF }, { 0xAA397713, 0xD04336EA },
{ 0xEC33132C, 0xF02FAC1B }, { 0xF0D3488D, 0x2506DBA7 }, { 0xF2C31A1E, 0xD7E65D6B }, { 0x1FF820F5, 0x5EB9B216 },
{ 0xC46E0F9F, 0x842E0650 }, { 0x9E843001, 0x716BEB1D }, { 0xAB315ED4, 0xA933758C }, { 0xA2792265, 0x3FE414FD },
{ 0x1EF00932, 0x27C9F170 }, { 0x70A771BE, 0x73A4C1CA }, { 0xE76B3D0E, 0x94184BA6 }, { 0x8C14C87E, 0x40D829FF },
{ 0xC77674CB, 0x0FBEC3FA }, { 0x4A6A9572, 0x3616A963 }, { 0xC25EF937, 0x8F139119 }, { 0x5AEA3F9E, 0xF545ED4D },
{ 0x50BA387B, 0xE8024996 }, { 0x0B582E22, 0x6437E7BD }, { 0xE053E261, 0xE6559F89 }, { 0x05288DFC, 0x80AD52E3 },
{ 0xE34B9935, 0x6DC55A23 }, { 0x1AD0AD09, 0xDE14E0F5 }, { 0xA659865E, 0xC6390578 }, { 0x09487CB1, 0x96D76171 },
{ 0x21156002, 0xE2D6CB3A }, { 0x779FAED1, 0x01E915E5 }, { 0x6A77DCB7, 0xADB0213F }, { 0xB9A1A6AB, 0x9880B76E },
{ 0x8644CF9B, 0x5D9F8D24 }, { 0xC5662658, 0xFD5E4536 }, { 0x9BACBDFD, 0xF1C6B9FE }, { 0xBE9979C4, 0xEACD6341 },
{ 0x08405576, 0xEFA72217 }, { 0xD88E543E, 0x510771EC }, { 0x671F043D, 0xC2BA51CB }, { 0x71AF5879, 0x0AD482AC },
{ 0x5CDAC936, 0xFE787A04 }, { 0x8E049AED, 0xB238AF33 }, { 0x4972EE26, 0xBD866CC9 }, { 0xBD810290, 0x615DA6EB },
{ 0x8B2C1711, 0x3295FDD0 }, { 0x73BF0AEA, 0xF8340460 }, { 0x758FFC42, 0xF3099329 }, { 0x7DCFA934, 0x1CAEB13E },
{ 0x1188832B, 0xBA230748 }, { 0x874CE65C, 0x24EFCE42 }, { 0xB0E9DA1A, 0x0E57D61F }, { 0xF99B343C, 0xB3D1BAD6 },
{ 0x893C4582, 0xC0757B1C }, { 0x403A9297, 0x2B510DB8 }, { 0xF1DB614A, 0x5C7698C1 }, { 0xD5E68CB4, 0x3E0D0118 },
{ 0x855CB4CF, 0xD60F488E }, { 0xF3CB33D9, 0xAE961E0D }, { 0x14A00ED7, 0x3A8E55AB }, { 0x623789C1, 0x42170328 },
{ 0x9C946292, 0x838B6DD1 }, { 0xED3B3AEB, 0x895FEF7D }, { 0x4E4A3149, 0xCFCBB8E6 }, { 0x2F65C3DC, 0x064C7E64 },
{ 0x4C5A63DA, 0x3D2B3E2A }, { 0xA9210C47, 0x5BD3F340 }, { 0xA1615931, 0xB474D157 }, { 0x1DE87266, 0xAC5934DA },
{ 0x7AF7765B, 0x6EE36511 }, { 0x16B05C44, 0xC86ED367 }, { 0x201D49C5, 0x9BA6885C }, { 0x88346C45, 0xB905387A },
{ 0xBAB9DDFF, 0x131072C4 }, { 0xA751AF99, 0xBF49461E }, { 0x1CE05BA1, 0xD52977BC }, { 0x6027DB52, 0xB0F785E4 },
{ 0x6E57788C, 0x546D30BA }, { 0x650F56AE, 0x305AD707 }, { 0x612FF295, 0xC987C682 }, { 0xF5FBC571, 0xA5AB8944 },
{ 0x59F244CA, 0x7ED528E7 }, { 0x2C7DB888, 0x8DDCBBCE }, { 0x328DB1BA, 0xAA154ABE }, { 0x93ECE88B, 0x1E619BE9 },
{ 0xE813B717, 0x09F2BD9E }, { 0x285D1CB3, 0x7401AA4B }, { 0x3195CAEE, 0x21858F14 }, { 0x1398D1B8, 0x48C38184 },
{ 0xB2F98889, 0xFCB750D3 }, { 0x8D1CE1B9, 0x39A86A99 }, { 0xE473465A, 0x1F888E0C }, { 0x76978716, 0x78995683 },
{ 0xEE2341BF, 0x02CF2AD7 }, { 0xB3F1A14E, 0x85C713B5 }, { 0x2B4567E7, 0xFF916FE1 }, { 0xB7D10575, 0x7C1A0230 },
{ 0x5ECA9BA5, 0x0C98FCC8 }, { 0xDA9E06AD, 0xA3E7F720 }, { 0xBBB1F438, 0x6A6031A2 }, { 0x7ED7D260, 0x973E7494 },
{ 0x18C0FF9A, 0x2CF46639 }, { 0x68678E24, 0x5F50A7F3 }, { 0xA449D4CD, 0x34D983B4 }, { 0x5592B587, 0x68AF1B75 },
{ 0x2E6DEA1B, 0x7F3C3D02 }, { 0x45121F6B, 0xABFC5F5B }, { 0x29553574, 0x0D71E92D }, { 0x6D4F03D8, 0xDFFDF510 },
{ 0x9F8C19C6, 0x081BA87B }, { 0xAC0981BB, 0xDB7EA1A3 }, { 0x66172DFA, 0xBBCA12AD }, { 0x010829C7, 0x79704366 },
{ 0x7BFF5F9C, 0x17932677 }, { 0x00000000, 0x00000000 }, { 0xC906D715, 0xEB2476A4 }, { 0x0738DF6F, 0x724DD42F },
{ 0x38DDB65F, 0xB752EE65 }, { 0x3DF53BA3, 0x37FFBC86 }, { 0xB5C157E6, 0x8EFA84FC }, { 0x272596AA, 0xE9EB5C73 },
{ 0x2535C439, 0x1B0BDABF }, { 0x2A4D4E20, 0x86E12C87 }, { 0xCE3E087A, 0x9969A28B }, { 0x9D9C4B55, 0xFAFB2EB7 },
{ 0xB6D92CB2, 0x056A4156 }, { 0xDEBEA296, 0x5A3AE6A5 }, { 0xA8292580, 0x22A3B026 }, { 0x36AD1581, 0x53C85B3B },
{ 0x17B87583, 0xB11E9001 }, { 0x3FE56930, 0xC51F3A4A }, { 0xCF3621BD, 0xE019E1ED }, { 0x91FCBA18, 0xEC811D25 },
{ 0x4D524A1D, 0x445B7D4C }, { 0xDCAEF005, 0xA8DA6069 }, { 0x309DE329, 0x58F5CC72 }, { 0x6B7FF570, 0xD4C06259 },
{ 0x39D59F98, 0xCE22AD03 }, { 0x47024DF8, 0x591CD997 }, { 0x03187B54, 0x8B90C5AA }, { 0xC356D0F0, 0xF663D27F },
{ 0x35B56ED5, 0xD8589E91 }, { 0xD3D67A1C, 0x35309651 }, { 0xCD26732E, 0x12F96721 }, { 0x441A36AC, 0xD28C1C3D },
{ 0x64077F69, 0x492A9461 }, { 0x6F5F514B, 0x2D1D73DC }, { 0x0D68D88A, 0x6F0A70F4 }, { 0xCA1EAC41, 0x60B4B30E },
{ 0x3385987D, 0xD36509D8 }, { 0x0630F6A8, 0x0B3D9749 }, { 0x96C46577, 0x9ECCC90A }, { 0xAD01A87C, 0xA20EE2C5 },
{ 0x0E70A3DE, 0xE49AB55E }, { 0x82646BA0, 0xA4429CA1 }, { 0xDB962F6A, 0xDA97B446 }, { 0xD7F6DE27, 0xCCED87D4 },
{ 0x37A53C46, 0x2AB8185D }, { 0xE15BCBA6, 0x9F25DCEF }, { 0xFEA3EB53, 0xC19C6EF9 }, { 0x1BD884CE, 0xA764A393 },
{ 0x817C10F4, 0x2FD2590B }, { 0x80743933, 0x56A21A6D }, { 0x79EF0D0F, 0xE573A0BB }, { 0x95DC1E23, 0x155C0CA0 },
{ 0x94D437E4, 0x6C2C4FC6 }, { 0x23053291, 0x10364DF6 }, { 0x836C4267, 0xDD32DFC7 }, { 0x99BCEF6E, 0x03263F32 },
{ 0xE57B6F9D, 0x66F8CD6A }, { 0x5BE21659, 0x8C35AE2B }, { 0x1290F87F, 0x31B3C2E2 }, { 0xBF915003, 0x93BD2027 },
{ 0x220D1B56, 0x69460E90 }, { 0xAE19D328, 0x299E276F }, { 0x53A2432F, 0x63928C3C }, { 0xE91B9ED0, 0x7082FEF8 },
{ 0x3EED40F7, 0xBC6F792C }, { 0xD2DE53DB, 0x4C40D537 }, { 0x5FC2B262, 0x75E8BFAE }, { 0xA541FD0A, 0x4DA9C0D2 },
{ 0x3CFD1264, 0x4E8FFFE0 }, { 0x696FA7E3, 0x2620E495 }, { 0xB8A98F6C, 0xE1F0F408 }, { 0xDDA6D9C2, 0xD1AA230F },
{ 0xD1C6288F, 0xC7D0109D }, { 0x7487D585, 0x8A79D04F }, { 0xA3710BA2, 0x4694579B }, { 0xFA834F68, 0x38417F7C },
{ 0x0A5007E5, 0x1D47A4DB }, { 0x460A643F, 0x206C9AF1 }, { 0x34BD4712, 0xA128DDF7 }, { 0x72B7232D, 0x81444706 },
{ 0x02105293, 0xF2E086CC }, { 0xBC892B57, 0x182DE58D }, { 0xF8931DFB, 0xCAA1F9B0 }, { 0xCC2E5AE9, 0x6B892447 },
{ 0x0420A43B, 0xF9DD1185 }, { 0x8A243ED6, 0x4BE5BEB6 }, { 0x19C8D65D, 0x5584255F }, { 0x633FA006, 0x3B67404E },
{ 0x6C472A1F, 0xA68DB676 }, { 0xB4C97E21, 0xF78AC79A }, { 0x1080AAEC, 0xC353442E }, { 0x5782E714, 0x9A4F9DB9 },
};
__device__ uint2 T32[256] = {
{ 0x2C9B3220, 0x05BA7BC8 }, { 0xF8B65E4F, 0x31A54665 }, { 0x7547F4D4, 0xB1B651F7 }, { 0x7BA46682, 0x8BFA0D85 },
{ 0xA16A98BB, 0x85A96C5A }, { 0x08EB79C9, 0x990FAEF9 }, { 0x47F4A62D, 0xA15E37A2 }, { 0x5D27741E, 0x76857DCD },
{ 0x0A1820BC, 0xF8C50B80 }, { 0x01F7A2B4, 0xBE65DCB2 }, { 0x6F9426E7, 0x666D1B98 }, { 0x53C4E648, 0x4CC921BF },
{ 0x93D9CA42, 0x95410A0F }, { 0x647BA4EF, 0x20CDCCAA }, { 0x890A1871, 0x429A4060 }, { 0x9B32B38B, 0x0C4EA4F6 },
{ 0xDE354CD3, 0xCCDA362D }, { 0x7C5B2FA9, 0x96DC23BC }, { 0xAA851AB3, 0xC309BB68 }, { 0x3648E013, 0xD26131A7 },
{ 0x41FC4DB2, 0x021DC529 }, { 0x704BE48A, 0xCD5ADAB7 }, { 0x84ED71E6, 0xA77965D9 }, { 0x1734BBA4, 0x32386FD6 },
{ 0x38AB7245, 0xE82D6DD5 }, { 0x6177B4B1, 0x5C2147EA }, { 0xCF091CE8, 0x5DA1AB70 }, { 0x72B8BDFF, 0xAC907FCE },
{ 0x972278A8, 0x57C85DFD }, { 0x6B6F940D, 0xA4E44C6A }, { 0x4F1FDFE4, 0x3851995B }, { 0xED71BC9E, 0x62578CCA },
{ 0xC01D2C0A, 0xD9882BB0 }, { 0x113C503B, 0x917B9D5D }, { 0xA87643C6, 0xA2C31E11 }, { 0xA399C1CE, 0xE463C923 },
{ 0x7EA876DC, 0xF71686C5 }, { 0xE096D509, 0x87B4A973 }, { 0x9D3A5814, 0xAF0D567D }, { 0x59DCC6F4, 0xB40C2A3F },
{ 0x95D121DD, 0x3602F884 }, { 0x9836484A, 0xD3E1DD3D }, { 0xA46688E5, 0xF945E71A }, { 0xB2A591F5, 0x7518547E },
{ 0x50C01D89, 0x93665874 }, { 0x658C065B, 0x9EA81018 }, { 0xBC4603A3, 0x4F54080C }, { 0x5137BF3D, 0x2D0384C6 },
{ 0xEC861E2A, 0xDC325078 }, { 0x79573FF7, 0xEA30A8FC }, { 0xCA050CB6, 0x214D2030 }, { 0x8016C30C, 0x65F0322B },
{ 0x1B247087, 0x69BE96DD }, { 0x81E161B8, 0xDB95EE99 }, { 0xD9CA05F8, 0xD1FC1814 }, { 0xCC0DE729, 0x820ED2BB },
{ 0x430F14C7, 0x63D76050 }, { 0xA09D3A0F, 0x3BCCB0E8 }, { 0x573F54A2, 0x8E40764D }, { 0xE16177BD, 0x39D175C1 },
{ 0x734F1F4B, 0x12F5A37C }, { 0x1FDFC26D, 0xAB37C12F }, { 0x395CD0F1, 0x5648B167 }, { 0x37BF42A7, 0x6C04ED15 },
{ 0x14304065, 0xED97161D }, { 0xAB72B807, 0x7D6C67DA }, { 0xBA4EE83C, 0xEC17FA87 }, { 0x0304FBC1, 0xDFAF79CB },
{ 0x71BC463E, 0x733F0605 }, { 0x87E98A27, 0x78D61C12 }, { 0x77B4ADA1, 0xD07CF48E }, { 0x6C90DD26, 0xB9C26253 },
{ 0x60801605, 0xE2449B58 }, { 0xF941FCFB, 0x8FC09AD7 }, { 0x4BE46D0E, 0xFAD8CEA9 }, { 0x0608EB9F, 0xA343F28B },
{ 0x4917347B, 0x9B126BD0 }, { 0xE7699C22, 0x9A92874A }, { 0xC4E69EE0, 0x1B017C42 }, { 0x0EE39256, 0x3A4C5C72 },
{ 0x3EA399DA, 0x4B6E9F5E }, { 0x5AD83D35, 0x6BA353F4 }, { 0x4C1B2425, 0xE7FEE090 }, { 0x2587E95D, 0x22D00983 },
{ 0x0F1430E2, 0x842980C0 }, { 0x861E2893, 0xC6B3C0A0 }, { 0x19D729F2, 0x087433A4 }, { 0xD42D6C6F, 0x341F3DAD },
{ 0xFBB2A58E, 0xEE0A3FAE }, { 0x90DD3183, 0x4AEE73C4 }, { 0xB1A16A34, 0xAAB72DB5 }, { 0x5E238FDF, 0xA92A0406 },
{ 0x686B6FCC, 0x7B4B35A1 }, { 0xF4A6956C, 0x6A23BF6E }, { 0x851AD352, 0x191CB96B }, { 0xD6DE351A, 0x55D598D4 },
{ 0xF2AE7EF3, 0xC9604DE5 }, { 0xA981E172, 0x1CA6C2A3 }, { 0xAD7A5398, 0xDE2F9551 }, { 0x56C8F616, 0x3025AAFF },
{ 0x1E2860D9, 0x15521D9D }, { 0xFA45073A, 0x506FE31C }, { 0x2B647B0B, 0x189C55F1 }, { 0xAE7EA859, 0x0180EC9A },
{ 0x050C105E, 0x7CEC8B40 }, { 0x8BF94104, 0x2350E519 }, { 0x55CC0DD7, 0xEF8AD334 }, { 0x6D677F92, 0x07A7BEE1 },
{ 0x0DE76997, 0xE5E325B9 }, { 0xA26E637A, 0x5A061591 }, { 0x18208B46, 0xB611EF16 }, { 0xB7A981AB, 0x09F4DF3E },
{ 0xE87DACC0, 0x1EBB078A }, { 0xB65E231F, 0xB791038C }, { 0x74B05660, 0x0FD38D45 }, { 0xC1EA8EBE, 0x67EDF702 },
{ 0x831238CD, 0xBA5F4BE0 }, { 0xCEFEBE5C, 0xE3C477C2 }, { 0x354C1BD2, 0x0DCE486C }, { 0x16C31910, 0x8C5DB364 },
{ 0xA7627324, 0x26EA9ED1 }, { 0xEF82E5EB, 0x039D29B3 }, { 0xCBF2AE02, 0x9F28FC82 }, { 0xF05D2786, 0xA8AAE89C },
{ 0x2774B028, 0x431AACFA }, { 0x31B7A938, 0xCF471F9E }, { 0xE3922EC8, 0x581BD0B8 }, { 0x400BEF06, 0xBC78199B },
{ 0xBF42F862, 0x90FB71C7 }, { 0x46030499, 0x1F3BEB10 }, { 0xB55AD8DE, 0x683E7A47 }, { 0xA695D190, 0x988F4263 },
{ 0x6E638453, 0xD808C72A }, { 0xC319D7CB, 0x0627527B }, { 0xD72997AE, 0xEBB04466 }, { 0xE2658C7C, 0xE67E0C0A },
{ 0xB056C880, 0x14D2F107 }, { 0x30400B8C, 0x7122C32C }, { 0xD5DACEDB, 0x8A7AE11F }, { 0xE98A0E74, 0xA0DEDB38 },
{ 0xDCC615A6, 0xAD109354 }, { 0xF655CC19, 0x0BE91A17 }, { 0xB8BDB149, 0x8DDD5FFE }, { 0xAF890AED, 0xBFE53028 },
{ 0xB4AD7A6A, 0xD65BA6F5 }, { 0x2997227E, 0x7956F088 }, { 0x32B352F9, 0x10E86655 }, { 0xDACEFE39, 0x0E5361DF },
{ 0x9FC90161, 0xCEC7F304 }, { 0x677F5F2E, 0xFF62B561 }, { 0xD22587F0, 0x975CCF26 }, { 0x543BAF63, 0x51EF0F86 },
{ 0x10CBF28F, 0x2F1E41EF }, { 0xBBB94A88, 0x52722635 }, { 0x3344F04D, 0xAE8DBAE7 }, { 0x6688FD9A, 0x410769D3 },
{ 0x34BBB966, 0xB3AB94DE }, { 0x8DF1AA9B, 0x80131792 }, { 0xC5113C54, 0xA564A0F0 }, { 0xBDB1A117, 0xF131D4BE },
{ 0xEA8EF5B5, 0x7F71A2F3 }, { 0xC8F655C3, 0x40878549 }, { 0x44F05DEC, 0x7EF14E69 }, { 0xF55137D8, 0xD44663DC },
{ 0x523344FC, 0xF2ACFD0D }, { 0x00000000, 0x00000000 }, { 0x8EF5515A, 0x5FBC6E59 }, { 0xF1AA8532, 0x16CF342E },
{ 0xDB395C8D, 0xB036BD6D }, { 0xDD31B712, 0x13754FE6 }, { 0x2D6C9094, 0xBBDFA77A }, { 0x3A582B30, 0x89E7C8AC },
{ 0xCDFA459D, 0x3C6B0E09 }, { 0xC7E26521, 0xC4AE0589 }, { 0x7F5FD468, 0x49735A77 }, { 0x1D2C9B18, 0xCAFD6456 },
{ 0x2F9FC9E1, 0xDA150203 }, { 0x94268369, 0x88672436 }, { 0x3BAF8984, 0x3782141E }, { 0x24704BE9, 0x9CB5D531 },
{ 0x1AD3D233, 0xD7DB4A6F }, { 0x2A93D9BF, 0xA6F98943 }, { 0x8A0EE3B0, 0x9D3539AB }, { 0x15C7E2D1, 0x53F2CAAF },
{ 0x76430F15, 0x6E19283C }, { 0x6384EDC4, 0x3DEBE293 }, { 0x208BF903, 0x5E3C82C3 }, { 0xB94A13FD, 0x33B8834C },
{ 0x2E686B55, 0x6470DEB1 }, { 0x7A53C436, 0x359FD137 }, { 0x02F35975, 0x61CAA579 }, { 0x82E59A79, 0x043A9752 },
{ 0x2683129C, 0xFD7F7048 }, { 0x699CCD78, 0xC52EE913 }, { 0x7DAC8D1D, 0x28B9FF0E }, { 0x78A09D43, 0x5455744E },
{ 0xB3523341, 0xCB7D88CC }, { 0x4A13CFBA, 0x44BD121B }, { 0xFDBA4E11, 0x4D49CD25 }, { 0x8C06082F, 0x3E76CB20 },
{ 0x2278A076, 0x3FF627BA }, { 0x04FBB2EA, 0xC28957F2 }, { 0xE46D67E3, 0x453DFE81 }, { 0x3DA7621B, 0x94C1E695 },
{ 0xFF491764, 0x2C83685C }, { 0xFC4DECA5, 0xF32C1197 }, { 0x922E68F6, 0x2B24D6BD }, { 0x9AC5113F, 0xB22B7844 },
{ 0xD1217C31, 0x48F3B6ED }, { 0xBEB55AD6, 0x2E9EAD75 }, { 0x5FD42D6B, 0x174FD8B4 }, { 0x1238ABFA, 0x4ED4E496 },
{ 0xFEBEB5D0, 0x92E6B4EE }, { 0x0BEF8208, 0x46A0D732 }, { 0xA5912A51, 0x47203BA8 }, { 0xE69E3E96, 0x24F75BF8 },
{ 0x13CF094E, 0xF0B13824 }, { 0xC901F777, 0xFEE259FB }, { 0x091CDB7D, 0x276A724B }, { 0xEE75475F, 0xBDF8F501 },
{ 0x4DEC8691, 0x599B3C22 }, { 0x99C1EAFE, 0x6D84018F }, { 0x1CDB39AC, 0x7498B8E4 }, { 0x217C5BB7, 0xE0595E71 },
{ 0x3C50C0AF, 0x2AA43A27 }, { 0x3F543B6E, 0xF50B43EC }, { 0x62734F70, 0x838E3E21 }, { 0x4507FF58, 0xC09492DB },
{ 0xDFC2EE67, 0x72BFEA9F }, { 0x9CCDFAA0, 0x11688ACF }, { 0x6A9836B9, 0x1A8190D8 }, { 0xC615C795, 0x7ACBD93B },
{ 0x286080CA, 0xC7332C3A }, { 0x4EE87D50, 0x863445E9 }, { 0xD0D6DE85, 0xF6966A5F }, { 0x96D5DA1C, 0xE9AD814F },
{ 0x9E3EA3D5, 0x70A22FB6 }, { 0x582B6440, 0x0A69F68D }, { 0xC2EE757F, 0xB8428EC9 }, { 0xAC8DF12C, 0x604A49E3 },
{ 0x0C10CB23, 0x5B86F90B }, { 0x8F02F3EE, 0xE1D9B2EB }, { 0xD3D22544, 0x29391394 }, { 0x5CD0D6AA, 0xC8E0A17F },
{ 0xF7A26EAD, 0xB58CC6A5 }, { 0x238F02C2, 0x8193FB08 }, { 0x5B2F9F81, 0xD5C68F46 }, { 0x88FDBAC5, 0xFCFF9CD2 },
{ 0xF359DC47, 0x77059157 }, { 0x07FF492B, 0x1D262E39 }, { 0xE59AC557, 0xFB582233 }, { 0x42F8B673, 0xDDB2BCE2 },
{ 0x48E096CF, 0x2577B762 }, { 0xD83DA74C, 0x6F99C4A6 }, { 0xEB795701, 0xC1147E41 }, { 0x912A9337, 0xF48BAF76 },
};
__device__ uint2 T42[256] = {
{ 0x9B2C0A19, 0x3EF29D24 }, { 0xB6F8622F, 0xE9E16322 }, { 0x47757F7A, 0x55369940 }, { 0xA47B0B33, 0x9F4D56D5 },
{ 0x6AA1174C, 0x82256746 }, { 0xEB082FB2, 0xB8F5057D }, { 0xF4475F53, 0xCC48C10B }, { 0x275DEC3A, 0x373088D4 },
{ 0x180AED10, 0x968F4325 }, { 0xF7016151, 0x173D232C }, { 0x946FCC13, 0xAE4ED09F }, { 0xC4539873, 0xFD4B4741 },
{ 0xD9933765, 0x1B5B3F0D }, { 0x7B644052, 0x2FFCB096 }, { 0x0A89840C, 0xE02376D2 }, { 0x329B18D7, 0xA3AE3A70 },
{ 0x35DE8526, 0x419CBD23 }, { 0x5B7C3199, 0xFAFEBF11 }, { 0x85AA9B0D, 0x0397074F }, { 0x4836B970, 0xC58AD4FB },
{ 0xFC4104A8, 0xBEC60BE3 }, { 0x4B708772, 0x1EFF36DC }, { 0xED8453B6, 0x131FDC33 }, { 0x341764D3, 0x0844E33E },
{ 0xAB38CD39, 0x0FF11B6E }, { 0x7761B85A, 0x64351F0A }, { 0x09CFBA0E, 0x3B5694F5 }, { 0xB87245D0, 0x30857084 },
{ 0x2297AE3C, 0x47AFB3BD }, { 0x6F6B554A, 0xF2BA5C2F }, { 0x1F4F70E1, 0x74BDC476 }, { 0x71EDC45E, 0xCFDFC644 },
{ 0x1DC0AF16, 0xE610784C }, { 0x3C113F28, 0x7ACA29D6 }, { 0x76A859AF, 0x2DED4117 }, { 0x99A3D5EE, 0xAC5F211E },
{ 0xA87EF33B, 0xD484F949 }, { 0x96E013E4, 0x3CE36CA5 }, { 0x3A9D432C, 0xD120F098 }, { 0xDC597563, 0x6BC40464 },
{ 0xD1956C9E, 0x69D5F5E5 }, { 0x3698BB24, 0x9AE95F04 }, { 0x66A4EF44, 0xC9ECC8DA }, { 0xA5B2EAC6, 0xD69508C8 },
{ 0xC0503B80, 0xC40C2235 }, { 0x8C652103, 0x38C193BA }, { 0x46BC9E8F, 0x1CEEC75D }, { 0x37515AD1, 0xD3310119 },
{ 0x86ECA50F, 0xD8E2E568 }, { 0x5779C991, 0xB137108D }, { 0x05CA4206, 0x709F3B69 }, { 0x1680CAEF, 0x4FEB5083 },
{ 0x241BD238, 0xEC456AF3 }, { 0xE181ABBE, 0x58D673AF }, { 0xCAD9BF8C, 0x242F54E7 }, { 0x0DCC19FD, 0x0211F181 },
{ 0x0F43C60A, 0x90BC4DBB }, { 0x9DA0761D, 0x9518446A }, { 0x3F57012A, 0xA1BFCBF1 }, { 0x61E172B5, 0x2BDE4F89 },
{ 0x4F732481, 0x27B853A8 }, { 0xDF1F4B61, 0xB0B1E643 }, { 0x5C39AC68, 0x18CC3842 }, { 0xBF37D821, 0xD2B7F7D7 },
{ 0x3014C720, 0x3103864A }, { 0x72ABFA5C, 0x14AA2463 }, { 0x4EBAC574, 0x6E600DB5 }, { 0x0403A3F3, 0x39476574 },
{ 0xBC71E623, 0x09C215F0 }, { 0xE987F045, 0x2A58B947 }, { 0xB477BDD8, 0x7B4CDF18 }, { 0x906C6FE0, 0x9709B5EB },
{ 0x8060D90B, 0x73083C26 }, { 0x41F9037E, 0xFEDC400E }, { 0xE44BE9B8, 0x284948C6 }, { 0x08065BFB, 0x728ECAE8 },
{ 0x17492B1A, 0x06330E9E }, { 0x69E7294E, 0x59508561 }, { 0xE6C4364F, 0xBAE4F4FC }, { 0xE30E7449, 0xCA7BCF95 },
{ 0xA33E96C2, 0x7D7FD186 }, { 0xD85AD690, 0x52836110 }, { 0x1B4CD312, 0x4DFAA102 }, { 0x872544FA, 0x913ABB75 },
{ 0x140F1518, 0xDD46ECB9 }, { 0x1E869114, 0x3D659A6B }, { 0xD719109A, 0xC23F2CAB }, { 0x2DD46836, 0xD713FE06 },
{ 0xB2FBC1DC, 0xD0A60656 }, { 0xDD909496, 0x221C5A79 }, { 0xA1B14935, 0xEFD26DBC }, { 0x235E4FC9, 0x0E77EDA0 },
{ 0x6B68F6B9, 0xCBFD395B }, { 0xA6F4D4C4, 0x0DE0EAEF }, { 0x1A8532E7, 0x0422FF1F }, { 0xDED6AA94, 0xF969B85E },
{ 0xAEF28F3F, 0x7F6E2007 }, { 0x81A938FE, 0x3AD0623B }, { 0x7AADA1A7, 0x6624EE8B }, { 0xC856607B, 0xB682E8DD },
{ 0x281E2A30, 0xA78CC56F }, { 0x45FAA08D, 0xC79B257A }, { 0x642B30B3, 0x5B4174E0 }, { 0x7EAE0254, 0x5F638BFF },
{ 0x0C05F808, 0x4BC9AF9C }, { 0xF98B46AE, 0xCE59308A }, { 0xCC55C388, 0x8FC58DA9 }, { 0x676D0EB1, 0x803496C7 },
{ 0xE70DD7BA, 0xF33CAAE1 }, { 0x6EA2B4BF, 0xBB620232 }, { 0x201871CB, 0xD5020F87 }, { 0xA9B712CE, 0x9D5CA754 },
{ 0x7DE83C56, 0x841669D8 }, { 0x5EB6739F, 0x8A618478 }, { 0xB0741E2B, 0x420BBA6C }, { 0xEAC1CE47, 0xF12D5B60 },
{ 0x1283691C, 0x76AC35F7 }, { 0xFECEDB5F, 0x2C6BB7D9 }, { 0x4C351A83, 0xFCCDB18F }, { 0xC3160582, 0x1F79C012 },
{ 0x62A74CB7, 0xF0ABADAE }, { 0x82EF06FC, 0xE1A5801C }, { 0xF2CB2357, 0x67A21845 }, { 0x5DF04D9D, 0x5114665F },
{ 0x74278658, 0xBF40FD2D }, { 0xB73183DA, 0xA0393D3F }, { 0x92E3B017, 0x05A409D1 }, { 0x0B4065F9, 0xA9FB28CF },
{ 0x42BF3D7C, 0x25A9A229 }, { 0x03463E02, 0xDB75E227 }, { 0x5AB5D06C, 0xB326E10C }, { 0x95A62DE6, 0xE7968E82 },
{ 0x636EAD42, 0xB973F3B3 }, { 0x19C30CE5, 0xDF571D38 }, { 0x29D7CBC5, 0xEE549B72 }, { 0x65E2D146, 0x12992AFD },
{ 0x56B02864, 0xF8EF4E90 }, { 0x4030E28B, 0xB7041E13 }, { 0xDAD50967, 0xC02EDD2A }, { 0x8AE95D07, 0x932B4AF4 },
{ 0xC6DC4784, 0x6FE6FB7B }, { 0x55F61666, 0x239AACB7 }, { 0xBDB807D6, 0x401A4BED }, { 0x89AF6305, 0x485EA8D3 },
{ 0xADB4B13D, 0xA41BC220 }, { 0x9729F211, 0x753B32B8 }, { 0xB3322029, 0x997E584B }, { 0xCEDA1C7F, 0x1D683193 },
{ 0xC99F818E, 0xFF5AB6C0 }, { 0x7F67E3A1, 0x16BBD5E2 }, { 0x25D233CD, 0xA59D34EE }, { 0x3B54A2D9, 0x98F8AE85 },
{ 0xCB105E79, 0x6DF70AFA }, { 0xB9BBA425, 0x795D2E99 }, { 0x44334178, 0x8E437B67 }, { 0x886682F0, 0x0186F6CE },
{ 0xBB347BD2, 0xEBF092A3 }, { 0xF18D1D55, 0xBCD7FA62 }, { 0x11C5571E, 0xADD9D7D0 }, { 0xB1BDFFDE, 0x0BD3E471 },
{ 0x8EEAFEF4, 0xAA6C2F80 }, { 0xF6C880A4, 0x5EE57D31 }, { 0xF044FCA0, 0xF50FA47F }, { 0x51F5B595, 0x1ADDC9C3 },
{ 0x3352F922, 0xEA76646D }, { 0x00000000, 0x00000000 }, { 0xF58EBEA6, 0x85909F16 }, { 0xAAF12CCC, 0x46294573 },
{ 0x39DB7D2E, 0x0A5512BF }, { 0x31DD26D5, 0x78DBD857 }, { 0x6C2D6B48, 0x29CFBE08 }, { 0x583A0F9B, 0x218B5D36 },
{ 0xFACD78AC, 0x152CD2AD }, { 0xE2C795BC, 0x83A39188 }, { 0x5F7F926A, 0xC3B9DA65 }, { 0x2C1D89C3, 0x9ECBA01B },
{ 0x9F2FA9EA, 0x07B5F850 }, { 0x26940DCF, 0x7EE8D6C9 }, { 0xAF3B6ECA, 0x36B67E1A }, { 0x702425AB, 0x86079859 },
{ 0xD31AB369, 0xFB7849DF }, { 0x932A51E2, 0x4C7C57CC }, { 0x0E8A27FF, 0xD96413A6 }, { 0xC715A671, 0x263EA566 },
{ 0x4376DC89, 0x6C71FC34 }, { 0x84637AF8, 0x4A4F5952 }, { 0x8B20BCF2, 0xDAF314E9 }, { 0x4AB96687, 0x572768C1 },
{ 0x682EC8BB, 0x1088DB7C }, { 0x537A6A62, 0x887075F9 }, { 0xF302C2A2, 0x2E7A4658 }, { 0xE582084D, 0x619116DB },
{ 0x8326E709, 0xA87DDE01 }, { 0x9C6997E8, 0xDCC01A77 }, { 0xAC7D50C8, 0xEDC39C3D }, { 0xA078A8C0, 0xA60A33A1 },
{ 0x52B38B97, 0xC1A82BE4 }, { 0x134A88E9, 0x3F746BEA }, { 0xBAFD9A27, 0xA228CCBE }, { 0x068C7C04, 0xABEAD94E },
{ 0x78227E50, 0xF48952B1 }, { 0xFB049959, 0x5CF48CB0 }, { 0x6DE48ABD, 0x6017E015 }, { 0xA73D3531, 0x4438B4F2 },
{ 0x49FF5885, 0x8C528AE6 }, { 0x4DFCFB76, 0xB515EF92 }, { 0x2E925634, 0x0C661C21 }, { 0xC59A7986, 0xB493195C },
{ 0x21D1903E, 0x9CDA519A }, { 0xB5BE5C2D, 0x32948105 }, { 0xD45F2E98, 0x194ACE8C }, { 0x38129CDB, 0x438D4CA2 },
{ 0xBEFE39D4, 0x9B6FA9CA }, { 0xEF0B8C41, 0x81B26009 }, { 0x91A58E15, 0xDED1EBF6 }, { 0x9EE6481F, 0x4E6DA64D },
{ 0xCF13FD8A, 0x54B06F8E }, { 0x01C9E1F5, 0x49D85E1D }, { 0x1C094EE3, 0xAFC82651 }, { 0x75EE67AD, 0xF698A330 },
{ 0xEC4DB243, 0x5AC7822E }, { 0xC199DA75, 0x8DD47C28 }, { 0xDB1CE892, 0x89F68337 }, { 0x7C21DDA3, 0xCDCE37C5 },
{ 0x503C5460, 0x530597DE }, { 0x543FF793, 0x6A42F2AA }, { 0x73621BA9, 0x5D727A7E }, { 0x07459DF1, 0xE2328753 },
{ 0xC2DFE477, 0x56A19E0F }, { 0xCD9C227D, 0xC61DD3B4 }, { 0x986A341B, 0xE5877F03 }, { 0x15C6F4ED, 0x949EB2A4 },
{ 0x60289340, 0x62061194 }, { 0xE84E11B0, 0x6380E75A }, { 0xD6D0F16F, 0x8BE772B6 }, { 0xD596CF6D, 0x50929091 },
{ 0x3E9EE0DF, 0xE86795EC }, { 0x2B581432, 0x7CF92748 }, { 0xEEC26DB4, 0xC86A3E14 }, { 0x8DACC0F6, 0x7119CDA7 },
{ 0x100CB6EB, 0xE40189CD }, { 0x028FDFF7, 0x92ADBC3A }, { 0xD2D3529C, 0xB2A017C2 }, { 0xD05C8D6B, 0x200DABF8 },
{ 0xA2F77737, 0x34A78F9B }, { 0x8F231F01, 0xE3B4719D }, { 0x2F5BB7C1, 0x45BE423C }, { 0xFD88E55D, 0xF71E55FE },
{ 0x59F3EE6E, 0x6853032B }, { 0xFF073AAA, 0x65B3E9C4 }, { 0x9AE5EBEC, 0x772AC339 }, { 0xF842A75B, 0x87816E97 },
{ 0xE0484A4B, 0x110E2DB2 }, { 0x3DD8DEDD, 0x331277CB }, { 0x79EB9FA5, 0xBD510CAC }, { 0x2A91F5C7, 0x35217955 },
};
__device__ uint2 T52[256] = {
{ 0x46E06A6D, 0x8AB0A968 }, { 0x4BF0B33A, 0x43C7E80B }, { 0x6B161EE5, 0x08C9B354 }, { 0xEBA990BE, 0x39F1C235 },
{ 0x6606C7B2, 0xC1BEF237 }, { 0x614569AA, 0x2C209233 }, { 0x6FC3289A, 0xEB01523B }, { 0x935ACEDD, 0x946953AB },
{ 0x3E13340E, 0x272838F6 }, { 0xA12BA052, 0x8B0455EC }, { 0x978FF8A2, 0x77A1B2C4 }, { 0x13E54086, 0xA55122CA },
{ 0x62D3F1CD, 0x22761358 }, { 0x08B76CFE, 0xDB8DDFDE }, { 0x9E4A178A, 0x5D1E12C8 }, { 0x03969867, 0x0E56816B },
{ 0x3303ED59, 0xEE5F7995 }, { 0xAB78D71D, 0xAFED748B }, { 0xF93E53EE, 0x6D929F2D }, { 0xBA798C2A, 0xF5D8A8F8 },
{ 0x8E39CF6B, 0xF619B169 }, { 0x749104E2, 0x95DDAF2F }, { 0xE0886427, 0xEC2A9C80 }, { 0x825B95EA, 0xCE5C8FD8 },
{ 0x3AC60271, 0xC4E0D999 }, { 0x173076F9, 0x4699C3A5 }, { 0x50A29F42, 0x3D1B151F }, { 0x2BC75946, 0x9ED505EA },
{ 0xDC7F4B98, 0x34665ACF }, { 0x292342F7, 0x61B1FB53 }, { 0x0E864130, 0xC721C008 }, { 0x96FD7B74, 0x8693CD16 },
{ 0x7136B14B, 0x87273192 }, { 0x63A1721B, 0xD3446C8A }, { 0xA6680E4A, 0x669A35E8 }, { 0x39509A16, 0xCAB658F2 },
{ 0xF42E8AB9, 0xA4E5DE4E }, { 0xE83F08D9, 0x37A7435E }, { 0xE26C7F96, 0x134E6239 }, { 0x2DF67488, 0x82791A3C },
{ 0x8329163C, 0x3F6EF00A }, { 0xFDEB6591, 0x8E5A7E42 }, { 0x7981DDB5, 0x5CAAEE4C }, { 0x5AF1E80D, 0x19F23478 },
{ 0xED98BD70, 0x255DDDE3 }, { 0xA99CCCAC, 0x50898A32 }, { 0xDA4E6656, 0x28CA4519 }, { 0x4CB31D22, 0xAE59880F },
{ 0x37D6DB26, 0x0D9798FA }, { 0xB4FFCD1A, 0x32F968F0 }, { 0x4F258545, 0xA00F0964 }, { 0x5E24DE72, 0xFA3AD517 },
{ 0x5DB24615, 0xF46C547C }, { 0xFF0F7E20, 0x713E80FB }, { 0x73D2AAFA, 0x7843CF2B }, { 0xAEDF62B4, 0xBD17EA36 },
{ 0xD16F92CF, 0xFD111BAC }, { 0xC72D67E0, 0x4ABAA7DB }, { 0xAD49FAD3, 0xB3416B5D }, { 0x4914A88B, 0xBCA316B2 },
{ 0x8AECF914, 0x15D15006 }, { 0xE31EFC40, 0xE27C1DEB }, { 0x9BEDA223, 0x4FE48C75 }, { 0x1B522C78, 0x7EDCFD14 },
{ 0x7C26681C, 0x4E5070F1 }, { 0x5815F3BC, 0xE696CAC1 }, { 0x3BB481A7, 0x35D2A64B }, { 0xFE7DFDF6, 0x800CFF29 },
{ 0xD5BAA4B0, 0x1ED9FAC3 }, { 0x1EF599D1, 0x6C2663A9 }, { 0x34404341, 0x03C11991 }, { 0x69F20554, 0xF7AD4DED },
{ 0xB61BD6AB, 0xCD9D9649 }, { 0xEADB1368, 0xC8C3BDE7 }, { 0xB02AFB65, 0xD131899F }, { 0xE1FAE7F1, 0x1D18E352 },
{ 0xEF7CA6C1, 0xDA39235A }, { 0xA8EE4F7A, 0xA1BBF5E0 }, { 0xCF9A0B1E, 0x91377805 }, { 0x80BF8E5B, 0x31387161 },
{ 0xDB3CE580, 0xD9F83ACB }, { 0xD38B897E, 0x0275E515 }, { 0xF0FBBCC6, 0x472D3F21 }, { 0x868EA395, 0x2D946EB7 },
{ 0x21942E09, 0xBA3C248D }, { 0xBFDE3983, 0xE7223645 }, { 0x02E41BB1, 0xFF64FEB9 }, { 0x0D10D957, 0xC9774163 },
{ 0xB58D4ECC, 0xC3CB1722 }, { 0x9CAE0C3B, 0xA27AEC71 }, { 0xA48C15FB, 0x99FECB51 }, { 0x6D27332B, 0x1465AC82 },
{ 0xD75EBF01, 0xE1BD047A }, { 0x941960C5, 0x79F733AF }, { 0x41A3C475, 0x672EC96C }, { 0x524684F3, 0xC27FEBA6 },
{ 0x75E38734, 0x64EFD0FD }, { 0x0743AE18, 0xED9E6004 }, { 0xB9EF144D, 0xFB8E2993 }, { 0x0C625A81, 0x38453EB1 },
{ 0x42355C12, 0x69784807 }, { 0x14A6EE9E, 0x48CF42CE }, { 0x06312DCE, 0x1CAC1FD6 }, { 0x4792E9BB, 0x7B82D6BA },
{ 0x1F871A07, 0x9D141C7B }, { 0xC11C4A2E, 0x5616B80D }, { 0xF21FA777, 0xB849C198 }, { 0xC8D9A506, 0x7CA91801 },
{ 0x7EC273AD, 0xB1348E48 }, { 0x987B3A44, 0x41B20D1E }, { 0xA3CFBBE3, 0x7460AB55 }, { 0x4576F20A, 0x84E62803 },
{ 0x897A6173, 0x1B87D16D }, { 0xE45D5258, 0x0FE27DEF }, { 0xCA3DBEB7, 0x83CDE6B8 }, { 0xD01D1119, 0x0C23647E },
{ 0xA0592384, 0x7A362A3E }, { 0xF1893F10, 0xB61F40F3 }, { 0x440471DC, 0x75D457D1 }, { 0x237035B8, 0x4558DA34 },
{ 0x87FC2043, 0xDCA61165 }, { 0xC9AB26D0, 0x8D9B67D3 }, { 0xEE0E2517, 0x2B0B5C88 }, { 0x2AB5DA90, 0x6FE77A38 },
{ 0xD9D8FE31, 0x269CC472 }, { 0xFAA8CB89, 0x63C41E46 }, { 0x1642F52F, 0xB7ABBC77 }, { 0x2F126F39, 0x7D1DE485 },
{ 0x24339BA0, 0xA8C6BA30 }, { 0xCEE888C8, 0x600507D7 }, { 0x1A20AFAE, 0x8FEE82C6 }, { 0x26D78011, 0x57A24489 },
{ 0x36A458F0, 0xFCA5E728 }, { 0x8F4B4CBD, 0x072BCEBB }, { 0xF36D24A1, 0x497BBE4A }, { 0xB769557D, 0x3CAFE99B },
{ 0x05A7B5A9, 0x12FA9EBD }, { 0x5B836BDB, 0xE8C04BAA }, { 0xAC3B7905, 0x4273148F }, { 0x2851C121, 0x90838481 },
{ 0x6C55B0FD, 0xE557D350 }, { 0xCB4F3D61, 0x72FF996A }, { 0x64E2DC03, 0x3EDA0C8E }, { 0xE6B949E9, 0xF0868356 },
{ 0xBB0B0FFC, 0x04EAD72A }, { 0x5967706A, 0x17A4B513 }, { 0x04D5367F, 0xE3C8E16F }, { 0x8DAF570C, 0xF84F3002 },
{ 0xBD3A2232, 0x1846C8FC }, { 0xF6CA9108, 0x5B8120F7 }, { 0xECEA3EA6, 0xD46FA231 }, { 0x53340725, 0x334D9474 },
{ 0xC28AD249, 0x58403966 }, { 0x9A9F21F5, 0xBED6F3A7 }, { 0xA5FE962D, 0x68CCB483 }, { 0x57E1315A, 0xD085751B },
{ 0xE52FD18E, 0xFED0023D }, { 0x20E6ADDF, 0x4B0E5B5F }, { 0x6EB1AB4C, 0x1A332DE9 }, { 0x7B65C604, 0xA3CE10F5 },
{ 0xD62C3CD7, 0x108F7BA8 }, { 0x1073D8E1, 0xAB07A3A1 }, { 0x91BED56C, 0x6B0DAD12 }, { 0x3532C097, 0xF2F36643 },
{ 0xB2CEE0D4, 0x2E557726 }, { 0x00000000, 0x00000000 }, { 0xDE9B5029, 0xCB02A476 }, { 0x8B9E7AC2, 0xE4E32FD4 },
{ 0x2C84F75E, 0x734B65EE }, { 0xCD7E10AF, 0x6E5386BC }, { 0xE7CBCA3F, 0x01B4FC84 }, { 0x65905FD5, 0xCFE8735C },
{ 0x0FF4C2E6, 0x3613BFDA }, { 0x31E7F6E8, 0x113B872C }, { 0x55052AEB, 0x2FE18BA2 }, { 0xBC48A1E4, 0xE974B72E },
{ 0xB89D979B, 0x0ABC5641 }, { 0x2202B66E, 0xB46AA5E6 }, { 0xC4BBFF87, 0x44EC26B0 }, { 0x27A503C7, 0xA6903B5B },
{ 0xFC99E647, 0x7F680190 }, { 0xA71A8D9C, 0x97A84A3A }, { 0x6037EA7C, 0xDD12EDE1 }, { 0xDD0DC84E, 0xC554251D },
{ 0x956BE313, 0x88C54C7D }, { 0x48662B5D, 0x4D916960 }, { 0x9909B992, 0xB08072CC }, { 0xC5C97C51, 0xB5DE5962 },
{ 0x19B637C9, 0x81B803AD }, { 0x4A8230EC, 0xB2F597D9 }, { 0x5F565DA4, 0x0B08AAC5 }, { 0x017283D6, 0xF1327FD2 },
{ 0x78F35E63, 0xAD98919E }, { 0x76751F53, 0x6AB95196 }, { 0x0A53774F, 0x24E92167 }, { 0x15D46D48, 0xB9FD3D1C },
{ 0xFBDA485F, 0x92F66194 }, { 0x11015B37, 0x5A35DC73 }, { 0x5477A93D, 0xDED3F470 }, { 0x81CD0D8D, 0xC00A0EB3 },
{ 0xC65FE436, 0xBB88D809 }, { 0xBEACBA55, 0x16104997 }, { 0x5693B28C, 0x21B70AC9 }, { 0x25411876, 0x59F4C5E2 },
{ 0x0B21F499, 0xD5DB5EB5 }, { 0xF55C096F, 0x55D7A19C }, { 0xC3F8519F, 0xA97246B4 }, { 0xA2BD3835, 0x8552D487 },
{ 0x1297C350, 0x54635D18 }, { 0x85183BF2, 0x23C2EFDC }, { 0xCC0C9379, 0x9F61F96E }, { 0x9DDC8FED, 0x534893A3 },
{ 0xAA0A54CB, 0x5EDF0B59 }, { 0x9F38945C, 0xAC2C6D1A }, { 0xD8AA7DE7, 0xD7AEBBA0 }, { 0x09C5EF28, 0x2ABFA00C },
{ 0x3CF72FBF, 0xD84CC64F }, { 0xB15878B3, 0x2003F64D }, { 0xC06EC9F8, 0xA724C7DF }, { 0x68808682, 0x069F323F },
{ 0x51D01C94, 0xCC296ACD }, { 0x5CC0C5C3, 0x055E2BAE }, { 0x1D6301B6, 0x6270E2C2 }, { 0x382219C0, 0x3B842720 },
{ 0x846AB824, 0xD2F0900E }, { 0x7A1745D2, 0x52FC6F27 }, { 0xE94D8B0F, 0xC6953C8C }, { 0x3095753E, 0xE009F8FE },
{ 0x92284D0B, 0x655B2C79 }, { 0x4347DFC4, 0x984A37D5 }, { 0x8808E2A5, 0xEAB5AEBF }, { 0x90CC56BA, 0x9A3FD2C0 },
{ 0xF84CD038, 0x9CA0E0FF }, { 0xAFADE162, 0x4C2595E4 }, { 0xB3BC6302, 0xDF6708F4 }, { 0x7D54EBCA, 0xBF620F23 },
{ 0x1C118260, 0x93429D10 }, { 0x8CDDD4DA, 0x097D4FD0 }, { 0x2E60ECEF, 0x8C2F9B57 }, { 0x18C4B41F, 0x708A7C7F },
{ 0xDFE9D3FF, 0x3A30DBA4 }, { 0x7FB0F07B, 0x4006F19A }, { 0x4DC19EF4, 0x5F6BF7DD }, { 0x32716E8F, 0x1F6D0647 },
{ 0x6A649D33, 0xF9FBCC86 }, { 0x67744464, 0x308C8DE5 }, { 0x72A0292C, 0x8971B0F9 }, { 0x3F61B7D8, 0xD61A4724 },
{ 0xD4C82766, 0xEFEB8511 }, { 0x40D147A3, 0x961CB6BE }, { 0xF7B812DE, 0xAAB35F25 }, { 0x7044329D, 0x76154E40 },
{ 0x4E570693, 0x513D76B6 }, { 0xD2F90AA8, 0xF3479AC7 }, { 0x77079C85, 0x9B8B2E44 }, { 0x3D85AC69, 0x297EB99D },
};
__device__ uint2 T62[256] = {
{ 0xFC7D40C3, 0x7E37E62D }, { 0xEE939E5B, 0x776F25A4 }, { 0xDD8FB5AD, 0xE045C850 }, { 0x11FF1952, 0x86ED5BA7 },
{ 0xCF616B35, 0xE91D0BD9 }, { 0x6E408FFB, 0x37E0AB25 }, { 0x31025A7A, 0x9607F6C0 }, { 0x16D23C9D, 0x0B02F5E1 },
{ 0xFB50650C, 0xF3D8486B }, { 0xC40875F5, 0x621CFF27 }, { 0xFA5FD34A, 0x7D40CB71 }, { 0xDAA29062, 0x6DAA6616 },
{ 0x23EC84E2, 0x9F5F3549 }, { 0xC507C3B3, 0xEC847C3D }, { 0x043CE205, 0x025A3668 }, { 0x4DAC0B19, 0xA8BF9E6C },
{ 0xE9BEBB94, 0xFA808BE2 }, { 0x77C74FA3, 0xB5B99C52 }, { 0xF0397BCC, 0x78D9BC95 }, { 0xDBAD2624, 0xE332E50C },
{ 0x9332797E, 0xC74FCE12 }, { 0x2EA709AB, 0x1729ECEB }, { 0x9954D1F8, 0xC2D6B9F6 }, { 0xBAB8551A, 0x5D898CBF },
{ 0x17DD8ADB, 0x859A76FB }, { 0x362F7FB5, 0x1BE85886 }, { 0xF136CD8A, 0xF6413F8F }, { 0xBBB7E35C, 0xD3110FA5 },
{ 0x14CC4D11, 0x0A2FEED5 }, { 0xCD7F1AB9, 0xE83010ED }, { 0x5F42D581, 0xA1E75DE5 }, { 0xC13B21B6, 0xEEDE4A55 },
{ 0xF94E1480, 0xF2F5535F }, { 0x1888761E, 0x0CC1B46D }, { 0x6529913B, 0xBCE15FDB }, { 0x5A7181C2, 0x2D25E897 },
{ 0xE2D7A554, 0x71817F1C }, { 0x5C53124B, 0x2E52C5CB }, { 0xEF9C281D, 0xF9F7A6BE }, { 0x21F2F56E, 0x9E722E7D },
{ 0x81DCA7E6, 0xCE170D9B }, { 0x1CB4941B, 0x0E9B8205 }, { 0x3C49D733, 0x1E712F62 }, { 0x42F9F7DC, 0x21E45CFA },
{ 0x8BBA0F60, 0xCB8E7A7F }, { 0x010FB646, 0x8E98831A }, { 0x8E895B23, 0x474CCF0D }, { 0x4FB27A95, 0xA9928558 },
{ 0x05335443, 0x8CC2B572 }, { 0x84EFF3A5, 0x42D5B8E9 }, { 0x021E718C, 0x012D1B34 }, { 0xAE74180B, 0x57A6626A },
{ 0xE3D81312, 0xFF19FC06 }, { 0x6A7C6DFE, 0x35BA9D4D }, { 0x8F86ED65, 0xC9D44C17 }, { 0xA02E5288, 0x506523E6 },
{ 0x06229389, 0x03772D5C }, { 0x0B691EC0, 0x8B01F4FE }, { 0xED825991, 0xF8DABD8A }, { 0x985B67BE, 0x4C4E3AEC },
{ 0x7FBF96A9, 0xB10DF082 }, { 0xD4F8DAE1, 0x6A69279A }, { 0xD3D5FF2E, 0xE78689DC }, { 0x1FA553D1, 0x812E1A2B },
{ 0xEBA0CA18, 0xFBAD90D6 }, { 0x34310E39, 0x1AC543B2 }, { 0x2CB97827, 0x1604F7DF }, { 0x51189F02, 0xA6241C69 },
{ 0xEAAF7C5E, 0x753513CC }, { 0xC84C4EFA, 0x64F2A59F }, { 0x489F5F5A, 0x247D2B1E }, { 0xAB474C48, 0xDB64D718 },
{ 0xF2270A40, 0x79F4A7A1 }, { 0x2A9BEBAE, 0x1573DA83 }, { 0x68621C72, 0x34978679 }, { 0xA2302304, 0x514838D2 },
{ 0xFD72F685, 0xF0AF6537 }, { 0x3A6B44BA, 0x1D06023E }, { 0xCE6EDD73, 0x678588C3 }, { 0xCC70ACFF, 0x66A893F7 },
{ 0xB5EDA9DF, 0xD4D24E29 }, { 0x70EA6A6C, 0x38563214 }, { 0x0E5A4A83, 0x07C3418C }, { 0x5635BACD, 0x2BCBB22F },
{ 0x0878D90A, 0x04B46CD0 }, { 0x0C443B0F, 0x06EE5AB8 }, { 0x76C8F9E5, 0x3B211F48 }, { 0x12EEDE98, 0x0958C389 },
{ 0xBF8B0159, 0xD14B39CD }, { 0x72F41BE0, 0x397B2920 }, { 0x13E168DE, 0x87C04093 }, { 0x47CAA39F, 0xAD26E988 },
{ 0x9C6785BB, 0x4E140C84 }, { 0xB7F3D853, 0xD5FF551D }, { 0x5D5CA40D, 0xA0CA46D1 }, { 0x87FE346F, 0xCD6020C7 },
{ 0x15C3FB57, 0x84B76DCF }, { 0xA121E4CE, 0xDEFDA0FC }, { 0x96012D3D, 0x4B8D7B60 }, { 0x298A2C64, 0x9AC642AD },
{ 0x10F0AF14, 0x0875D8BD }, { 0x7B8374AC, 0xB357C6EA }, { 0x9A451632, 0x4D6321D8 }, { 0xC719B23F, 0xEDA96709 },
{ 0xF328BC06, 0xF76C24BB }, { 0x912C08F2, 0xC662D526 }, { 0x7892B366, 0x3CE25EC4 }, { 0x6F4F39BD, 0xB978283F },
{ 0x9D6833FD, 0xC08C8F9E }, { 0x9E79F437, 0x4F3917B0 }, { 0xB2C08C10, 0x593DE06F }, { 0xB1D14BDA, 0xD6887841 },
{ 0x32139DB0, 0x19B26EEE }, { 0x75D93E2F, 0xB4948766 }, { 0x1987C058, 0x82593777 }, { 0x3D466175, 0x90E9AC78 },
{ 0xFF6C8709, 0xF1827E03 }, { 0x353EB87F, 0x945DC0A8 }, { 0x8AB5B926, 0x4516F965 }, { 0x7EB020EF, 0x3F957398 },
{ 0x6D514831, 0xB855330B }, { 0x542BCB41, 0x2AE6A91B }, { 0xC6160479, 0x6331E413 }, { 0x80D311A0, 0x408F8E81 },
{ 0xC325503A, 0xEFF35161 }, { 0xBD9570D5, 0xD06622F9 }, { 0x0D4B8D49, 0x8876D9A2 }, { 0x573A0C8B, 0xA5533135 },
{ 0xDF91C421, 0xE168D364 }, { 0xF50A2F8F, 0xF41B09E7 }, { 0x24C1A12D, 0x12B09B0F }, { 0xA9593DC4, 0xDA49CC2C },
{ 0x3E57A6BF, 0x1F5C3456 }, { 0xA8568B82, 0x54D14F36 }, { 0x43F6419A, 0xAF7CDFE0 }, { 0xC943F8BC, 0xEA6A2685 },
{ 0xD7E91D2B, 0xE5DCBFB4 }, { 0x799D0520, 0xB27ADDDE }, { 0xD6E6AB6D, 0x6B443CAE }, { 0xF61BE845, 0x7BAE91C9 },
{ 0x7CAE5163, 0x3EB868AC }, { 0x22E332A4, 0x11C7B653 }, { 0xB9A992D0, 0xD23C1491 }, { 0x0311C7CA, 0x8FB5982E },
{ 0xE0C9D4D8, 0x70AC6428 }, { 0x0F55FCC5, 0x895BC296 }, { 0xEC8DEFD7, 0x76423E90 }, { 0xDE9E7267, 0x6FF0507E },
{ 0x7A8CC2EA, 0x3DCF45F0 }, { 0x941F5CB1, 0x4AA06054 }, { 0xB0DEFD9C, 0x5810FB5B }, { 0xBC9AC693, 0x5EFEA1E3 },
{ 0xDC8003EB, 0x6EDD4B4A }, { 0xE8B10DD2, 0x741808F8 }, { 0x28859A22, 0x145EC1B7 }, { 0x50172944, 0x28BC9F73 },
{ 0x4EBDCCD3, 0x270A0642 }, { 0x331C2BF6, 0x972AEDF4 }, { 0x0A66A886, 0x059977E4 }, { 0x4A812ED6, 0x2550302A },
{ 0xA7037747, 0xDD8A8DA0 }, { 0x970E9B7B, 0xC515F87A }, { 0x601AC578, 0x3023EAA9 }, { 0x73FBADA6, 0xB7E3AA3A },
{ 0x1EAAE597, 0x0FB69931 }, { 0x00000000, 0x00000000 }, { 0x6204B4F4, 0x310EF19D }, { 0x44DB6455, 0x229371A6 },
{ 0x1A960792, 0x0DECAF59 }, { 0xB8A62496, 0x5CA4978B }, { 0x38753536, 0x1C2B190A }, { 0x82CD602C, 0x41A295B5 },
{ 0x6426277D, 0x3279DCC1 }, { 0x9F764271, 0xC1A194AA }, { 0x26DFD0A1, 0x139D803B }, { 0x41E83016, 0xAE51C4D4 },
{ 0xAD65DFC1, 0xD813FA44 }, { 0x45D4D213, 0xAC0BF2BC }, { 0x46C515D9, 0x23BE6A92 }, { 0x923DCF38, 0x49D74D08 },
{ 0x27D066E7, 0x9D050321 }, { 0x5E4D63C7, 0x2F7FDEFF }, { 0x55247D07, 0xA47E2A01 }, { 0x2FA8BFED, 0x99B16FF1 },
{ 0x8C972AAF, 0x4661D439 }, { 0xA33F9542, 0xDFD0BBC8 }, { 0xA51D06CB, 0xDCA79694 }, { 0x7DA1E725, 0xB020EBB6 },
{ 0x696DAA34, 0xBA0F0563 }, { 0xD5F76CA7, 0xE4F1A480 }, { 0x9510EAF7, 0xC438E34E }, { 0x3B64F2FC, 0x939E8124 },
{ 0x072D25CF, 0x8DEFAE46 }, { 0x586FF04E, 0x2C08F3A3 }, { 0xB3CF3A56, 0xD7A56375 }, { 0x40E78650, 0x20C947CE },
{ 0x86F18229, 0x43F8A3DD }, { 0xAC6A6987, 0x568B795E }, { 0x1DBB225D, 0x8003011F }, { 0xF7145E03, 0xF53612D3 },
{ 0x300DEC3C, 0x189F75DA }, { 0x3720C9F3, 0x9570DB9C }, { 0x6B73DBB8, 0xBB221E57 }, { 0xE4F536DD, 0x72F65240 },
{ 0x88ABC8AA, 0x443BE251 }, { 0xD9B357A8, 0xE21FFE38 }, { 0xE7E4F117, 0xFD43CA6E }, { 0x89A47EEC, 0xCAA3614B },
{ 0xE1C6629E, 0xFE34E732 }, { 0x1B99B1D4, 0x83742C43 }, { 0x83C2D66A, 0xCF3A16AF }, { 0x4990E91C, 0xAAE5A804 },
{ 0x4CA3BD5F, 0x26271D76 }, { 0x3F5810F9, 0x91C4B74C }, { 0xF841A2C6, 0x7C6DD045 }, { 0xFE63314F, 0x7F1AFD19 },
{ 0x8D989CE9, 0xC8F95723 }, { 0x5306EE8E, 0xA709075D }, { 0xAA48FA0E, 0x55FC5402 }, { 0x9023BEB4, 0x48FA563C },
{ 0xCA523F76, 0x65DFBEAB }, { 0xD8BCE1EE, 0x6C877D22 }, { 0x85E045E3, 0xCC4D3BF3 }, { 0x6115733E, 0xBEBB69B3 },
{ 0x20FD4328, 0x10EAAD67 }, { 0x71E5DC2A, 0xB6CEB10E }, { 0x6737E0B7, 0xBDCC44EF }, { 0xA412B08D, 0x523F158E },
{ 0x2DB6CE61, 0x989C74C5 }, { 0x2B945DE8, 0x9BEB5999 }, { 0x09776F4C, 0x8A2CEFCA }, { 0x5B7E3784, 0xA3BD6B8D },
{ 0xCB5D8930, 0xEB473DB1 }, { 0x9B4AA074, 0xC3FBA2C2 }, { 0x25CE176B, 0x9C281815 }, { 0xD0C438E4, 0x683311F2 },
{ 0xBE84B71F, 0x5FD3BAD7 }, { 0xE5FA809B, 0xFC6ED15A }, { 0x6C5EFE77, 0x36CDB011 }, { 0x520958C8, 0x29918447 },
{ 0x59604608, 0xA29070B9 }, { 0xA60CC101, 0x53120EBA }, { 0x74D68869, 0x3A0C047C }, { 0xD2DA4968, 0x691E0AC6 },
{ 0xE6EB4751, 0x73DB4974 }, { 0xF40599C9, 0x7A838AFD }, { 0xB4E21F99, 0x5A4ACD33 }, { 0xC03497F0, 0x6046C94F },
{ 0xD1CB8EA2, 0xE6AB92E8 }, { 0x663856F1, 0x3354C7F5 }, { 0xAF7BAE4D, 0xD93EE170 }, { 0xC22AE67C, 0x616BD27B },
{ 0x397A8370, 0x92B39A10 }, { 0x4B8E9890, 0xABC8B330 }, { 0x630B02B2, 0xBF967287 }, { 0xB6FC6E15, 0x5B67D607 },
};
__device__ uint2 T72[256] = {
{ 0xCE553FE6, 0xD031C397 }, { 0xB006B525, 0x16BA5B01 }, { 0x296E70C8, 0xA89BADE6 }, { 0x77D3435B, 0x6A1F525D },
{ 0x573DFA0B, 0x6E103570 }, { 0x17FC95AB, 0x660EFB2A }, { 0x97634BF6, 0x76327A9E }, { 0x62458BF5, 0x4BAD9D64 },
{ 0xDBC3F748, 0xF1830CAE }, { 0x669131FF, 0xC5C8F542 }, { 0xDC48B0CB, 0x95044A1C }, { 0x3CF8B866, 0x892962DF },
{ 0xE930C135, 0xB0B9E208 }, { 0x611A767C, 0xA14FB3F0 }, { 0x1C160136, 0x8D2605F2 }, { 0xFECC549E, 0xD6B71922 },
{ 0xA5907D8B, 0x37089438 }, { 0x5803D49C, 0x0B5DA38E }, { 0xEA6F3CBC, 0x5A5BCC9C }, { 0x3B73FFE5, 0xEDAE246D },
{ 0xDE22EDCE, 0xD2B87E0F }, { 0xCA8185EC, 0x5E54ABB1 }, { 0xE80561B9, 0x1DE7F88F }, { 0x0135A08C, 0xAD5E1A87 },
{ 0x65CECC76, 0x2F2ADBD6 }, { 0x82F58358, 0x5780B5A7 }, { 0xEDE47B3F, 0x3EDC8A2E }, { 0x06BEE70F, 0xC9D95C35 },
{ 0x6C4E05EE, 0x83BE111D }, { 0x59367410, 0xA603B909 }, { 0x809FDE5D, 0x103C81B4 }, { 0x7D0C774A, 0x2C69B602 },
{ 0xD5C87953, 0x399080D7 }, { 0x487406B4, 0x09D41E16 }, { 0x26505E5F, 0xCDD63B18 }, { 0x9B0298E8, 0xF99DC2F4 },
{ 0x943CB67F, 0x9CD0540A }, { 0x891F17C5, 0xBCA84B7F }, { 0xB78DF2A6, 0x723D1DB3 }, { 0xE73B4F2E, 0x78AA6E71 },
{ 0xA071670D, 0x1433E699 }, { 0x54620782, 0x84F21BE4 }, { 0xB4D20F2F, 0x98DF3327 }, { 0xD3769E5C, 0xF049DCE2 },
{ 0x9656EB7A, 0xDB6C6019 }, { 0x078B4783, 0x648746B2 }, { 0x8DCBADCF, 0x32CD2359 }, { 0xF0C7DA85, 0x1EA4955B },
{ 0x1B9D46B5, 0xE9A14340 }, { 0xBBEC21B8, 0xFD92A5D9 }, { 0x0E0B8E1B, 0xC8138C79 }, { 0x6D7BA562, 0x2EE00B9A },
{ 0x93B7F1FC, 0xF85712B8 }, { 0x0BEA949D, 0xEB28FED8 }, { 0x8A40EA4C, 0x564A65EB }, { 0x474A2823, 0x6C9988E8 },
{ 0x121D8F2D, 0x4535898B }, { 0x31ACCBF4, 0xABD8C032 }, { 0xB9867CBD, 0xBA2E91CA }, { 0xEF8E263A, 0x7960BE3D },
{ 0x602FD6F0, 0x0C11A977 }, { 0x16C93527, 0xCB50E1AD }, { 0x035FFD89, 0xEAE22E94 }, { 0x5DE2CE1A, 0x2866D12F },
{ 0xAB9BF390, 0xFF1B1841 }, { 0x8CFE0D43, 0x9F9339DE }, { 0xC48A0BF7, 0x964727C8 }, { 0xAAAE531C, 0x524502C6 },
{ 0xAC10B413, 0x9B9C5EF3 }, { 0x42AB32A5, 0x4FA2FA49 }, { 0xE551122B, 0x3F165A62 }, { 0x76E6E3D7, 0xC74148DA },
{ 0xE464B2A7, 0x924840E5 }, { 0xD69784DA, 0xD372AE43 }, { 0x05E11A86, 0x233B72A1 }, { 0x4941A638, 0xA48A0491 },
{ 0xC9DE7865, 0xB4B68525 }, { 0xA6CF8002, 0xDDEABAAC }, { 0x50B6BD88, 0x0A9773C2 }, { 0x5EBD3393, 0xC284FFBB },
{ 0x2C8F6A4E, 0x8BA0DF47 }, { 0x4D951C32, 0x2AEF6CB7 }, { 0x2A318D41, 0x42798372 }, { 0xBF389BB2, 0x73F7CDFF },
{ 0x382C026C, 0x074C0AF9 }, { 0x243A035A, 0x8A6A0F0B }, { 0x5F88931F, 0x6FDAE53C }, { 0x7E538AC3, 0xC68B9896 },
{ 0x1AA8E639, 0x44FF59C7 }, { 0x439E9229, 0xE2FCE0CE }, { 0x79D8CD40, 0xA20CDE24 }, { 0xC8EBD8E9, 0x19E89FA2 },
{ 0xF398270C, 0xF446BBCF }, { 0x2284E455, 0x43B3533E }, { 0x8E945046, 0xD82F0DCD }, { 0xB26CE820, 0x51066F12 },
{ 0x6BC5426D, 0xE73957AF }, { 0x40C16FA0, 0x081ECE5A }, { 0xC5BFAB7B, 0x3B193D4F }, { 0xDF174D42, 0x7FE66488 },
{ 0x705804D8, 0x0E9814EF }, { 0x7C39D7C6, 0x8137AC85 }, { 0xE185A821, 0xB1733244 }, { 0x6F11F867, 0x695C3F89 },
{ 0xE3EFF524, 0xF6CF0657 }, { 0xD02963D5, 0x1AABF276 }, { 0x75B91E5E, 0x2DA3664E }, { 0x1077D228, 0x0289BD98 },
{ 0xF413608F, 0x90C1FD7D }, { 0xFD93A917, 0x3C5537B6 }, { 0x3919A2E0, 0xAA12107E }, { 0x30996B78, 0x0686DAB5 },
{ 0x9EE3826E, 0xDAA6B055 }, { 0x56085A87, 0xC34E2FF7 }, { 0x4FFF4137, 0x6D5358A4 }, { 0xB35948AC, 0xFC587595 },
{ 0xC7D5F67E, 0x7CA5095C }, { 0x8B754AC0, 0xFB147F6C }, { 0x91DDACF9, 0xBFEB26AB }, { 0x67A49173, 0x6896EFC5 },
{ 0x1E7C5C33, 0xCA9A31E1 }, { 0xB13315A9, 0xBBE44186 }, { 0x689ABFE4, 0x0DDB793B }, { 0xA7FA208E, 0x70B4A02B },
{ 0x7307F951, 0xE47A3A7B }, { 0x14A36822, 0x8CECD5BE }, { 0x23B144D9, 0xEEED49B9 }, { 0xB8B3DC31, 0x17708B4D },
{ 0x2765FED3, 0x6088219F }, { 0xF1F27A09, 0xB3FA8FDC }, { 0xFCA6099B, 0x910B2D31 }, { 0x78ED6DCC, 0x0F52C4A3 },
{ 0xBAD98134, 0x50CCBF5E }, { 0x7F662A4F, 0x6BD58211 }, { 0xD4FDD9DF, 0x94CE9A50 }, { 0x45207526, 0x2B25BCFB },
{ 0x1F49FCBF, 0x67C42B66 }, { 0x723259DD, 0x492420FC }, { 0x18C2BB3C, 0x03436DD4 }, { 0xF872B391, 0x1F6E4517 },
{ 0x69AF1F68, 0xA08563BC }, { 0xEEBB86B6, 0xD43EA4BA }, { 0x08B56914, 0x01CAD04C }, { 0x0980C998, 0xAC94CACB },
{ 0x9A373864, 0x54C3D873 }, { 0x2DBACAC2, 0x26FEC5C0 }, { 0xBE0D3B3E, 0xDEA9D778 }, { 0x20EEB950, 0x040F672D },
{ 0x7BB29045, 0xE5B0EA37 }, { 0xCBB42560, 0xF30AB136 }, { 0x37122CFB, 0x62019C07 }, { 0x13282FA1, 0xE86B930C },
{ 0x2EE5374B, 0xCC1CEB54 }, { 0xA21B3A08, 0x538FD28A }, { 0xD89C0AC1, 0x1B61223A }, { 0xAD25149F, 0x36C24474 },
{ 0xF74C9D06, 0x7A23D3E9 }, { 0x9968C5ED, 0xBE21F6E7 }, { 0x36278C77, 0xCF5F8680 }, { 0xEB5A9C30, 0xF705D61B },
{ 0x52DCE08D, 0x4D2B47D1 }, { 0xC234ECF8, 0x5F9E7BFD }, { 0x3DCD18EA, 0x24777858 }, { 0x4415D5AA, 0x867BA67C },
{ 0x5A698999, 0x4CE1979D }, { 0x00000000, 0x00000000 }, { 0x33C696F1, 0xEC64F421 }, { 0xC16B1171, 0xB57C5569 },
{ 0x467F88AF, 0xC1C7926F }, { 0x0F3E2E97, 0x654D96FE }, { 0xA8C40E19, 0x15F936D5 }, { 0xA9F1AE95, 0xB8A72C52 },
{ 0x21DB19DC, 0xA9517DAA }, { 0xFA18EE94, 0x58D27104 }, { 0xF2AD8780, 0x5918A148 }, { 0xDAF657C4, 0x5CDD1629 },
{ 0x64FB6CFA, 0x8274C151 }, { 0xC6E056F2, 0xD1FB13DB }, { 0xCF609F6A, 0x7D6FD910 }, { 0xD9A9AA4D, 0xB63F38BD },
{ 0xF526C003, 0x3D9FE7FA }, { 0x871499DE, 0x74BBC706 }, { 0xB6B8522A, 0xDF630734 }, { 0xCD0AC26F, 0x3AD3ED03 },
{ 0x83C023D4, 0xFADEAF20 }, { 0x4ECAE1BB, 0xC00D4223 }, { 0x5CD76E96, 0x8538CBA8 }, { 0x6E2458EB, 0xC402250E },
{ 0x026A5D05, 0x47BC3413 }, { 0x114272A4, 0xAFD7A71F }, { 0xCC3F62E3, 0x978DF784 }, { 0xA144C781, 0xB96DFC1E },
{ 0x1596C8AE, 0x21B2CF39 }, { 0x950916F3, 0x318E4E8D }, { 0x3E92E563, 0xCE9556CC }, { 0xDD7D1047, 0x385A509B },
{ 0xB5E7AFA3, 0x358129A0 }, { 0x63702B79, 0xE6F387E3 }, { 0x53E94001, 0xE0755D56 }, { 0xFFF9F412, 0x7BE903A5 },
{ 0x90E80C75, 0x12B53C2C }, { 0x857EC4DB, 0x3307F315 }, { 0x0C61D31E, 0x8FAFB86A }, { 0x86213952, 0xD9E5DD81 },
{ 0x9FD622E2, 0x77F8AAD2 }, { 0x357871FE, 0x25BDA814 }, { 0x8FA1F0CA, 0x7571174A }, { 0x985D6561, 0x137FEC60 },
{ 0x9DBC7FE7, 0x30449EC1 }, { 0x41F4CF2C, 0xA540D4DD }, { 0xAE7AE916, 0xDC206AE0 }, { 0xE2DA55A8, 0x5B911CD0 },
{ 0xF947131D, 0xB2305F90 }, { 0xBD52C6B7, 0x344BF9EC }, { 0xD2433ED0, 0x5D17C665 }, { 0xC05EB1FD, 0x18224FEE },
{ 0x844B6457, 0x9E59E992 }, { 0xA4A5DD07, 0x9A568EBF }, { 0x716DA454, 0xA3C60E68 }, { 0xD7A22456, 0x7E2CB4C4 },
{ 0x4CA0BCBE, 0x87B17630 }, { 0x32F3367D, 0x413AEEA6 }, { 0xBC67663B, 0x9915E36B }, { 0x3A465F69, 0x40F03EEA },
{ 0xE0B008AD, 0x1C2D28C3 }, { 0x4A1E5BB1, 0x4E682A05 }, { 0x285BD044, 0x05C5B761 }, { 0x5B5C2915, 0xE1BF8D1A },
{ 0xC3014C74, 0xF2C0617A }, { 0xD11CC359, 0xB7F5E8F1 }, { 0x3FA745EF, 0x63CB4C4B }, { 0x9C89DF6B, 0x9D1A8446 },
{ 0x4B2BFB3D, 0xE3363082 }, { 0xE60EEFA2, 0xD5F474F6 }, { 0xFB2D4E18, 0xF58C6B83 }, { 0x0ADF3411, 0x4676E45F },
{ 0x1D23A1BA, 0x20781F75 }, { 0x81AA7ED1, 0xBD629B33 }, { 0x19F71BB0, 0xAE1D7753 }, { 0xA32E9A84, 0xFED1C80D },
{ 0x92825170, 0x5509083F }, { 0x5557A70E, 0x29AC0163 }, { 0x51831D04, 0xA7C96945 }, { 0x04D4BA0A, 0x8E656826 },
{ 0x882AB749, 0x11F651F8 }, { 0xF6793D8A, 0xD77DC96E }, { 0x2B042DCD, 0xEF2799F5 }, { 0x7A8730C9, 0x48EEF0B0 },
{ 0x0D547392, 0x22F1A2ED }, { 0x2FD097C7, 0x6142F1D3 }, { 0x6AF0E2E1, 0x4A674D28 }, { 0x748CBED2, 0x80FD7CC9 },
{ 0xAF4F499A, 0x717E7067 }, { 0xECD1DBB3, 0x938290A9 }, { 0x344DD172, 0x88E3B293 }, { 0x250FA3D6, 0x2734158C },
};
// KeySchedule
__constant__ const uint64_t CC[12][8] = {
{ 0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71, 0x0169679291e07c4b, 0x15d360a4082a42a2, 0x234d74cc36747605, 0x0745a6f2596580dd },
{ 0x1a2f9da98ab5a36f, 0xd7b5700f469de34f, 0x982b230a72eafef3, 0x3101b5160f5ed561, 0x5899d6126b17b59a, 0xcaa70adbc261b55c, 0x56cdcbd71ba2dd55, 0xb79bb121700479e6 },
{ 0xc72fce2bacdc74f5, 0x35843d6a28fc390a, 0x8b1f9c525f5ef106, 0x7b7b29b11475eaf2, 0xb19e3590e40fe2d3, 0x09db6260373ac9c1, 0x31db7a8643f4b6c2, 0xb20aba0af5961e99 },
{ 0xd26615e8b3df1fef, 0xdde4715da0e148f9, 0x7d3c5c337e858e48, 0x3f355e68ad1c729d, 0x75d603ed822cd7a9, 0xbe0352933313b7d8, 0xf137e893a1ea5334, 0x2ed1e384bcbe0c22 },
{ 0x994747adac6bea4b, 0x6323a96c0c413f9a, 0x4a1086161f1c157f, 0xbdff0f80d7359e35, 0xa3f53a254717cdbf, 0x161a2723b700ffdf, 0xf563eaa97ea2567a, 0x57fe6c7cfd581760 },
{ 0xd9d33a1daeae4fae, 0xc039307a3bc3a46f, 0x6ca44251f9c4662d, 0xc68ef09ab49a7f18, 0xb4b79a1cb7a6facf, 0xb6c6bec2661ff20a, 0x354f903672c571bf, 0x6e7d64467a4068fa },
{ 0xecc5aaee160ec7f4, 0x540924bffe86ac51, 0xc987bfe6c7c69e39, 0xc9937a19333e47d3, 0x372c822dc5ab9209, 0x04054a2883694706, 0xf34a3ca24c451735, 0x93d4143a4d568688 },
{ 0xa7c9934d425b1f9b, 0x41416e0c02aae703, 0x1ede369c71f8b74e, 0x9ac4db4d3b44b489, 0x90069b92cb2b89f4, 0x2fc4a5d12b8dd169, 0xd9a8515935c2ac36, 0x1ee702bfd40d7fa4 },
{ 0x9b223116545a8f37, 0xde5f16ecd89a4c94, 0x244289251b3a7d3a, 0x84090de0b755d93c, 0xb1ceb2db0b440a80, 0x549c07a69a8a2b7b, 0x602a1fcb92dc380e, 0xdb5a238351446172 },
{ 0x526f0580a6debeab, 0xf3f3e4b248e52a38, 0xdb788aff1ce74189, 0x0361331b8ae1ff1f, 0x4b3369af0267e79f, 0xf452763b306c1e7a, 0xc3b63b15d1fa9836, 0xed9c4598fbc7b474 },
{ 0xfb89c8efd09ecd7b, 0x94fe5a63cdc60230, 0x6107abebbb6bfad8, 0x7966841421800120, 0xcab948eaef711d8a, 0x986e477d1dcdbaef, 0x5dd86fc04a59a2de, 0x1b2df381cda4ca6b },
{ 0xba3116f167e78e37, 0x7ab14904b08013d2, 0x771ddfbc323ca4cd, 0x9b9f2130d41220f8, 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48 }
};
__constant__ const uint64_t precomputed_values[12][8] = {
0x8FD72F640708B0D0, 0x0DE874C7EBC3F213, 0xE92EEF3AD202E9E0, 0xC1E9DA0708013DA7, 0x9727DAB2F014BE88, 0x103051A02BCD6935, 0x33EC7E1DBD28F736, 0x1ECF460CF78AD1F4,
0x0B2D9F89C775449D, 0x6B6EEFC6DAB7E8B0, 0xF1A0D31667F6EC44, 0x2A71132D5E108166, 0x0E9357C2EC87931A, 0xC99F5C1B4A01612D, 0x7E60B16E637D4EE2, 0xA9FCB827F9BA6D81,
0x231FECA5AB3D285C, 0x70C6E1483C838C3B, 0x9C21C3C40CE4E2DA, 0x2FA796BD5688E573, 0x04C0E3FF55809FDF, 0x5FF978BFB8E3CDC8, 0xC54A19D6A3D07033, 0x0FCA83FDDE872478,
0xBDF9312726339F10, 0x51A5BA1793BC9C56, 0xC4428DA14F96D2D4, 0xEC925222374EAB1F, 0x79477893747DD92F, 0xC495E19A46886304, 0x9C23F893BA7CFA36, 0x0C47268881FC5FEB,
0xCF117966029B2CB3, 0x07179ABE77088A8F, 0x671EF4CC2650E257, 0x7474B8B170DAB5C6, 0x4224FEBECF35113E, 0x993D156C675C5537, 0x2DEE3A5782C39B45, 0xE7C586F2990DD385,
0x8608FD95B1C1138A, 0x8BB0847D9E9849AC, 0x5E76623F4F0EB0C7, 0x34C2BDBAFC5060CE, 0xE9E814475907826C, 0x22C9ED94D6AAC7C9, 0xE6B75E28171EB0D6, 0xF1329E5534E60215,
0x86BB4814B1C3CE52, 0xE8F226C9FBDDD017, 0xCEDED67991CB3087, 0x76C33E32FDBFACA5, 0xDBB13BE1A9F7474C, 0x3D0273470342C356, 0x8E7246C51CF07F61, 0xAC8C125DDEF8DF71,
0x6D73E747795B8CF3, 0x4E4AA65EA0072050, 0xA14A1582CB43C2B9, 0x748EF2B7BB63B938, 0x126789534410D7D4, 0xD4D48FF40301D791, 0xC67DFBE315C41FC0, 0x35E7A1A1AF88601C,
0x9BD33EA0FAB34007, 0xF51B7CDBE3D67D25, 0xD3ABDA0CE4186E6B, 0x8E61DDADCBCE1706, 0x58994565B41BE6A5, 0x7A87ABC1240CD31D, 0xFAFE6C28487968D0, 0x15B368609FF9EEA7,
0xAE33263CCF115818, 0x93B2DBE9CADFCFC8, 0x0A91952BF91B0147, 0x458E67CA5F1ED73A, 0x94C2E5F288F074E3, 0x377895E85C69E996, 0xF11A4456AAB37B10, 0x163131934816821A,
0xD07E4A2366BF469D, 0x5EF1A3D220213B6C, 0x3C5BB78971D8ED0F, 0x0DE05E6B9006F2D2, 0xC58CFB00B8EAA1C9, 0xEFCDB54D1F250B76, 0xFD135634FA527042, 0x4CEE791290516407,
0xD800B9264010790F, 0x974C4823E2B668D7, 0xA605A4B385C5E361, 0x3F6C92DA5A56D8D2, 0x82B9D67C12EF8277, 0x0AB6B4582561BF90, 0x46954FD98FC2CBA3, 0x70BE45CB21B6760D
};

4
x11/cuda_streebog.cu

@ -808,7 +808,9 @@ void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash)
__constant__ uint64_t target64[4]; __constant__ uint64_t target64[4];
void streebog_set_target(const uint32_t* ptarget){ __host__
void streebog_set_target(const uint32_t* ptarget)
{
cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice); cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice);
} }

Loading…
Cancel
Save