diff --git a/ccminer.cpp b/ccminer.cpp index 4274dee..3eb65f7 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -246,16 +246,16 @@ Options:\n\ skein Skein SHA2 (Skeincoin)\n\ skein2 Double Skein (Woodcoin)\n\ s3 S3 (1Coin)\n\ + vanilla Blake256-8 (VNL)\n\ veltor Thorsriddle streebog\n\ + whirlcoin Old Whirlcoin (Whirlpool algo)\n\ + whirlpool Whirlpool algo\n\ x11evo Permuted x11 (Revolver)\n\ x11 X11 (DarkCoin)\n\ x13 X13 (MaruCoin)\n\ x14 X14\n\ x15 X15\n\ x17 X17\n\ - vanilla Blake256-8 (VNL)\n\ - whirlcoin Old Whirlcoin (Whirlpool algo)\n\ - whirlpool Whirlpool algo\n\ zr5 ZR5 (ZiftrCoin)\n\ -d, --devices Comma separated list of CUDA devices to use.\n\ Device IDs start counting from 0! Alternatively takes\n\ diff --git a/neoscrypt/cuda_vectors.h b/neoscrypt/cuda_vectors.h index 08fc0ee..74e8843 100644 --- a/neoscrypt/cuda_vectors.h +++ b/neoscrypt/cuda_vectors.h @@ -482,7 +482,7 @@ static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift // require a uint32_t[9] ret array // note: djm neoscrypt implementation is near the limits of gpu capabilities // and weird behaviors can happen when tuning device functions code... -__device__ void shift256R(uint32_t* ret, const uint8 &vec4, uint32_t shift) +__device__ static void shift256R(uint32_t* ret, const uint8 &vec4, uint32_t shift) { uint8_t *v = (uint8_t*) &vec4.s0; uint8_t *r = (uint8_t*) ret; @@ -496,7 +496,7 @@ __device__ void shift256R(uint32_t* ret, const uint8 &vec4, uint32_t shift) #else // same for SM 3.5+, really faster ? -__device__ void shift256R(uint32_t* ret, const uint8 &vec4, uint32_t shift) +__device__ static void shift256R(uint32_t* ret, const uint8 &vec4, uint32_t shift) { uint32_t truc = 0, truc2 = cuda_swab32(vec4.s7), truc3 = 0; asm("shf.r.clamp.b32 %0, %1, %2, %3;" : "=r"(truc) : "r"(truc3), "r"(truc2), "r"(shift)); diff --git a/x11/cuda_streebog.cu b/x11/cuda_streebog.cu index 5ad32e7..9599129 100644 --- a/x11/cuda_streebog.cu +++ b/x11/cuda_streebog.cu @@ -6,1090 +6,988 @@ * * ==========================(LICENSE BEGIN)============================ * - * @author Tanguy Pruvot 2015 + * @author Tanguy Pruvot - 2015 + * @author Alexis Provos - 2016 */ -#include -#include "cuda_helper.h" +// Further improved with shared memory utilization, Provos Alexis, June 2016 + +#include + +#include +#include +#include "neoscrypt/cuda_vectors.h" typedef unsigned char uchar; //#define FULL_UNROLL -#define memcpy(dst, src, len) { \ - for (int i = 0; i < len; i++) { \ - *((uchar*)dst + i) = *((uchar*)src + i); \ - } \ -} - // Tables for function F -__device__ static uint64_t T0[256] = { - 0xE6F87E5C5B711FD0, 0x258377800924FA16, 0xC849E07E852EA4A8, 0x5B4686A18F06C16A, - 0x0B32E9A2D77B416E, 0xABDA37A467815C66, 0xF61796A81A686676, 0xF5DC0B706391954B, - 0x4862F38DB7E64BF1, 0xFF5C629A68BD85C5, 0xCB827DA6FCD75795, 0x66D36DAF69B9F089, - 0x356C9F74483D83B0, 0x7CBCECB1238C99A1, 0x36A702AC31C4708D, 0x9EB6A8D02FBCDFD6, - 0x8B19FA51E5B3AE37, 0x9CCFB5408A127D0B, 0xBC0C78B508208F5A, 0xE533E3842288ECED, - 0xCEC2C7D377C15FD2, 0xEC7817B6505D0F5E, 0xB94CC2C08336871D, 0x8C205DB4CB0B04AD, - 0x763C855B28A0892F, 0x588D1B79F6FF3257, 0x3FECF69E4311933E, 0x0FC0D39F803A18C9, - 0xEE010A26F5F3AD83, 0x10EFE8F4411979A6, 0x5DCDA10C7DE93A10, 0x4A1BEE1D1248E92C, - 0x53BFF2DB21847339, 0xB4F50CCFA6A23D09, 0x5FB4BC9CD84798CD, 0xE88A2D8B071C56F9, - 0x7F7771695A756A9C, 0xC5F02E71A0BA1EBC, 0xA663F9AB4215E672, 0x2EB19E22DE5FBB78, - 0x0DB9CE0F2594BA14, 0x82520E6397664D84, 0x2F031E6A0208EA98, 0x5C7F2144A1BE6BF0, - 0x7A37CB1CD16362DB, 0x83E08E2B4B311C64, 0xCF70479BAB960E32, 0x856BA986B9DEE71E, - 0xB5478C877AF56CE9, 0xB8FE42885F61D6FD, 0x1BDD0156966238C8, 0x622157923EF8A92E, - 0xFC97FF42114476F8, 0x9D7D350856452CEB, 0x4C90C9B0E0A71256, 0x2308502DFBCB016C, - 0x2D7A03FAA7A64845, 0xF46E8B38BFC6C4AB, 0xBDBEF8FDD477DEBA, 0x3AAC4CEBC8079B79, - 0xF09CB105E8879D0C, 0x27FA6A10AC8A58CB, 0x8960E7C1401D0CEA, 0x1A6F811E4A356928, - 0x90C4FB0773D196FF, 0x43501A2F609D0A9F, 0xF7A516E0C63F3796, 0x1CE4A6B3B8DA9252, - 0x1324752C38E08A9B, 0xA5A864733BEC154F, 0x2BF124575549B33F, 0xD766DB15440DC5C7, - 0xA7D179E39E42B792, 0xDADF151A61997FD3, 0x86A0345EC0271423, 0x38D5517B6DA939A4, - 0x6518F077104003B4, 0x02791D90A5AEA2DD, 0x88D267899C4A5D0A, 0x930F66DF0A2865C2, - 0x4EE9D4204509B08B, 0x325538916685292A, 0x412907BFC533A842, 0xB27E2B62544DC673, - 0x6C5304456295E007, 0x5AF406E95351908A, 0x1F2F3B6BC123616F, 0xC37B09DC5255E5C6, - 0x3967D133B1FE6844, 0x298839C7F0E711E2, 0x409B87F71964F9A2, 0xE938ADC3DB4B0719, - 0x0C0B4E47F9C3EBF4, 0x5534D576D36B8843, 0x4610A05AEB8B02D8, 0x20C3CDF58232F251, - 0x6DE1840DBEC2B1E7, 0xA0E8DE06B0FA1D08, 0x7B854B540D34333B, 0x42E29A67BCCA5B7F, - 0xD8A6088AC437DD0E, 0xC63BB3A9D943ED81, 0x21714DBD5E65A3B1, 0x6761EDE7B5EEA169, - 0x2431F7C8D573ABF6, 0xD51FC685E1A3671A, 0x5E063CD40410C92D, 0x283AB98F2CB04002, - 0x8FEBC06CB2F2F790, 0x17D64F116FA1D33C, 0xE07359F1A99EE4AA, 0x784ED68C74CDC006, - 0x6E2A19D5C73B42DA, 0x8712B4161C7045C3, 0x371582E4ED93216D, 0xACE390414939F6FC, - 0x7EC5F12186223B7C, 0xC0B094042BAC16FB, 0xF9D745379A527EBF, 0x737C3F2EA3B68168, - 0x33E7B8D9BAD278CA, 0xA9A32A34C22FFEBB, 0xE48163CCFEDFBD0D, 0x8E5940246EA5A670, - 0x51C6EF4B842AD1E4, 0x22BAD065279C508C, 0xD91488C218608CEE, 0x319EA5491F7CDA17, - 0xD394E128134C9C60, 0x094BF43272D5E3B3, 0x9BF612A5A4AAD791, 0xCCBBDA43D26FFD0F, - 0x34DE1F3C946AD250, 0x4F5B5468995EE16B, 0xDF9FAF6FEA8F7794, 0x2648EA5870DD092B, - 0xBFC7E56D71D97C67, 0xDDE6B2FF4F21D549, 0x3C276B463AE86003, 0x91767B4FAF86C71F, - 0x68A13E7835D4B9A0, 0xB68C115F030C9FD4, 0x141DD2C916582001, 0x983D8F7DDD5324AC, - 0x64AA703FCC175254, 0xC2C989948E02B426, 0x3E5E76D69F46C2DE, 0x50746F03587D8004, - 0x45DB3D829272F1E5, 0x60584A029B560BF3, 0xFBAE58A73FFCDC62, 0xA15A5E4E6CAD4CE8, - 0x4BA96E55CE1FB8CC, 0x08F9747AAE82B253, 0xC102144CF7FB471B, 0x9F042898F3EB8E36, - 0x068B27ADF2EFFB7A, 0xEDCA97FE8C0A5EBE, 0x778E0513F4F7D8CF, 0x302C2501C32B8BF7, - 0x8D92DDFC175C554D, 0xF865C57F46052F5F, 0xEAF3301BA2B2F424, 0xAA68B7ECBBD60D86, - 0x998F0F350104754C, 0x0000000000000000, 0xF12E314D34D0CCEC, 0x710522BE061823B5, - 0xAF280D9930C005C1, 0x97FD5CE25D693C65, 0x19A41CC633CC9A15, 0x95844172F8C79EB8, - 0xDC5432B7937684A9, 0x9436C13A2490CF58, 0x802B13F332C8EF59, 0xC442AE397CED4F5C, - 0xFA1CD8EFE3AB8D82, 0xF2E5AC954D293FD1, 0x6AD823E8907A1B7D, 0x4D2249F83CF043B6, - 0x03CB9DD879F9F33D, 0xDE2D2F2736D82674, 0x2A43A41F891EE2DF, 0x6F98999D1B6C133A, - 0xD4AD46CD3DF436FA, 0xBB35DF50269825C0, 0x964FDCAA813E6D85, 0xEB41B0537EE5A5C4, - 0x0540BA758B160847, 0xA41AE43BE7BB44AF, 0xE3B8C429D0671797, 0x819993BBEE9FBEB9, - 0xAE9A8DD1EC975421, 0xF3572CDD917E6E31, 0x6393D7DAE2AFF8CE, 0x47A2201237DC5338, - 0xA32343DEC903EE35, 0x79FC56C4A89A91E6, 0x01B28048DC5751E0, 0x1296F564E4B7DB7B, - 0x75F7188351597A12, 0xDB6D9552BDCE2E33, 0x1E9DBB231D74308F, 0x520D7293FDD322D9, - 0xE20A44610C304677, 0xFEEEE2D2B4EAD425, 0xCA30FDEE20800675, 0x61EACA4A47015A13, - 0xE74AFE1487264E30, 0x2CC883B27BF119A5, 0x1664CF59B3F682DC, 0xA811AA7C1E78AF5B, - 0x1D5626FB648DC3B2, 0xB73E9117DF5BCE34, 0xD05F7CF06AB56F5D, 0xFD257F0ACD132718, - 0x574DC8E676C52A9E, 0x0739A7E52EB8AA9A, 0x5486553E0F3CD9A3, 0x56FF48AEAA927B7E, - 0xBE756525AD8E2D87, 0x7D0E6CF9FFDBC841, 0x3B1ECCA31450CA99, 0x6913BE30E983E840, - 0xAD511009956EA71C, 0xB1B5B6BA2DB4354E, 0x4469BDCA4E25A005, 0x15AF5281CA0F71E1, - 0x744598CB8D0E2BF2, 0x593F9B312AA863B7, 0xEFB38A6E29A4FC63, 0x6B6AA3A04C2D4A9D, - 0x3D95EB0EE6BF31E3, 0xA291C3961554BFD5, 0x18169C8EEF9BCBF5, 0x115D68BC9D4E2846, - 0xBA875F18FACF7420, 0xD1EDFCB8B6E23EBD, 0xB00736F2F1E364AE, 0x84D929CE6589B6FE, - 0x70B7A2F6DA4F7255, 0x0E7253D75C6D4929, 0x04F23A3D574159A7, 0x0A8069EA0B2C108E, - 0x49D073C56BB11A11, 0x8AAB7A1939E4FFD7, 0xCD095A0B0E38ACEF, 0xC9FB60365979F548, - 0x92BDE697D67F3422, 0xC78933E10514BC61, 0xE1C1D9B975C9B54A, 0xD2266160CF1BCD80, - 0x9A4492ED78FD8671, 0xB3CCAB2A881A9793, 0x72CEBF667FE1D088, 0xD6D45B5D985A9427 +__constant__ uint2 T02[256] = { + {0x5B711FD0,0xE6F87E5C}, {0x0924FA16,0x25837780}, {0x852EA4A8,0xC849E07E}, {0x8F06C16A,0x5B4686A1}, + {0xD77B416E,0x0B32E9A2}, {0x67815C66,0xABDA37A4}, {0x1A686676,0xF61796A8}, {0x6391954B,0xF5DC0B70}, + {0xB7E64BF1,0x4862F38D}, {0x68BD85C5,0xFF5C629A}, {0xFCD75795,0xCB827DA6}, {0x69B9F089,0x66D36DAF}, + {0x483D83B0,0x356C9F74}, {0x238C99A1,0x7CBCECB1}, {0x31C4708D,0x36A702AC}, {0x2FBCDFD6,0x9EB6A8D0}, + {0xE5B3AE37,0x8B19FA51}, {0x8A127D0B,0x9CCFB540}, {0x08208F5A,0xBC0C78B5}, {0x2288ECED,0xE533E384}, + {0x77C15FD2,0xCEC2C7D3}, {0x505D0F5E,0xEC7817B6}, {0x8336871D,0xB94CC2C0}, {0xCB0B04AD,0x8C205DB4}, + {0x28A0892F,0x763C855B}, {0xF6FF3257,0x588D1B79}, {0x4311933E,0x3FECF69E}, {0x803A18C9,0x0FC0D39F}, + {0xF5F3AD83,0xEE010A26}, {0x411979A6,0x10EFE8F4}, {0x7DE93A10,0x5DCDA10C}, {0x1248E92C,0x4A1BEE1D}, + {0x21847339,0x53BFF2DB}, {0xA6A23D09,0xB4F50CCF}, {0xD84798CD,0x5FB4BC9C}, {0x071C56F9,0xE88A2D8B}, + {0x5A756A9C,0x7F777169}, {0xA0BA1EBC,0xC5F02E71}, {0x4215E672,0xA663F9AB}, {0xDE5FBB78,0x2EB19E22}, + {0x2594BA14,0x0DB9CE0F}, {0x97664D84,0x82520E63}, {0x0208EA98,0x2F031E6A}, {0xA1BE6BF0,0x5C7F2144}, + {0xD16362DB,0x7A37CB1C}, {0x4B311C64,0x83E08E2B}, {0xAB960E32,0xCF70479B}, {0xB9DEE71E,0x856BA986}, + {0x7AF56CE9,0xB5478C87}, {0x5F61D6FD,0xB8FE4288}, {0x966238C8,0x1BDD0156}, {0x3EF8A92E,0x62215792}, + {0x114476F8,0xFC97FF42}, {0x56452CEB,0x9D7D3508}, {0xE0A71256,0x4C90C9B0}, {0xFBCB016C,0x2308502D}, + {0xA7A64845,0x2D7A03FA}, {0xBFC6C4AB,0xF46E8B38}, {0xD477DEBA,0xBDBEF8FD}, {0xC8079B79,0x3AAC4CEB}, + {0xE8879D0C,0xF09CB105}, {0xAC8A58CB,0x27FA6A10}, {0x401D0CEA,0x8960E7C1}, {0x4A356928,0x1A6F811E}, + {0x73D196FF,0x90C4FB07}, {0x609D0A9F,0x43501A2F}, {0xC63F3796,0xF7A516E0}, {0xB8DA9252,0x1CE4A6B3}, + {0x38E08A9B,0x1324752C}, {0x3BEC154F,0xA5A86473}, {0x5549B33F,0x2BF12457}, {0x440DC5C7,0xD766DB15}, + {0x9E42B792,0xA7D179E3}, {0x61997FD3,0xDADF151A}, {0xC0271423,0x86A0345E}, {0x6DA939A4,0x38D5517B}, + {0x104003B4,0x6518F077}, {0xA5AEA2DD,0x02791D90}, {0x9C4A5D0A,0x88D26789}, {0x0A2865C2,0x930F66DF}, + {0x4509B08B,0x4EE9D420}, {0x6685292A,0x32553891}, {0xC533A842,0x412907BF}, {0x544DC673,0xB27E2B62}, + {0x6295E007,0x6C530445}, {0x5351908A,0x5AF406E9}, {0xC123616F,0x1F2F3B6B}, {0x5255E5C6,0xC37B09DC}, + {0xB1FE6844,0x3967D133}, {0xF0E711E2,0x298839C7}, {0x1964F9A2,0x409B87F7}, {0xDB4B0719,0xE938ADC3}, + {0xF9C3EBF4,0x0C0B4E47}, {0xD36B8843,0x5534D576}, {0xEB8B02D8,0x4610A05A}, {0x8232F251,0x20C3CDF5}, + {0xBEC2B1E7,0x6DE1840D}, {0xB0FA1D08,0xA0E8DE06}, {0x0D34333B,0x7B854B54}, {0xBCCA5B7F,0x42E29A67}, + {0xC437DD0E,0xD8A6088A}, {0xD943ED81,0xC63BB3A9}, {0x5E65A3B1,0x21714DBD}, {0xB5EEA169,0x6761EDE7}, + {0xD573ABF6,0x2431F7C8}, {0xE1A3671A,0xD51FC685}, {0x0410C92D,0x5E063CD4}, {0x2CB04002,0x283AB98F}, + {0xB2F2F790,0x8FEBC06C}, {0x6FA1D33C,0x17D64F11}, {0xA99EE4AA,0xE07359F1}, {0x74CDC006,0x784ED68C}, + {0xC73B42DA,0x6E2A19D5}, {0x1C7045C3,0x8712B416}, {0xED93216D,0x371582E4}, {0x4939F6FC,0xACE39041}, + {0x86223B7C,0x7EC5F121}, {0x2BAC16FB,0xC0B09404}, {0x9A527EBF,0xF9D74537}, {0xA3B68168,0x737C3F2E}, + {0xBAD278CA,0x33E7B8D9}, {0xC22FFEBB,0xA9A32A34}, {0xFEDFBD0D,0xE48163CC}, {0x6EA5A670,0x8E594024}, + {0x842AD1E4,0x51C6EF4B}, {0x279C508C,0x22BAD065}, {0x18608CEE,0xD91488C2}, {0x1F7CDA17,0x319EA549}, + {0x134C9C60,0xD394E128}, {0x72D5E3B3,0x094BF432}, {0xA4AAD791,0x9BF612A5}, {0xD26FFD0F,0xCCBBDA43}, + {0x946AD250,0x34DE1F3C}, {0x995EE16B,0x4F5B5468}, {0xEA8F7794,0xDF9FAF6F}, {0x70DD092B,0x2648EA58}, + {0x71D97C67,0xBFC7E56D}, {0x4F21D549,0xDDE6B2FF}, {0x3AE86003,0x3C276B46}, {0xAF86C71F,0x91767B4F}, + {0x35D4B9A0,0x68A13E78}, {0x030C9FD4,0xB68C115F}, {0x16582001,0x141DD2C9}, {0xDD5324AC,0x983D8F7D}, + {0xCC175254,0x64AA703F}, {0x8E02B426,0xC2C98994}, {0x9F46C2DE,0x3E5E76D6}, {0x587D8004,0x50746F03}, + {0x9272F1E5,0x45DB3D82}, {0x9B560BF3,0x60584A02}, {0x3FFCDC62,0xFBAE58A7}, {0x6CAD4CE8,0xA15A5E4E}, + {0xCE1FB8CC,0x4BA96E55}, {0xAE82B253,0x08F9747A}, {0xF7FB471B,0xC102144C}, {0xF3EB8E36,0x9F042898}, + {0xF2EFFB7A,0x068B27AD}, {0x8C0A5EBE,0xEDCA97FE}, {0xF4F7D8CF,0x778E0513}, {0xC32B8BF7,0x302C2501}, + {0x175C554D,0x8D92DDFC}, {0x46052F5F,0xF865C57F}, {0xA2B2F424,0xEAF3301B}, {0xBBD60D86,0xAA68B7EC}, + {0x0104754C,0x998F0F35}, {0x00000000,0x00000000}, {0x34D0CCEC,0xF12E314D}, {0x061823B5,0x710522BE}, + {0x30C005C1,0xAF280D99}, {0x5D693C65,0x97FD5CE2}, {0x33CC9A15,0x19A41CC6}, {0xF8C79EB8,0x95844172}, + {0x937684A9,0xDC5432B7}, {0x2490CF58,0x9436C13A}, {0x32C8EF59,0x802B13F3}, {0x7CED4F5C,0xC442AE39}, + {0xE3AB8D82,0xFA1CD8EF}, {0x4D293FD1,0xF2E5AC95}, {0x907A1B7D,0x6AD823E8}, {0x3CF043B6,0x4D2249F8}, + {0x79F9F33D,0x03CB9DD8}, {0x36D82674,0xDE2D2F27}, {0x891EE2DF,0x2A43A41F}, {0x1B6C133A,0x6F98999D}, + {0x3DF436FA,0xD4AD46CD}, {0x269825C0,0xBB35DF50}, {0x813E6D85,0x964FDCAA}, {0x7EE5A5C4,0xEB41B053}, + {0x8B160847,0x0540BA75}, {0xE7BB44AF,0xA41AE43B}, {0xD0671797,0xE3B8C429}, {0xEE9FBEB9,0x819993BB}, + {0xEC975421,0xAE9A8DD1}, {0x917E6E31,0xF3572CDD}, {0xE2AFF8CE,0x6393D7DA}, {0x37DC5338,0x47A22012}, + {0xC903EE35,0xA32343DE}, {0xA89A91E6,0x79FC56C4}, {0xDC5751E0,0x01B28048}, {0xE4B7DB7B,0x1296F564}, + {0x51597A12,0x75F71883}, {0xBDCE2E33,0xDB6D9552}, {0x1D74308F,0x1E9DBB23}, {0xFDD322D9,0x520D7293}, + {0x0C304677,0xE20A4461}, {0xB4EAD425,0xFEEEE2D2}, {0x20800675,0xCA30FDEE}, {0x47015A13,0x61EACA4A}, + {0x87264E30,0xE74AFE14}, {0x7BF119A5,0x2CC883B2}, {0xB3F682DC,0x1664CF59}, {0x1E78AF5B,0xA811AA7C}, + {0x648DC3B2,0x1D5626FB}, {0xDF5BCE34,0xB73E9117}, {0x6AB56F5D,0xD05F7CF0}, {0xCD132718,0xFD257F0A}, + {0x76C52A9E,0x574DC8E6}, {0x2EB8AA9A,0x0739A7E5}, {0x0F3CD9A3,0x5486553E}, {0xAA927B7E,0x56FF48AE}, + {0xAD8E2D87,0xBE756525}, {0xFFDBC841,0x7D0E6CF9}, {0x1450CA99,0x3B1ECCA3}, {0xE983E840,0x6913BE30}, + {0x956EA71C,0xAD511009}, {0x2DB4354E,0xB1B5B6BA}, {0x4E25A005,0x4469BDCA}, {0xCA0F71E1,0x15AF5281}, + {0x8D0E2BF2,0x744598CB}, {0x2AA863B7,0x593F9B31}, {0x29A4FC63,0xEFB38A6E}, {0x4C2D4A9D,0x6B6AA3A0}, + {0xE6BF31E3,0x3D95EB0E}, {0x1554BFD5,0xA291C396}, {0xEF9BCBF5,0x18169C8E}, {0x9D4E2846,0x115D68BC}, + {0xFACF7420,0xBA875F18}, {0xB6E23EBD,0xD1EDFCB8}, {0xF1E364AE,0xB00736F2}, {0x6589B6FE,0x84D929CE}, + {0xDA4F7255,0x70B7A2F6}, {0x5C6D4929,0x0E7253D7}, {0x574159A7,0x04F23A3D}, {0x0B2C108E,0x0A8069EA}, + {0x6BB11A11,0x49D073C5}, {0x39E4FFD7,0x8AAB7A19}, {0x0E38ACEF,0xCD095A0B}, {0x5979F548,0xC9FB6036}, + {0xD67F3422,0x92BDE697}, {0x0514BC61,0xC78933E1}, {0x75C9B54A,0xE1C1D9B9}, {0xCF1BCD80,0xD2266160}, + {0x78FD8671,0x9A4492ED}, {0x881A9793,0xB3CCAB2A}, {0x7FE1D088,0x72CEBF66}, {0x985A9427,0xD6D45B5D}, +}; + +__constant__ uint2 T12[256] = { + {0x8C3F55DE,0xC811A805}, {0x96B50619,0x65F5B431}, {0xD6706E43,0xF74F96B1}, {0xCB43D336,0x859D1E8B}, + {0xCCFA3D84,0x5AAB8A85}, {0xC295FCFD,0xF9C7BF99}, {0xDE4B630F,0xA21FD5A1}, {0x3B8B456D,0xCDB3EF76}, + {0x7CF7C385,0x803F59F8}, {0x5F31913C,0xB27C73BE}, {0x33B04821,0x98E3AC66}, {0x26B8F818,0xBF61674C}, + {0xC4C130C8,0x0FFBC995}, {0x10761A98,0xAAA08620}, {0x210116AA,0x6057F342}, {0x0654CC35,0xF63C760C}, + {0x667D9042,0x2DDB45CC}, {0x4BD40382,0xBCF45A96}, {0xEF3C6F3D,0x68E8A0C3}, {0x69FF73BC,0xA7BD92D2}, + {0x01ED2287,0x290AE202}, {0xE885818F,0xB7DE34CD}, {0xDD61059B,0xD901EEA7}, {0x19A03553,0xD6FA2732}, + {0x74CCCEC9,0xD56F1AE8}, {0x2E83F554,0xEA31245C}, {0xA07BE499,0x7034555D}, {0x56E7BEF7,0xCE26D2AC}, + {0xA5054E38,0xFD161857}, {0x527436D1,0x6A0E7DA4}, {0x1CDE9FF2,0x5BD86A38}, {0x31770C32,0xCAF77562}, + {0xE279C8D0,0xB09AAED9}, {0xC60674DB,0x5DEF1091}, {0x515E5045,0x111046A2}, {0x729802FC,0x23536CE4}, + {0xF5B63CFA,0xC50CBCF7}, {0xCD171F03,0x73A16887}, {0xD9F28DBD,0x7D2941AF}, {0x5A4F3B9D,0x3F5E3EB4}, + {0x1B677140,0x84EEFE36}, {0xE7076271,0x3DB8E3D3}, {0xF20FD248,0x1A3A28F9}, {0xB49E7627,0x7EBC7C75}, + {0xC7EB565C,0x74E5F293}, {0x4F478BA4,0x18DCF59E}, {0xA9ADCB52,0x0C6EF44F}, {0x98DAC760,0xC699812D}, + {0x6E469D0E,0x788B06DC}, {0x7521EC4E,0xFC65F8EA}, {0x9E8E0B55,0x30A5F721}, {0xBCA57B6B,0x2BEC3F65}, + {0xBAF1B75E,0xDDD04969}, {0xE394EA57,0x99904CDB}, {0xE6EA40F6,0x14B201D1}, {0x41284ADD,0xBBB0C082}, + {0xBF8F1DFF,0x50F20463}, {0x93CBACB8,0xE8D7F93B}, {0x477C86E8,0x4D8CB68E}, {0x92268E3F,0xC1DD1B39}, + {0x09D62FCB,0x7C5AA112}, {0xDB35C9AE,0x2F3D98AB}, {0x2BFD5FF5,0x67136956}, {0x36CEE280,0x15C1E16C}, + {0xF8F39B17,0x1D7EB2ED}, {0xB00DFE01,0xDA94D37D}, {0x760B8ADA,0x877BC3EC}, {0xE153AE44,0xCB8495DF}, + {0xB7B410B3,0x05A24773}, {0x3C32ABDF,0x12857B78}, {0x6812513B,0x8EB770D0}, {0xD2E3E665,0x536739B9}, + {0x71B26468,0x584D57E2}, {0xC9849725,0xD789C78F}, {0x7D1AE102,0xA935BBFA}, {0xDFA64188,0x8B1537A3}, + {0xC378DE7A,0xD0CD5D9B}, {0x4D80CFB7,0x4AC82C9A}, {0x83BDB620,0x42777F1B}, {0x1D33BD75,0x72D2883A}, + {0xAB6A8F41,0x5E7A2D4B}, {0xBB1C95D9,0xF4DAAB6B}, {0xFD8D31B6,0x905CFFE7}, {0x119B381F,0x83AA6422}, + {0x42022C49,0xC0AEFB84}, {0x63033AE3,0xA0F908C6}, {0x04938826,0xA428AF08}, {0x1A8A53C7,0xADE41C34}, + {0x77E6A85D,0xAE7121EE}, {0x25929E8C,0xC47F5C4A}, {0x55CDD863,0xB538E9AA}, {0xDAD8EB29,0x06377AA9}, + {0xB3279895,0xA18AE87B}, {0x35E48414,0x6EDFDA6A}, {0x825094A7,0x6B7D9D19}, {0xA4E86CBF,0xD41CFA55}, + {0xEA42C59C,0xE5CAEDC9}, {0x0E6FC179,0xA36C351C}, {0x6FABBF89,0x5181E4DE}, {0x184D17D4,0xFFF0C530}, + {0x84045892,0x9D41EB15}, {0x28D73961,0x1C0D5250}, {0x0CA8856A,0xF178EC18}, {0x8EF811CD,0x9A057101}, + {0x3EF5EFCC,0x4091A27C}, {0x9F6329D2,0x19AF1523}, {0xF91EB990,0x347450EF}, {0x8DD27759,0xE11B4A07}, + {0xFC601331,0xB9561DE5}, {0x2DA993C0,0x912F1F5A}, {0x5BA2191A,0x1654DCB6}, {0x8A6B99EB,0x3E2DDE09}, + {0x0F82E3FE,0x8A66D71E}, {0xD55A08D7,0x8C51ADB7}, {0x8941FF7F,0x4533E50F}, {0xBD4859EC,0x02E6DD67}, + {0x5DF6D52F,0xE068AABA}, {0xFF4A75A5,0xC24826E3}, {0x88ACDDF8,0x6C39070D}, {0x4691A46F,0x6486548C}, + {0x135C7C0C,0xD1BEBD26}, {0x8F15334A,0xB30F9303}, {0xC1BF9A69,0x82D9849F}, {0x5420FAE4,0x9C320BA8}, + {0xAFF90767,0xFA528243}, {0xE968A308,0x9ED4D6CF}, {0x2C44B147,0xB825FD58}, {0x5EDCB3BB,0x9B7691BC}, + {0x48FE6516,0xC7EA6190}, {0x817AF233,0x1063A61F}, {0x3409A693,0x47D53868}, {0x4C6DED30,0x63C2CE98}, + {0x6C81D91D,0x2A9FDFD8}, {0x032A6694,0x7B1E3B06}, {0xFBD9FD83,0x666089EB}, {0x7375207B,0x0A598EE6}, + {0x0AFC495F,0x07449A14}, {0xB6593234,0x2CA8A571}, {0x45BBC2FB,0x1F986F8A}, {0x50B372C2,0x381AA4A0}, + {0xD81FAF3A,0x5423A3AD}, {0x8B86BB6C,0x17273C0B}, {0xC869B5A2,0xFE83258D}, {0xD1C980F1,0x287902BF}, + {0x6B3837AF,0xF5A94BD6}, {0xB2CABA12,0x88800A79}, {0x083B0D4C,0x55504310}, {0x07B9EEB2,0xDF36940E}, + {0x6790B2C5,0x04D1A7CE}, {0xF125B4DC,0x612413FF}, {0xC52C124F,0x26F12B97}, {0xA62F28AC,0x86082351}, + {0x9937E5E7,0xEF93632F}, {0x293A1BE6,0x3507B052}, {0x570A9C70,0xE72C30AE}, {0xAE1425E0,0xD3586041}, + {0xD79D4CC4,0xDE4574B3}, {0x40C5685A,0x92BA2280}, {0xDC8C271C,0xF00B0CA5}, {0xF69C5A6E,0xBE1287F1}, + {0xB1E0DC86,0xF39E317F}, {0x20EC342D,0x495D1140}, {0x3F18CD4B,0x699B407E}, {0x6AD51528,0xDCA3A9D4}, + {0x79896924,0x0D1D14F2}, {0x00000000,0x00000000}, {0xA196C61E,0x593EB75F}, {0x0B116BD8,0x2E4E7816}, + {0x58887F8E,0x6D4AE7B0}, {0x872E3E06,0xE65FD013}, {0xD30EC4E2,0x7A6DDBBB}, {0xCAAEF1B1,0xAC97FC89}, + {0x1E19DBE1,0x09CCB33C}, {0x62EE1864,0x89F3EAC4}, {0xAA87ADC6,0x7770CF49}, {0x6557F6D6,0x56C57ECA}, + {0x6D6CFB9A,0x03953DDA}, {0x4456E07C,0x36928D88}, {0x959F608D,0x1EEB8F37}, {0x4EAAA923,0x31D6179C}, + {0xE5C02662,0x6FAC3AD7}, {0x53991456,0x43049FA6}, {0xC052B8EE,0xABD3669D}, {0xA7C20A2B,0xAF02C153}, + {0x3723C007,0x3CCB036E}, {0x90E1CA2C,0x93C9C23D}, {0x2F6ED7D3,0xC33BC65E}, {0x9758249E,0x4CFF5633}, + {0x325D6AA6,0xB1E94E64}, {0x9472420A,0x37E16D35}, {0xBE623F78,0x79F8E661}, {0x02C74413,0x5214D904}, + {0xF0C8965B,0x482EF1FD}, {0xEC1609A9,0x13F69BC5}, {0x14E592BE,0x0E882928}, {0x2A107D72,0x4E198B54}, + {0xEBAFE71B,0xCCC00FCB}, {0x222B703E,0x1B49C844}, {0xA840E9D5,0x2564164D}, {0x1FF4F966,0x20C6513E}, + {0x910CE8AB,0xBAC3203F}, {0x61C47EF0,0xF2EDD1C2}, {0xACD361F3,0x814CB945}, {0x4A392105,0x95FEB894}, + {0x1622D6AD,0x5C9CF02C}, {0xF77178E9,0x971865F3}, {0x9BF0A1F4,0xBD87BA2B}, {0x59655D09,0x444005B2}, + {0x247FBC0B,0xED75BE48}, {0x17CFF42A,0x7596122E}, {0x85E97A15,0xB44B0917}, {0x2755DA9F,0x966B854E}, + {0x49134791,0xEEE08392}, {0x23C652B9,0x32432A46}, {0xAD3E4374,0xA8465B47}, {0x12B15E8B,0xF8B45F24}, + {0x78644BA3,0x2417F6F0}, {0x7FDDA511,0xFB2162FE}, {0x9DA46DC1,0x4BBBCC27}, {0xD024A276,0x0173E0BD}, + {0xA2BCA08A,0x22208C59}, {0xB836F34D,0x8FC4906D}, {0x3A6667EA,0xE4B90D74}, {0x705F46EF,0x7147B5E0}, + {0x1508B039,0x2782CB2A}, {0xF45B1E7D,0xEC065EF5}, {0xCFD05B10,0x21B5B183}, {0x60295C77,0xDBE733C0}, + {0x394C017E,0x9FA73672}, {0x86C31C81,0xCF553211}, {0x0D45A7ED,0xD8720E1A}, {0x3DDF8958,0x3B8F997A}, + {0xEDFB2B2E,0x3AFC79C7}, {0x43EF0ECE,0xE9A41986}, {0x7B4E2D37,0x5F09CDF6}, {0xFA34DF04,0x4F6A6BE9}, + {0x38A123F9,0xB6ADD470}, {0x057EAAA1,0x8D224D0A}, {0x5C1BF7A8,0xC96248B8}, {0x309A2EB5,0xE3FD9760}, + {0xA351820D,0x0B2A6E5B}, {0xFEA75722,0xEB42C4E1}, {0x9A1D8373,0x948D5829}, {0x64BAD451,0x7FCF9CC8}, + {0xD4B72A50,0xA55B4FB5}, {0xCE3D7997,0x08BF5381}, {0xE42D04E5,0x46A6D8D5}, {0x7E308796,0xD22B80FC}, + {0xB57354A0,0x57B69E77}, {0x8097D0B4,0x3969441D}, {0xF3E2F0CF,0x3330CAFB}, {0xE0BE8CC3,0xE28E77DD}, + {0x9C494F46,0x62B12E25}, {0xB9DBD1CA,0xA6CE726F}, {0xEED14DBA,0x41E242C1}, {0x7AA30FB0,0x76032FF4}, +}; + +__constant__ uint2 T22[256] = { + {0x3ACDE4CC,0x45B268A9}, {0x84549D08,0xAF7F0BE8}, {0xC1468263,0x048354B3}, {0xC80EFED2,0x925435C2}, + {0x7FDFFBA7,0xEE4E37F2}, {0x0C60F14D,0x167A3392}, {0xEA03E584,0xFB123B52}, {0xFDBB9007,0x4A0CAB53}, + {0x0F788A19,0x9DEAF638}, {0x8F0CB32A,0xCB48EC55}, {0xD6FEF7E0,0xB59DC4B2}, {0xF4F3ECB6,0xDCDBCA22}, + {0x549A9C40,0x11DF5813}, {0x568ACED3,0xE33FDEDF}, {0x4322E9C3,0xA0C1C812}, {0x58FA6D0D,0x07A56B81}, + {0xB1E1F3DD,0x77279579}, {0x422AC004,0xD9B18B74}, {0xFFABC294,0xB8EC2D9F}, {0x2D75914F,0xF4ACF8A8}, + {0xEF2B6878,0x7BBF69B1}, {0x487AC7E1,0xC4F62FAF}, {0xC67E5D0C,0x76CE809C}, {0x92E4C14C,0x6711D88F}, + {0x243DEDFE,0x627B99D9}, {0xDFB68B51,0x234AA5C3}, {0x262DBF6D,0x909B1F15}, {0x4B62BCB5,0x4F66EA05}, + {0x52AA6AE8,0x1AE2CF5A}, {0xD0CE0148,0xBEA053FB}, {0xE66314C9,0xED6808C0}, {0x15A82710,0x43FE16CD}, + {0xA06970F6,0xCD049231}, {0x97CC4CB0,0xE7BC8A6C}, {0xFCB3B9C0,0x337CE835}, {0x7CC780F3,0x65DEF258}, + {0x4132BB50,0x52214EDE}, {0x90F493DF,0x95F15E43}, {0x5DD2E0F1,0x87083962}, {0xFB8B66AF,0x41313C1A}, + {0x51B211BC,0x91720AF0}, {0xD4EEA573,0x477D427E}, {0xF6E3BE25,0x2E3B4CEE}, {0xEB0BCC43,0x82627834}, + {0x78E724C8,0x9C03E3DD}, {0xD9867DF9,0x2877328A}, {0xE243B0F2,0x14B51945}, {0xF7EB97E2,0x574B0F88}, + {0x9AA4943A,0x88B6FA98}, {0xCB168586,0x19C4F068}, {0xAF11FAEF,0x50EE6409}, {0xC04EABA4,0x7DF317D5}, + {0x98B4C6A9,0x7A567C54}, {0x4F42188E,0xB6BBFB80}, {0x3BC5CD0B,0x3CC22BCF}, {0xAA397713,0xD04336EA}, + {0xEC33132C,0xF02FAC1B}, {0xF0D3488D,0x2506DBA7}, {0xF2C31A1E,0xD7E65D6B}, {0x1FF820F5,0x5EB9B216}, + {0xC46E0F9F,0x842E0650}, {0x9E843001,0x716BEB1D}, {0xAB315ED4,0xA933758C}, {0xA2792265,0x3FE414FD}, + {0x1EF00932,0x27C9F170}, {0x70A771BE,0x73A4C1CA}, {0xE76B3D0E,0x94184BA6}, {0x8C14C87E,0x40D829FF}, + {0xC77674CB,0x0FBEC3FA}, {0x4A6A9572,0x3616A963}, {0xC25EF937,0x8F139119}, {0x5AEA3F9E,0xF545ED4D}, + {0x50BA387B,0xE8024996}, {0x0B582E22,0x6437E7BD}, {0xE053E261,0xE6559F89}, {0x05288DFC,0x80AD52E3}, + {0xE34B9935,0x6DC55A23}, {0x1AD0AD09,0xDE14E0F5}, {0xA659865E,0xC6390578}, {0x09487CB1,0x96D76171}, + {0x21156002,0xE2D6CB3A}, {0x779FAED1,0x01E915E5}, {0x6A77DCB7,0xADB0213F}, {0xB9A1A6AB,0x9880B76E}, + {0x8644CF9B,0x5D9F8D24}, {0xC5662658,0xFD5E4536}, {0x9BACBDFD,0xF1C6B9FE}, {0xBE9979C4,0xEACD6341}, + {0x08405576,0xEFA72217}, {0xD88E543E,0x510771EC}, {0x671F043D,0xC2BA51CB}, {0x71AF5879,0x0AD482AC}, + {0x5CDAC936,0xFE787A04}, {0x8E049AED,0xB238AF33}, {0x4972EE26,0xBD866CC9}, {0xBD810290,0x615DA6EB}, + {0x8B2C1711,0x3295FDD0}, {0x73BF0AEA,0xF8340460}, {0x758FFC42,0xF3099329}, {0x7DCFA934,0x1CAEB13E}, + {0x1188832B,0xBA230748}, {0x874CE65C,0x24EFCE42}, {0xB0E9DA1A,0x0E57D61F}, {0xF99B343C,0xB3D1BAD6}, + {0x893C4582,0xC0757B1C}, {0x403A9297,0x2B510DB8}, {0xF1DB614A,0x5C7698C1}, {0xD5E68CB4,0x3E0D0118}, + {0x855CB4CF,0xD60F488E}, {0xF3CB33D9,0xAE961E0D}, {0x14A00ED7,0x3A8E55AB}, {0x623789C1,0x42170328}, + {0x9C946292,0x838B6DD1}, {0xED3B3AEB,0x895FEF7D}, {0x4E4A3149,0xCFCBB8E6}, {0x2F65C3DC,0x064C7E64}, + {0x4C5A63DA,0x3D2B3E2A}, {0xA9210C47,0x5BD3F340}, {0xA1615931,0xB474D157}, {0x1DE87266,0xAC5934DA}, + {0x7AF7765B,0x6EE36511}, {0x16B05C44,0xC86ED367}, {0x201D49C5,0x9BA6885C}, {0x88346C45,0xB905387A}, + {0xBAB9DDFF,0x131072C4}, {0xA751AF99,0xBF49461E}, {0x1CE05BA1,0xD52977BC}, {0x6027DB52,0xB0F785E4}, + {0x6E57788C,0x546D30BA}, {0x650F56AE,0x305AD707}, {0x612FF295,0xC987C682}, {0xF5FBC571,0xA5AB8944}, + {0x59F244CA,0x7ED528E7}, {0x2C7DB888,0x8DDCBBCE}, {0x328DB1BA,0xAA154ABE}, {0x93ECE88B,0x1E619BE9}, + {0xE813B717,0x09F2BD9E}, {0x285D1CB3,0x7401AA4B}, {0x3195CAEE,0x21858F14}, {0x1398D1B8,0x48C38184}, + {0xB2F98889,0xFCB750D3}, {0x8D1CE1B9,0x39A86A99}, {0xE473465A,0x1F888E0C}, {0x76978716,0x78995683}, + {0xEE2341BF,0x02CF2AD7}, {0xB3F1A14E,0x85C713B5}, {0x2B4567E7,0xFF916FE1}, {0xB7D10575,0x7C1A0230}, + {0x5ECA9BA5,0x0C98FCC8}, {0xDA9E06AD,0xA3E7F720}, {0xBBB1F438,0x6A6031A2}, {0x7ED7D260,0x973E7494}, + {0x18C0FF9A,0x2CF46639}, {0x68678E24,0x5F50A7F3}, {0xA449D4CD,0x34D983B4}, {0x5592B587,0x68AF1B75}, + {0x2E6DEA1B,0x7F3C3D02}, {0x45121F6B,0xABFC5F5B}, {0x29553574,0x0D71E92D}, {0x6D4F03D8,0xDFFDF510}, + {0x9F8C19C6,0x081BA87B}, {0xAC0981BB,0xDB7EA1A3}, {0x66172DFA,0xBBCA12AD}, {0x010829C7,0x79704366}, + {0x7BFF5F9C,0x17932677}, {0x00000000,0x00000000}, {0xC906D715,0xEB2476A4}, {0x0738DF6F,0x724DD42F}, + {0x38DDB65F,0xB752EE65}, {0x3DF53BA3,0x37FFBC86}, {0xB5C157E6,0x8EFA84FC}, {0x272596AA,0xE9EB5C73}, + {0x2535C439,0x1B0BDABF}, {0x2A4D4E20,0x86E12C87}, {0xCE3E087A,0x9969A28B}, {0x9D9C4B55,0xFAFB2EB7}, + {0xB6D92CB2,0x056A4156}, {0xDEBEA296,0x5A3AE6A5}, {0xA8292580,0x22A3B026}, {0x36AD1581,0x53C85B3B}, + {0x17B87583,0xB11E9001}, {0x3FE56930,0xC51F3A4A}, {0xCF3621BD,0xE019E1ED}, {0x91FCBA18,0xEC811D25}, + {0x4D524A1D,0x445B7D4C}, {0xDCAEF005,0xA8DA6069}, {0x309DE329,0x58F5CC72}, {0x6B7FF570,0xD4C06259}, + {0x39D59F98,0xCE22AD03}, {0x47024DF8,0x591CD997}, {0x03187B54,0x8B90C5AA}, {0xC356D0F0,0xF663D27F}, + {0x35B56ED5,0xD8589E91}, {0xD3D67A1C,0x35309651}, {0xCD26732E,0x12F96721}, {0x441A36AC,0xD28C1C3D}, + {0x64077F69,0x492A9461}, {0x6F5F514B,0x2D1D73DC}, {0x0D68D88A,0x6F0A70F4}, {0xCA1EAC41,0x60B4B30E}, + {0x3385987D,0xD36509D8}, {0x0630F6A8,0x0B3D9749}, {0x96C46577,0x9ECCC90A}, {0xAD01A87C,0xA20EE2C5}, + {0x0E70A3DE,0xE49AB55E}, {0x82646BA0,0xA4429CA1}, {0xDB962F6A,0xDA97B446}, {0xD7F6DE27,0xCCED87D4}, + {0x37A53C46,0x2AB8185D}, {0xE15BCBA6,0x9F25DCEF}, {0xFEA3EB53,0xC19C6EF9}, {0x1BD884CE,0xA764A393}, + {0x817C10F4,0x2FD2590B}, {0x80743933,0x56A21A6D}, {0x79EF0D0F,0xE573A0BB}, {0x95DC1E23,0x155C0CA0}, + {0x94D437E4,0x6C2C4FC6}, {0x23053291,0x10364DF6}, {0x836C4267,0xDD32DFC7}, {0x99BCEF6E,0x03263F32}, + {0xE57B6F9D,0x66F8CD6A}, {0x5BE21659,0x8C35AE2B}, {0x1290F87F,0x31B3C2E2}, {0xBF915003,0x93BD2027}, + {0x220D1B56,0x69460E90}, {0xAE19D328,0x299E276F}, {0x53A2432F,0x63928C3C}, {0xE91B9ED0,0x7082FEF8}, + {0x3EED40F7,0xBC6F792C}, {0xD2DE53DB,0x4C40D537}, {0x5FC2B262,0x75E8BFAE}, {0xA541FD0A,0x4DA9C0D2}, + {0x3CFD1264,0x4E8FFFE0}, {0x696FA7E3,0x2620E495}, {0xB8A98F6C,0xE1F0F408}, {0xDDA6D9C2,0xD1AA230F}, + {0xD1C6288F,0xC7D0109D}, {0x7487D585,0x8A79D04F}, {0xA3710BA2,0x4694579B}, {0xFA834F68,0x38417F7C}, + {0x0A5007E5,0x1D47A4DB}, {0x460A643F,0x206C9AF1}, {0x34BD4712,0xA128DDF7}, {0x72B7232D,0x81444706}, + {0x02105293,0xF2E086CC}, {0xBC892B57,0x182DE58D}, {0xF8931DFB,0xCAA1F9B0}, {0xCC2E5AE9,0x6B892447}, + {0x0420A43B,0xF9DD1185}, {0x8A243ED6,0x4BE5BEB6}, {0x19C8D65D,0x5584255F}, {0x633FA006,0x3B67404E}, + {0x6C472A1F,0xA68DB676}, {0xB4C97E21,0xF78AC79A}, {0x1080AAEC,0xC353442E}, {0x5782E714,0x9A4F9DB9}, }; -__device__ static uint64_t T1[256] = { - 0xC811A8058C3F55DE, 0x65F5B43196B50619, 0xF74F96B1D6706E43, 0x859D1E8BCB43D336, - 0x5AAB8A85CCFA3D84, 0xF9C7BF99C295FCFD, 0xA21FD5A1DE4B630F, 0xCDB3EF763B8B456D, - 0x803F59F87CF7C385, 0xB27C73BE5F31913C, 0x98E3AC6633B04821, 0xBF61674C26B8F818, - 0x0FFBC995C4C130C8, 0xAAA0862010761A98, 0x6057F342210116AA, 0xF63C760C0654CC35, - 0x2DDB45CC667D9042, 0xBCF45A964BD40382, 0x68E8A0C3EF3C6F3D, 0xA7BD92D269FF73BC, - 0x290AE20201ED2287, 0xB7DE34CDE885818F, 0xD901EEA7DD61059B, 0xD6FA273219A03553, - 0xD56F1AE874CCCEC9, 0xEA31245C2E83F554, 0x7034555DA07BE499, 0xCE26D2AC56E7BEF7, - 0xFD161857A5054E38, 0x6A0E7DA4527436D1, 0x5BD86A381CDE9FF2, 0xCAF7756231770C32, - 0xB09AAED9E279C8D0, 0x5DEF1091C60674DB, 0x111046A2515E5045, 0x23536CE4729802FC, - 0xC50CBCF7F5B63CFA, 0x73A16887CD171F03, 0x7D2941AFD9F28DBD, 0x3F5E3EB45A4F3B9D, - 0x84EEFE361B677140, 0x3DB8E3D3E7076271, 0x1A3A28F9F20FD248, 0x7EBC7C75B49E7627, - 0x74E5F293C7EB565C, 0x18DCF59E4F478BA4, 0x0C6EF44FA9ADCB52, 0xC699812D98DAC760, - 0x788B06DC6E469D0E, 0xFC65F8EA7521EC4E, 0x30A5F7219E8E0B55, 0x2BEC3F65BCA57B6B, - 0xDDD04969BAF1B75E, 0x99904CDBE394EA57, 0x14B201D1E6EA40F6, 0xBBB0C08241284ADD, - 0x50F20463BF8F1DFF, 0xE8D7F93B93CBACB8, 0x4D8CB68E477C86E8, 0xC1DD1B3992268E3F, - 0x7C5AA11209D62FCB, 0x2F3D98ABDB35C9AE, 0x671369562BFD5FF5, 0x15C1E16C36CEE280, - 0x1D7EB2EDF8F39B17, 0xDA94D37DB00DFE01, 0x877BC3EC760B8ADA, 0xCB8495DFE153AE44, - 0x05A24773B7B410B3, 0x12857B783C32ABDF, 0x8EB770D06812513B, 0x536739B9D2E3E665, - 0x584D57E271B26468, 0xD789C78FC9849725, 0xA935BBFA7D1AE102, 0x8B1537A3DFA64188, - 0xD0CD5D9BC378DE7A, 0x4AC82C9A4D80CFB7, 0x42777F1B83BDB620, 0x72D2883A1D33BD75, - 0x5E7A2D4BAB6A8F41, 0xF4DAAB6BBB1C95D9, 0x905CFFE7FD8D31B6, 0x83AA6422119B381F, - 0xC0AEFB8442022C49, 0xA0F908C663033AE3, 0xA428AF0804938826, 0xADE41C341A8A53C7, - 0xAE7121EE77E6A85D, 0xC47F5C4A25929E8C, 0xB538E9AA55CDD863, 0x06377AA9DAD8EB29, - 0xA18AE87BB3279895, 0x6EDFDA6A35E48414, 0x6B7D9D19825094A7, 0xD41CFA55A4E86CBF, - 0xE5CAEDC9EA42C59C, 0xA36C351C0E6FC179, 0x5181E4DE6FABBF89, 0xFFF0C530184D17D4, - 0x9D41EB1584045892, 0x1C0D525028D73961, 0xF178EC180CA8856A, 0x9A0571018EF811CD, - 0x4091A27C3EF5EFCC, 0x19AF15239F6329D2, 0x347450EFF91EB990, 0xE11B4A078DD27759, - 0xB9561DE5FC601331, 0x912F1F5A2DA993C0, 0x1654DCB65BA2191A, 0x3E2DDE098A6B99EB, - 0x8A66D71E0F82E3FE, 0x8C51ADB7D55A08D7, 0x4533E50F8941FF7F, 0x02E6DD67BD4859EC, - 0xE068AABA5DF6D52F, 0xC24826E3FF4A75A5, 0x6C39070D88ACDDF8, 0x6486548C4691A46F, - 0xD1BEBD26135C7C0C, 0xB30F93038F15334A, 0x82D9849FC1BF9A69, 0x9C320BA85420FAE4, - 0xFA528243AFF90767, 0x9ED4D6CFE968A308, 0xB825FD582C44B147, 0x9B7691BC5EDCB3BB, - 0xC7EA619048FE6516, 0x1063A61F817AF233, 0x47D538683409A693, 0x63C2CE984C6DED30, - 0x2A9FDFD86C81D91D, 0x7B1E3B06032A6694, 0x666089EBFBD9FD83, 0x0A598EE67375207B, - 0x07449A140AFC495F, 0x2CA8A571B6593234, 0x1F986F8A45BBC2FB, 0x381AA4A050B372C2, - 0x5423A3ADD81FAF3A, 0x17273C0B8B86BB6C, 0xFE83258DC869B5A2, 0x287902BFD1C980F1, - 0xF5A94BD66B3837AF, 0x88800A79B2CABA12, 0x55504310083B0D4C, 0xDF36940E07B9EEB2, - 0x04D1A7CE6790B2C5, 0x612413FFF125B4DC, 0x26F12B97C52C124F, 0x86082351A62F28AC, - 0xEF93632F9937E5E7, 0x3507B052293A1BE6, 0xE72C30AE570A9C70, 0xD3586041AE1425E0, - 0xDE4574B3D79D4CC4, 0x92BA228040C5685A, 0xF00B0CA5DC8C271C, 0xBE1287F1F69C5A6E, - 0xF39E317FB1E0DC86, 0x495D114020EC342D, 0x699B407E3F18CD4B, 0xDCA3A9D46AD51528, - 0x0D1D14F279896924, 0x0000000000000000, 0x593EB75FA196C61E, 0x2E4E78160B116BD8, - 0x6D4AE7B058887F8E, 0xE65FD013872E3E06, 0x7A6DDBBBD30EC4E2, 0xAC97FC89CAAEF1B1, - 0x09CCB33C1E19DBE1, 0x89F3EAC462EE1864, 0x7770CF49AA87ADC6, 0x56C57ECA6557F6D6, - 0x03953DDA6D6CFB9A, 0x36928D884456E07C, 0x1EEB8F37959F608D, 0x31D6179C4EAAA923, - 0x6FAC3AD7E5C02662, 0x43049FA653991456, 0xABD3669DC052B8EE, 0xAF02C153A7C20A2B, - 0x3CCB036E3723C007, 0x93C9C23D90E1CA2C, 0xC33BC65E2F6ED7D3, 0x4CFF56339758249E, - 0xB1E94E64325D6AA6, 0x37E16D359472420A, 0x79F8E661BE623F78, 0x5214D90402C74413, - 0x482EF1FDF0C8965B, 0x13F69BC5EC1609A9, 0x0E88292814E592BE, 0x4E198B542A107D72, - 0xCCC00FCBEBAFE71B, 0x1B49C844222B703E, 0x2564164DA840E9D5, 0x20C6513E1FF4F966, - 0xBAC3203F910CE8AB, 0xF2EDD1C261C47EF0, 0x814CB945ACD361F3, 0x95FEB8944A392105, - 0x5C9CF02C1622D6AD, 0x971865F3F77178E9, 0xBD87BA2B9BF0A1F4, 0x444005B259655D09, - 0xED75BE48247FBC0B, 0x7596122E17CFF42A, 0xB44B091785E97A15, 0x966B854E2755DA9F, - 0xEEE0839249134791, 0x32432A4623C652B9, 0xA8465B47AD3E4374, 0xF8B45F2412B15E8B, - 0x2417F6F078644BA3, 0xFB2162FE7FDDA511, 0x4BBBCC279DA46DC1, 0x0173E0BDD024A276, - 0x22208C59A2BCA08A, 0x8FC4906DB836F34D, 0xE4B90D743A6667EA, 0x7147B5E0705F46EF, - 0x2782CB2A1508B039, 0xEC065EF5F45B1E7D, 0x21B5B183CFD05B10, 0xDBE733C060295C77, - 0x9FA73672394C017E, 0xCF55321186C31C81, 0xD8720E1A0D45A7ED, 0x3B8F997A3DDF8958, - 0x3AFC79C7EDFB2B2E, 0xE9A4198643EF0ECE, 0x5F09CDF67B4E2D37, 0x4F6A6BE9FA34DF04, - 0xB6ADD47038A123F9, 0x8D224D0A057EAAA1, 0xC96248B85C1BF7A8, 0xE3FD9760309A2EB5, - 0x0B2A6E5BA351820D, 0xEB42C4E1FEA75722, 0x948D58299A1D8373, 0x7FCF9CC864BAD451, - 0xA55B4FB5D4B72A50, 0x08BF5381CE3D7997, 0x46A6D8D5E42D04E5, 0xD22B80FC7E308796, - 0x57B69E77B57354A0, 0x3969441D8097D0B4, 0x3330CAFBF3E2F0CF, 0xE28E77DDE0BE8CC3, - 0x62B12E259C494F46, 0xA6CE726FB9DBD1CA, 0x41E242C1EED14DBA, 0x76032FF47AA30FB0 + +__constant__ uint2 T32[256] = { + {0x2C9B3220,0x05BA7BC8}, {0xF8B65E4F,0x31A54665}, {0x7547F4D4,0xB1B651F7}, {0x7BA46682,0x8BFA0D85}, + {0xA16A98BB,0x85A96C5A}, {0x08EB79C9,0x990FAEF9}, {0x47F4A62D,0xA15E37A2}, {0x5D27741E,0x76857DCD}, + {0x0A1820BC,0xF8C50B80}, {0x01F7A2B4,0xBE65DCB2}, {0x6F9426E7,0x666D1B98}, {0x53C4E648,0x4CC921BF}, + {0x93D9CA42,0x95410A0F}, {0x647BA4EF,0x20CDCCAA}, {0x890A1871,0x429A4060}, {0x9B32B38B,0x0C4EA4F6}, + {0xDE354CD3,0xCCDA362D}, {0x7C5B2FA9,0x96DC23BC}, {0xAA851AB3,0xC309BB68}, {0x3648E013,0xD26131A7}, + {0x41FC4DB2,0x021DC529}, {0x704BE48A,0xCD5ADAB7}, {0x84ED71E6,0xA77965D9}, {0x1734BBA4,0x32386FD6}, + {0x38AB7245,0xE82D6DD5}, {0x6177B4B1,0x5C2147EA}, {0xCF091CE8,0x5DA1AB70}, {0x72B8BDFF,0xAC907FCE}, + {0x972278A8,0x57C85DFD}, {0x6B6F940D,0xA4E44C6A}, {0x4F1FDFE4,0x3851995B}, {0xED71BC9E,0x62578CCA}, + {0xC01D2C0A,0xD9882BB0}, {0x113C503B,0x917B9D5D}, {0xA87643C6,0xA2C31E11}, {0xA399C1CE,0xE463C923}, + {0x7EA876DC,0xF71686C5}, {0xE096D509,0x87B4A973}, {0x9D3A5814,0xAF0D567D}, {0x59DCC6F4,0xB40C2A3F}, + {0x95D121DD,0x3602F884}, {0x9836484A,0xD3E1DD3D}, {0xA46688E5,0xF945E71A}, {0xB2A591F5,0x7518547E}, + {0x50C01D89,0x93665874}, {0x658C065B,0x9EA81018}, {0xBC4603A3,0x4F54080C}, {0x5137BF3D,0x2D0384C6}, + {0xEC861E2A,0xDC325078}, {0x79573FF7,0xEA30A8FC}, {0xCA050CB6,0x214D2030}, {0x8016C30C,0x65F0322B}, + {0x1B247087,0x69BE96DD}, {0x81E161B8,0xDB95EE99}, {0xD9CA05F8,0xD1FC1814}, {0xCC0DE729,0x820ED2BB}, + {0x430F14C7,0x63D76050}, {0xA09D3A0F,0x3BCCB0E8}, {0x573F54A2,0x8E40764D}, {0xE16177BD,0x39D175C1}, + {0x734F1F4B,0x12F5A37C}, {0x1FDFC26D,0xAB37C12F}, {0x395CD0F1,0x5648B167}, {0x37BF42A7,0x6C04ED15}, + {0x14304065,0xED97161D}, {0xAB72B807,0x7D6C67DA}, {0xBA4EE83C,0xEC17FA87}, {0x0304FBC1,0xDFAF79CB}, + {0x71BC463E,0x733F0605}, {0x87E98A27,0x78D61C12}, {0x77B4ADA1,0xD07CF48E}, {0x6C90DD26,0xB9C26253}, + {0x60801605,0xE2449B58}, {0xF941FCFB,0x8FC09AD7}, {0x4BE46D0E,0xFAD8CEA9}, {0x0608EB9F,0xA343F28B}, + {0x4917347B,0x9B126BD0}, {0xE7699C22,0x9A92874A}, {0xC4E69EE0,0x1B017C42}, {0x0EE39256,0x3A4C5C72}, + {0x3EA399DA,0x4B6E9F5E}, {0x5AD83D35,0x6BA353F4}, {0x4C1B2425,0xE7FEE090}, {0x2587E95D,0x22D00983}, + {0x0F1430E2,0x842980C0}, {0x861E2893,0xC6B3C0A0}, {0x19D729F2,0x087433A4}, {0xD42D6C6F,0x341F3DAD}, + {0xFBB2A58E,0xEE0A3FAE}, {0x90DD3183,0x4AEE73C4}, {0xB1A16A34,0xAAB72DB5}, {0x5E238FDF,0xA92A0406}, + {0x686B6FCC,0x7B4B35A1}, {0xF4A6956C,0x6A23BF6E}, {0x851AD352,0x191CB96B}, {0xD6DE351A,0x55D598D4}, + {0xF2AE7EF3,0xC9604DE5}, {0xA981E172,0x1CA6C2A3}, {0xAD7A5398,0xDE2F9551}, {0x56C8F616,0x3025AAFF}, + {0x1E2860D9,0x15521D9D}, {0xFA45073A,0x506FE31C}, {0x2B647B0B,0x189C55F1}, {0xAE7EA859,0x0180EC9A}, + {0x050C105E,0x7CEC8B40}, {0x8BF94104,0x2350E519}, {0x55CC0DD7,0xEF8AD334}, {0x6D677F92,0x07A7BEE1}, + {0x0DE76997,0xE5E325B9}, {0xA26E637A,0x5A061591}, {0x18208B46,0xB611EF16}, {0xB7A981AB,0x09F4DF3E}, + {0xE87DACC0,0x1EBB078A}, {0xB65E231F,0xB791038C}, {0x74B05660,0x0FD38D45}, {0xC1EA8EBE,0x67EDF702}, + {0x831238CD,0xBA5F4BE0}, {0xCEFEBE5C,0xE3C477C2}, {0x354C1BD2,0x0DCE486C}, {0x16C31910,0x8C5DB364}, + {0xA7627324,0x26EA9ED1}, {0xEF82E5EB,0x039D29B3}, {0xCBF2AE02,0x9F28FC82}, {0xF05D2786,0xA8AAE89C}, + {0x2774B028,0x431AACFA}, {0x31B7A938,0xCF471F9E}, {0xE3922EC8,0x581BD0B8}, {0x400BEF06,0xBC78199B}, + {0xBF42F862,0x90FB71C7}, {0x46030499,0x1F3BEB10}, {0xB55AD8DE,0x683E7A47}, {0xA695D190,0x988F4263}, + {0x6E638453,0xD808C72A}, {0xC319D7CB,0x0627527B}, {0xD72997AE,0xEBB04466}, {0xE2658C7C,0xE67E0C0A}, + {0xB056C880,0x14D2F107}, {0x30400B8C,0x7122C32C}, {0xD5DACEDB,0x8A7AE11F}, {0xE98A0E74,0xA0DEDB38}, + {0xDCC615A6,0xAD109354}, {0xF655CC19,0x0BE91A17}, {0xB8BDB149,0x8DDD5FFE}, {0xAF890AED,0xBFE53028}, + {0xB4AD7A6A,0xD65BA6F5}, {0x2997227E,0x7956F088}, {0x32B352F9,0x10E86655}, {0xDACEFE39,0x0E5361DF}, + {0x9FC90161,0xCEC7F304}, {0x677F5F2E,0xFF62B561}, {0xD22587F0,0x975CCF26}, {0x543BAF63,0x51EF0F86}, + {0x10CBF28F,0x2F1E41EF}, {0xBBB94A88,0x52722635}, {0x3344F04D,0xAE8DBAE7}, {0x6688FD9A,0x410769D3}, + {0x34BBB966,0xB3AB94DE}, {0x8DF1AA9B,0x80131792}, {0xC5113C54,0xA564A0F0}, {0xBDB1A117,0xF131D4BE}, + {0xEA8EF5B5,0x7F71A2F3}, {0xC8F655C3,0x40878549}, {0x44F05DEC,0x7EF14E69}, {0xF55137D8,0xD44663DC}, + {0x523344FC,0xF2ACFD0D}, {0x00000000,0x00000000}, {0x8EF5515A,0x5FBC6E59}, {0xF1AA8532,0x16CF342E}, + {0xDB395C8D,0xB036BD6D}, {0xDD31B712,0x13754FE6}, {0x2D6C9094,0xBBDFA77A}, {0x3A582B30,0x89E7C8AC}, + {0xCDFA459D,0x3C6B0E09}, {0xC7E26521,0xC4AE0589}, {0x7F5FD468,0x49735A77}, {0x1D2C9B18,0xCAFD6456}, + {0x2F9FC9E1,0xDA150203}, {0x94268369,0x88672436}, {0x3BAF8984,0x3782141E}, {0x24704BE9,0x9CB5D531}, + {0x1AD3D233,0xD7DB4A6F}, {0x2A93D9BF,0xA6F98943}, {0x8A0EE3B0,0x9D3539AB}, {0x15C7E2D1,0x53F2CAAF}, + {0x76430F15,0x6E19283C}, {0x6384EDC4,0x3DEBE293}, {0x208BF903,0x5E3C82C3}, {0xB94A13FD,0x33B8834C}, + {0x2E686B55,0x6470DEB1}, {0x7A53C436,0x359FD137}, {0x02F35975,0x61CAA579}, {0x82E59A79,0x043A9752}, + {0x2683129C,0xFD7F7048}, {0x699CCD78,0xC52EE913}, {0x7DAC8D1D,0x28B9FF0E}, {0x78A09D43,0x5455744E}, + {0xB3523341,0xCB7D88CC}, {0x4A13CFBA,0x44BD121B}, {0xFDBA4E11,0x4D49CD25}, {0x8C06082F,0x3E76CB20}, + {0x2278A076,0x3FF627BA}, {0x04FBB2EA,0xC28957F2}, {0xE46D67E3,0x453DFE81}, {0x3DA7621B,0x94C1E695}, + {0xFF491764,0x2C83685C}, {0xFC4DECA5,0xF32C1197}, {0x922E68F6,0x2B24D6BD}, {0x9AC5113F,0xB22B7844}, + {0xD1217C31,0x48F3B6ED}, {0xBEB55AD6,0x2E9EAD75}, {0x5FD42D6B,0x174FD8B4}, {0x1238ABFA,0x4ED4E496}, + {0xFEBEB5D0,0x92E6B4EE}, {0x0BEF8208,0x46A0D732}, {0xA5912A51,0x47203BA8}, {0xE69E3E96,0x24F75BF8}, + {0x13CF094E,0xF0B13824}, {0xC901F777,0xFEE259FB}, {0x091CDB7D,0x276A724B}, {0xEE75475F,0xBDF8F501}, + {0x4DEC8691,0x599B3C22}, {0x99C1EAFE,0x6D84018F}, {0x1CDB39AC,0x7498B8E4}, {0x217C5BB7,0xE0595E71}, + {0x3C50C0AF,0x2AA43A27}, {0x3F543B6E,0xF50B43EC}, {0x62734F70,0x838E3E21}, {0x4507FF58,0xC09492DB}, + {0xDFC2EE67,0x72BFEA9F}, {0x9CCDFAA0,0x11688ACF}, {0x6A9836B9,0x1A8190D8}, {0xC615C795,0x7ACBD93B}, + {0x286080CA,0xC7332C3A}, {0x4EE87D50,0x863445E9}, {0xD0D6DE85,0xF6966A5F}, {0x96D5DA1C,0xE9AD814F}, + {0x9E3EA3D5,0x70A22FB6}, {0x582B6440,0x0A69F68D}, {0xC2EE757F,0xB8428EC9}, {0xAC8DF12C,0x604A49E3}, + {0x0C10CB23,0x5B86F90B}, {0x8F02F3EE,0xE1D9B2EB}, {0xD3D22544,0x29391394}, {0x5CD0D6AA,0xC8E0A17F}, + {0xF7A26EAD,0xB58CC6A5}, {0x238F02C2,0x8193FB08}, {0x5B2F9F81,0xD5C68F46}, {0x88FDBAC5,0xFCFF9CD2}, + {0xF359DC47,0x77059157}, {0x07FF492B,0x1D262E39}, {0xE59AC557,0xFB582233}, {0x42F8B673,0xDDB2BCE2}, + {0x48E096CF,0x2577B762}, {0xD83DA74C,0x6F99C4A6}, {0xEB795701,0xC1147E41}, {0x912A9337,0xF48BAF76}, }; -__device__ static uint64_t T2[256] = { - 0x45B268A93ACDE4CC, 0xAF7F0BE884549D08, 0x048354B3C1468263, 0x925435C2C80EFED2, - 0xEE4E37F27FDFFBA7, 0x167A33920C60F14D, 0xFB123B52EA03E584, 0x4A0CAB53FDBB9007, - 0x9DEAF6380F788A19, 0xCB48EC558F0CB32A, 0xB59DC4B2D6FEF7E0, 0xDCDBCA22F4F3ECB6, - 0x11DF5813549A9C40, 0xE33FDEDF568ACED3, 0xA0C1C8124322E9C3, 0x07A56B8158FA6D0D, - 0x77279579B1E1F3DD, 0xD9B18B74422AC004, 0xB8EC2D9FFFABC294, 0xF4ACF8A82D75914F, - 0x7BBF69B1EF2B6878, 0xC4F62FAF487AC7E1, 0x76CE809CC67E5D0C, 0x6711D88F92E4C14C, - 0x627B99D9243DEDFE, 0x234AA5C3DFB68B51, 0x909B1F15262DBF6D, 0x4F66EA054B62BCB5, - 0x1AE2CF5A52AA6AE8, 0xBEA053FBD0CE0148, 0xED6808C0E66314C9, 0x43FE16CD15A82710, - 0xCD049231A06970F6, 0xE7BC8A6C97CC4CB0, 0x337CE835FCB3B9C0, 0x65DEF2587CC780F3, - 0x52214EDE4132BB50, 0x95F15E4390F493DF, 0x870839625DD2E0F1, 0x41313C1AFB8B66AF, - 0x91720AF051B211BC, 0x477D427ED4EEA573, 0x2E3B4CEEF6E3BE25, 0x82627834EB0BCC43, - 0x9C03E3DD78E724C8, 0x2877328AD9867DF9, 0x14B51945E243B0F2, 0x574B0F88F7EB97E2, - 0x88B6FA989AA4943A, 0x19C4F068CB168586, 0x50EE6409AF11FAEF, 0x7DF317D5C04EABA4, - 0x7A567C5498B4C6A9, 0xB6BBFB804F42188E, 0x3CC22BCF3BC5CD0B, 0xD04336EAAA397713, - 0xF02FAC1BEC33132C, 0x2506DBA7F0D3488D, 0xD7E65D6BF2C31A1E, 0x5EB9B2161FF820F5, - 0x842E0650C46E0F9F, 0x716BEB1D9E843001, 0xA933758CAB315ED4, 0x3FE414FDA2792265, - 0x27C9F1701EF00932, 0x73A4C1CA70A771BE, 0x94184BA6E76B3D0E, 0x40D829FF8C14C87E, - 0x0FBEC3FAC77674CB, 0x3616A9634A6A9572, 0x8F139119C25EF937, 0xF545ED4D5AEA3F9E, - 0xE802499650BA387B, 0x6437E7BD0B582E22, 0xE6559F89E053E261, 0x80AD52E305288DFC, - 0x6DC55A23E34B9935, 0xDE14E0F51AD0AD09, 0xC6390578A659865E, 0x96D7617109487CB1, - 0xE2D6CB3A21156002, 0x01E915E5779FAED1, 0xADB0213F6A77DCB7, 0x9880B76EB9A1A6AB, - 0x5D9F8D248644CF9B, 0xFD5E4536C5662658, 0xF1C6B9FE9BACBDFD, 0xEACD6341BE9979C4, - 0xEFA7221708405576, 0x510771ECD88E543E, 0xC2BA51CB671F043D, 0x0AD482AC71AF5879, - 0xFE787A045CDAC936, 0xB238AF338E049AED, 0xBD866CC94972EE26, 0x615DA6EBBD810290, - 0x3295FDD08B2C1711, 0xF834046073BF0AEA, 0xF3099329758FFC42, 0x1CAEB13E7DCFA934, - 0xBA2307481188832B, 0x24EFCE42874CE65C, 0x0E57D61FB0E9DA1A, 0xB3D1BAD6F99B343C, - 0xC0757B1C893C4582, 0x2B510DB8403A9297, 0x5C7698C1F1DB614A, 0x3E0D0118D5E68CB4, - 0xD60F488E855CB4CF, 0xAE961E0DF3CB33D9, 0x3A8E55AB14A00ED7, 0x42170328623789C1, - 0x838B6DD19C946292, 0x895FEF7DED3B3AEB, 0xCFCBB8E64E4A3149, 0x064C7E642F65C3DC, - 0x3D2B3E2A4C5A63DA, 0x5BD3F340A9210C47, 0xB474D157A1615931, 0xAC5934DA1DE87266, - 0x6EE365117AF7765B, 0xC86ED36716B05C44, 0x9BA6885C201D49C5, 0xB905387A88346C45, - 0x131072C4BAB9DDFF, 0xBF49461EA751AF99, 0xD52977BC1CE05BA1, 0xB0F785E46027DB52, - 0x546D30BA6E57788C, 0x305AD707650F56AE, 0xC987C682612FF295, 0xA5AB8944F5FBC571, - 0x7ED528E759F244CA, 0x8DDCBBCE2C7DB888, 0xAA154ABE328DB1BA, 0x1E619BE993ECE88B, - 0x09F2BD9EE813B717, 0x7401AA4B285D1CB3, 0x21858F143195CAEE, 0x48C381841398D1B8, - 0xFCB750D3B2F98889, 0x39A86A998D1CE1B9, 0x1F888E0CE473465A, 0x7899568376978716, - 0x02CF2AD7EE2341BF, 0x85C713B5B3F1A14E, 0xFF916FE12B4567E7, 0x7C1A0230B7D10575, - 0x0C98FCC85ECA9BA5, 0xA3E7F720DA9E06AD, 0x6A6031A2BBB1F438, 0x973E74947ED7D260, - 0x2CF4663918C0FF9A, 0x5F50A7F368678E24, 0x34D983B4A449D4CD, 0x68AF1B755592B587, - 0x7F3C3D022E6DEA1B, 0xABFC5F5B45121F6B, 0x0D71E92D29553574, 0xDFFDF5106D4F03D8, - 0x081BA87B9F8C19C6, 0xDB7EA1A3AC0981BB, 0xBBCA12AD66172DFA, 0x79704366010829C7, - 0x179326777BFF5F9C, 0x0000000000000000, 0xEB2476A4C906D715, 0x724DD42F0738DF6F, - 0xB752EE6538DDB65F, 0x37FFBC863DF53BA3, 0x8EFA84FCB5C157E6, 0xE9EB5C73272596AA, - 0x1B0BDABF2535C439, 0x86E12C872A4D4E20, 0x9969A28BCE3E087A, 0xFAFB2EB79D9C4B55, - 0x056A4156B6D92CB2, 0x5A3AE6A5DEBEA296, 0x22A3B026A8292580, 0x53C85B3B36AD1581, - 0xB11E900117B87583, 0xC51F3A4A3FE56930, 0xE019E1EDCF3621BD, 0xEC811D2591FCBA18, - 0x445B7D4C4D524A1D, 0xA8DA6069DCAEF005, 0x58F5CC72309DE329, 0xD4C062596B7FF570, - 0xCE22AD0339D59F98, 0x591CD99747024DF8, 0x8B90C5AA03187B54, 0xF663D27FC356D0F0, - 0xD8589E9135B56ED5, 0x35309651D3D67A1C, 0x12F96721CD26732E, 0xD28C1C3D441A36AC, - 0x492A946164077F69, 0x2D1D73DC6F5F514B, 0x6F0A70F40D68D88A, 0x60B4B30ECA1EAC41, - 0xD36509D83385987D, 0x0B3D97490630F6A8, 0x9ECCC90A96C46577, 0xA20EE2C5AD01A87C, - 0xE49AB55E0E70A3DE, 0xA4429CA182646BA0, 0xDA97B446DB962F6A, 0xCCED87D4D7F6DE27, - 0x2AB8185D37A53C46, 0x9F25DCEFE15BCBA6, 0xC19C6EF9FEA3EB53, 0xA764A3931BD884CE, - 0x2FD2590B817C10F4, 0x56A21A6D80743933, 0xE573A0BB79EF0D0F, 0x155C0CA095DC1E23, - 0x6C2C4FC694D437E4, 0x10364DF623053291, 0xDD32DFC7836C4267, 0x03263F3299BCEF6E, - 0x66F8CD6AE57B6F9D, 0x8C35AE2B5BE21659, 0x31B3C2E21290F87F, 0x93BD2027BF915003, - 0x69460E90220D1B56, 0x299E276FAE19D328, 0x63928C3C53A2432F, 0x7082FEF8E91B9ED0, - 0xBC6F792C3EED40F7, 0x4C40D537D2DE53DB, 0x75E8BFAE5FC2B262, 0x4DA9C0D2A541FD0A, - 0x4E8FFFE03CFD1264, 0x2620E495696FA7E3, 0xE1F0F408B8A98F6C, 0xD1AA230FDDA6D9C2, - 0xC7D0109DD1C6288F, 0x8A79D04F7487D585, 0x4694579BA3710BA2, 0x38417F7CFA834F68, - 0x1D47A4DB0A5007E5, 0x206C9AF1460A643F, 0xA128DDF734BD4712, 0x8144470672B7232D, - 0xF2E086CC02105293, 0x182DE58DBC892B57, 0xCAA1F9B0F8931DFB, 0x6B892447CC2E5AE9, - 0xF9DD11850420A43B, 0x4BE5BEB68A243ED6, 0x5584255F19C8D65D, 0x3B67404E633FA006, - 0xA68DB6766C472A1F, 0xF78AC79AB4C97E21, 0xC353442E1080AAEC, 0x9A4F9DB95782E714 + +__constant__ uint2 T42[256] = { + {0x9B2C0A19,0x3EF29D24}, {0xB6F8622F,0xE9E16322}, {0x47757F7A,0x55369940}, {0xA47B0B33,0x9F4D56D5}, + {0x6AA1174C,0x82256746}, {0xEB082FB2,0xB8F5057D}, {0xF4475F53,0xCC48C10B}, {0x275DEC3A,0x373088D4}, + {0x180AED10,0x968F4325}, {0xF7016151,0x173D232C}, {0x946FCC13,0xAE4ED09F}, {0xC4539873,0xFD4B4741}, + {0xD9933765,0x1B5B3F0D}, {0x7B644052,0x2FFCB096}, {0x0A89840C,0xE02376D2}, {0x329B18D7,0xA3AE3A70}, + {0x35DE8526,0x419CBD23}, {0x5B7C3199,0xFAFEBF11}, {0x85AA9B0D,0x0397074F}, {0x4836B970,0xC58AD4FB}, + {0xFC4104A8,0xBEC60BE3}, {0x4B708772,0x1EFF36DC}, {0xED8453B6,0x131FDC33}, {0x341764D3,0x0844E33E}, + {0xAB38CD39,0x0FF11B6E}, {0x7761B85A,0x64351F0A}, {0x09CFBA0E,0x3B5694F5}, {0xB87245D0,0x30857084}, + {0x2297AE3C,0x47AFB3BD}, {0x6F6B554A,0xF2BA5C2F}, {0x1F4F70E1,0x74BDC476}, {0x71EDC45E,0xCFDFC644}, + {0x1DC0AF16,0xE610784C}, {0x3C113F28,0x7ACA29D6}, {0x76A859AF,0x2DED4117}, {0x99A3D5EE,0xAC5F211E}, + {0xA87EF33B,0xD484F949}, {0x96E013E4,0x3CE36CA5}, {0x3A9D432C,0xD120F098}, {0xDC597563,0x6BC40464}, + {0xD1956C9E,0x69D5F5E5}, {0x3698BB24,0x9AE95F04}, {0x66A4EF44,0xC9ECC8DA}, {0xA5B2EAC6,0xD69508C8}, + {0xC0503B80,0xC40C2235}, {0x8C652103,0x38C193BA}, {0x46BC9E8F,0x1CEEC75D}, {0x37515AD1,0xD3310119}, + {0x86ECA50F,0xD8E2E568}, {0x5779C991,0xB137108D}, {0x05CA4206,0x709F3B69}, {0x1680CAEF,0x4FEB5083}, + {0x241BD238,0xEC456AF3}, {0xE181ABBE,0x58D673AF}, {0xCAD9BF8C,0x242F54E7}, {0x0DCC19FD,0x0211F181}, + {0x0F43C60A,0x90BC4DBB}, {0x9DA0761D,0x9518446A}, {0x3F57012A,0xA1BFCBF1}, {0x61E172B5,0x2BDE4F89}, + {0x4F732481,0x27B853A8}, {0xDF1F4B61,0xB0B1E643}, {0x5C39AC68,0x18CC3842}, {0xBF37D821,0xD2B7F7D7}, + {0x3014C720,0x3103864A}, {0x72ABFA5C,0x14AA2463}, {0x4EBAC574,0x6E600DB5}, {0x0403A3F3,0x39476574}, + {0xBC71E623,0x09C215F0}, {0xE987F045,0x2A58B947}, {0xB477BDD8,0x7B4CDF18}, {0x906C6FE0,0x9709B5EB}, + {0x8060D90B,0x73083C26}, {0x41F9037E,0xFEDC400E}, {0xE44BE9B8,0x284948C6}, {0x08065BFB,0x728ECAE8}, + {0x17492B1A,0x06330E9E}, {0x69E7294E,0x59508561}, {0xE6C4364F,0xBAE4F4FC}, {0xE30E7449,0xCA7BCF95}, + {0xA33E96C2,0x7D7FD186}, {0xD85AD690,0x52836110}, {0x1B4CD312,0x4DFAA102}, {0x872544FA,0x913ABB75}, + {0x140F1518,0xDD46ECB9}, {0x1E869114,0x3D659A6B}, {0xD719109A,0xC23F2CAB}, {0x2DD46836,0xD713FE06}, + {0xB2FBC1DC,0xD0A60656}, {0xDD909496,0x221C5A79}, {0xA1B14935,0xEFD26DBC}, {0x235E4FC9,0x0E77EDA0}, + {0x6B68F6B9,0xCBFD395B}, {0xA6F4D4C4,0x0DE0EAEF}, {0x1A8532E7,0x0422FF1F}, {0xDED6AA94,0xF969B85E}, + {0xAEF28F3F,0x7F6E2007}, {0x81A938FE,0x3AD0623B}, {0x7AADA1A7,0x6624EE8B}, {0xC856607B,0xB682E8DD}, + {0x281E2A30,0xA78CC56F}, {0x45FAA08D,0xC79B257A}, {0x642B30B3,0x5B4174E0}, {0x7EAE0254,0x5F638BFF}, + {0x0C05F808,0x4BC9AF9C}, {0xF98B46AE,0xCE59308A}, {0xCC55C388,0x8FC58DA9}, {0x676D0EB1,0x803496C7}, + {0xE70DD7BA,0xF33CAAE1}, {0x6EA2B4BF,0xBB620232}, {0x201871CB,0xD5020F87}, {0xA9B712CE,0x9D5CA754}, + {0x7DE83C56,0x841669D8}, {0x5EB6739F,0x8A618478}, {0xB0741E2B,0x420BBA6C}, {0xEAC1CE47,0xF12D5B60}, + {0x1283691C,0x76AC35F7}, {0xFECEDB5F,0x2C6BB7D9}, {0x4C351A83,0xFCCDB18F}, {0xC3160582,0x1F79C012}, + {0x62A74CB7,0xF0ABADAE}, {0x82EF06FC,0xE1A5801C}, {0xF2CB2357,0x67A21845}, {0x5DF04D9D,0x5114665F}, + {0x74278658,0xBF40FD2D}, {0xB73183DA,0xA0393D3F}, {0x92E3B017,0x05A409D1}, {0x0B4065F9,0xA9FB28CF}, + {0x42BF3D7C,0x25A9A229}, {0x03463E02,0xDB75E227}, {0x5AB5D06C,0xB326E10C}, {0x95A62DE6,0xE7968E82}, + {0x636EAD42,0xB973F3B3}, {0x19C30CE5,0xDF571D38}, {0x29D7CBC5,0xEE549B72}, {0x65E2D146,0x12992AFD}, + {0x56B02864,0xF8EF4E90}, {0x4030E28B,0xB7041E13}, {0xDAD50967,0xC02EDD2A}, {0x8AE95D07,0x932B4AF4}, + {0xC6DC4784,0x6FE6FB7B}, {0x55F61666,0x239AACB7}, {0xBDB807D6,0x401A4BED}, {0x89AF6305,0x485EA8D3}, + {0xADB4B13D,0xA41BC220}, {0x9729F211,0x753B32B8}, {0xB3322029,0x997E584B}, {0xCEDA1C7F,0x1D683193}, + {0xC99F818E,0xFF5AB6C0}, {0x7F67E3A1,0x16BBD5E2}, {0x25D233CD,0xA59D34EE}, {0x3B54A2D9,0x98F8AE85}, + {0xCB105E79,0x6DF70AFA}, {0xB9BBA425,0x795D2E99}, {0x44334178,0x8E437B67}, {0x886682F0,0x0186F6CE}, + {0xBB347BD2,0xEBF092A3}, {0xF18D1D55,0xBCD7FA62}, {0x11C5571E,0xADD9D7D0}, {0xB1BDFFDE,0x0BD3E471}, + {0x8EEAFEF4,0xAA6C2F80}, {0xF6C880A4,0x5EE57D31}, {0xF044FCA0,0xF50FA47F}, {0x51F5B595,0x1ADDC9C3}, + {0x3352F922,0xEA76646D}, {0x00000000,0x00000000}, {0xF58EBEA6,0x85909F16}, {0xAAF12CCC,0x46294573}, + {0x39DB7D2E,0x0A5512BF}, {0x31DD26D5,0x78DBD857}, {0x6C2D6B48,0x29CFBE08}, {0x583A0F9B,0x218B5D36}, + {0xFACD78AC,0x152CD2AD}, {0xE2C795BC,0x83A39188}, {0x5F7F926A,0xC3B9DA65}, {0x2C1D89C3,0x9ECBA01B}, + {0x9F2FA9EA,0x07B5F850}, {0x26940DCF,0x7EE8D6C9}, {0xAF3B6ECA,0x36B67E1A}, {0x702425AB,0x86079859}, + {0xD31AB369,0xFB7849DF}, {0x932A51E2,0x4C7C57CC}, {0x0E8A27FF,0xD96413A6}, {0xC715A671,0x263EA566}, + {0x4376DC89,0x6C71FC34}, {0x84637AF8,0x4A4F5952}, {0x8B20BCF2,0xDAF314E9}, {0x4AB96687,0x572768C1}, + {0x682EC8BB,0x1088DB7C}, {0x537A6A62,0x887075F9}, {0xF302C2A2,0x2E7A4658}, {0xE582084D,0x619116DB}, + {0x8326E709,0xA87DDE01}, {0x9C6997E8,0xDCC01A77}, {0xAC7D50C8,0xEDC39C3D}, {0xA078A8C0,0xA60A33A1}, + {0x52B38B97,0xC1A82BE4}, {0x134A88E9,0x3F746BEA}, {0xBAFD9A27,0xA228CCBE}, {0x068C7C04,0xABEAD94E}, + {0x78227E50,0xF48952B1}, {0xFB049959,0x5CF48CB0}, {0x6DE48ABD,0x6017E015}, {0xA73D3531,0x4438B4F2}, + {0x49FF5885,0x8C528AE6}, {0x4DFCFB76,0xB515EF92}, {0x2E925634,0x0C661C21}, {0xC59A7986,0xB493195C}, + {0x21D1903E,0x9CDA519A}, {0xB5BE5C2D,0x32948105}, {0xD45F2E98,0x194ACE8C}, {0x38129CDB,0x438D4CA2}, + {0xBEFE39D4,0x9B6FA9CA}, {0xEF0B8C41,0x81B26009}, {0x91A58E15,0xDED1EBF6}, {0x9EE6481F,0x4E6DA64D}, + {0xCF13FD8A,0x54B06F8E}, {0x01C9E1F5,0x49D85E1D}, {0x1C094EE3,0xAFC82651}, {0x75EE67AD,0xF698A330}, + {0xEC4DB243,0x5AC7822E}, {0xC199DA75,0x8DD47C28}, {0xDB1CE892,0x89F68337}, {0x7C21DDA3,0xCDCE37C5}, + {0x503C5460,0x530597DE}, {0x543FF793,0x6A42F2AA}, {0x73621BA9,0x5D727A7E}, {0x07459DF1,0xE2328753}, + {0xC2DFE477,0x56A19E0F}, {0xCD9C227D,0xC61DD3B4}, {0x986A341B,0xE5877F03}, {0x15C6F4ED,0x949EB2A4}, + {0x60289340,0x62061194}, {0xE84E11B0,0x6380E75A}, {0xD6D0F16F,0x8BE772B6}, {0xD596CF6D,0x50929091}, + {0x3E9EE0DF,0xE86795EC}, {0x2B581432,0x7CF92748}, {0xEEC26DB4,0xC86A3E14}, {0x8DACC0F6,0x7119CDA7}, + {0x100CB6EB,0xE40189CD}, {0x028FDFF7,0x92ADBC3A}, {0xD2D3529C,0xB2A017C2}, {0xD05C8D6B,0x200DABF8}, + {0xA2F77737,0x34A78F9B}, {0x8F231F01,0xE3B4719D}, {0x2F5BB7C1,0x45BE423C}, {0xFD88E55D,0xF71E55FE}, + {0x59F3EE6E,0x6853032B}, {0xFF073AAA,0x65B3E9C4}, {0x9AE5EBEC,0x772AC339}, {0xF842A75B,0x87816E97}, + {0xE0484A4B,0x110E2DB2}, {0x3DD8DEDD,0x331277CB}, {0x79EB9FA5,0xBD510CAC}, {0x2A91F5C7,0x35217955}, }; -__device__ static uint64_t T3[256] = { - 0x05BA7BC82C9B3220, 0x31A54665F8B65E4F, 0xB1B651F77547F4D4, 0x8BFA0D857BA46682, - 0x85A96C5AA16A98BB, 0x990FAEF908EB79C9, 0xA15E37A247F4A62D, 0x76857DCD5D27741E, - 0xF8C50B800A1820BC, 0xBE65DCB201F7A2B4, 0x666D1B986F9426E7, 0x4CC921BF53C4E648, - 0x95410A0F93D9CA42, 0x20CDCCAA647BA4EF, 0x429A4060890A1871, 0x0C4EA4F69B32B38B, - 0xCCDA362DDE354CD3, 0x96DC23BC7C5B2FA9, 0xC309BB68AA851AB3, 0xD26131A73648E013, - 0x021DC52941FC4DB2, 0xCD5ADAB7704BE48A, 0xA77965D984ED71E6, 0x32386FD61734BBA4, - 0xE82D6DD538AB7245, 0x5C2147EA6177B4B1, 0x5DA1AB70CF091CE8, 0xAC907FCE72B8BDFF, - 0x57C85DFD972278A8, 0xA4E44C6A6B6F940D, 0x3851995B4F1FDFE4, 0x62578CCAED71BC9E, - 0xD9882BB0C01D2C0A, 0x917B9D5D113C503B, 0xA2C31E11A87643C6, 0xE463C923A399C1CE, - 0xF71686C57EA876DC, 0x87B4A973E096D509, 0xAF0D567D9D3A5814, 0xB40C2A3F59DCC6F4, - 0x3602F88495D121DD, 0xD3E1DD3D9836484A, 0xF945E71AA46688E5, 0x7518547EB2A591F5, - 0x9366587450C01D89, 0x9EA81018658C065B, 0x4F54080CBC4603A3, 0x2D0384C65137BF3D, - 0xDC325078EC861E2A, 0xEA30A8FC79573FF7, 0x214D2030CA050CB6, 0x65F0322B8016C30C, - 0x69BE96DD1B247087, 0xDB95EE9981E161B8, 0xD1FC1814D9CA05F8, 0x820ED2BBCC0DE729, - 0x63D76050430F14C7, 0x3BCCB0E8A09D3A0F, 0x8E40764D573F54A2, 0x39D175C1E16177BD, - 0x12F5A37C734F1F4B, 0xAB37C12F1FDFC26D, 0x5648B167395CD0F1, 0x6C04ED1537BF42A7, - 0xED97161D14304065, 0x7D6C67DAAB72B807, 0xEC17FA87BA4EE83C, 0xDFAF79CB0304FBC1, - 0x733F060571BC463E, 0x78D61C1287E98A27, 0xD07CF48E77B4ADA1, 0xB9C262536C90DD26, - 0xE2449B5860801605, 0x8FC09AD7F941FCFB, 0xFAD8CEA94BE46D0E, 0xA343F28B0608EB9F, - 0x9B126BD04917347B, 0x9A92874AE7699C22, 0x1B017C42C4E69EE0, 0x3A4C5C720EE39256, - 0x4B6E9F5E3EA399DA, 0x6BA353F45AD83D35, 0xE7FEE0904C1B2425, 0x22D009832587E95D, - 0x842980C00F1430E2, 0xC6B3C0A0861E2893, 0x087433A419D729F2, 0x341F3DADD42D6C6F, - 0xEE0A3FAEFBB2A58E, 0x4AEE73C490DD3183, 0xAAB72DB5B1A16A34, 0xA92A04065E238FDF, - 0x7B4B35A1686B6FCC, 0x6A23BF6EF4A6956C, 0x191CB96B851AD352, 0x55D598D4D6DE351A, - 0xC9604DE5F2AE7EF3, 0x1CA6C2A3A981E172, 0xDE2F9551AD7A5398, 0x3025AAFF56C8F616, - 0x15521D9D1E2860D9, 0x506FE31CFA45073A, 0x189C55F12B647B0B, 0x0180EC9AAE7EA859, - 0x7CEC8B40050C105E, 0x2350E5198BF94104, 0xEF8AD33455CC0DD7, 0x07A7BEE16D677F92, - 0xE5E325B90DE76997, 0x5A061591A26E637A, 0xB611EF1618208B46, 0x09F4DF3EB7A981AB, - 0x1EBB078AE87DACC0, 0xB791038CB65E231F, 0x0FD38D4574B05660, 0x67EDF702C1EA8EBE, - 0xBA5F4BE0831238CD, 0xE3C477C2CEFEBE5C, 0x0DCE486C354C1BD2, 0x8C5DB36416C31910, - 0x26EA9ED1A7627324, 0x039D29B3EF82E5EB, 0x9F28FC82CBF2AE02, 0xA8AAE89CF05D2786, - 0x431AACFA2774B028, 0xCF471F9E31B7A938, 0x581BD0B8E3922EC8, 0xBC78199B400BEF06, - 0x90FB71C7BF42F862, 0x1F3BEB1046030499, 0x683E7A47B55AD8DE, 0x988F4263A695D190, - 0xD808C72A6E638453, 0x0627527BC319D7CB, 0xEBB04466D72997AE, 0xE67E0C0AE2658C7C, - 0x14D2F107B056C880, 0x7122C32C30400B8C, 0x8A7AE11FD5DACEDB, 0xA0DEDB38E98A0E74, - 0xAD109354DCC615A6, 0x0BE91A17F655CC19, 0x8DDD5FFEB8BDB149, 0xBFE53028AF890AED, - 0xD65BA6F5B4AD7A6A, 0x7956F0882997227E, 0x10E8665532B352F9, 0x0E5361DFDACEFE39, - 0xCEC7F3049FC90161, 0xFF62B561677F5F2E, 0x975CCF26D22587F0, 0x51EF0F86543BAF63, - 0x2F1E41EF10CBF28F, 0x52722635BBB94A88, 0xAE8DBAE73344F04D, 0x410769D36688FD9A, - 0xB3AB94DE34BBB966, 0x801317928DF1AA9B, 0xA564A0F0C5113C54, 0xF131D4BEBDB1A117, - 0x7F71A2F3EA8EF5B5, 0x40878549C8F655C3, 0x7EF14E6944F05DEC, 0xD44663DCF55137D8, - 0xF2ACFD0D523344FC, 0x0000000000000000, 0x5FBC6E598EF5515A, 0x16CF342EF1AA8532, - 0xB036BD6DDB395C8D, 0x13754FE6DD31B712, 0xBBDFA77A2D6C9094, 0x89E7C8AC3A582B30, - 0x3C6B0E09CDFA459D, 0xC4AE0589C7E26521, 0x49735A777F5FD468, 0xCAFD64561D2C9B18, - 0xDA1502032F9FC9E1, 0x8867243694268369, 0x3782141E3BAF8984, 0x9CB5D53124704BE9, - 0xD7DB4A6F1AD3D233, 0xA6F989432A93D9BF, 0x9D3539AB8A0EE3B0, 0x53F2CAAF15C7E2D1, - 0x6E19283C76430F15, 0x3DEBE2936384EDC4, 0x5E3C82C3208BF903, 0x33B8834CB94A13FD, - 0x6470DEB12E686B55, 0x359FD1377A53C436, 0x61CAA57902F35975, 0x043A975282E59A79, - 0xFD7F70482683129C, 0xC52EE913699CCD78, 0x28B9FF0E7DAC8D1D, 0x5455744E78A09D43, - 0xCB7D88CCB3523341, 0x44BD121B4A13CFBA, 0x4D49CD25FDBA4E11, 0x3E76CB208C06082F, - 0x3FF627BA2278A076, 0xC28957F204FBB2EA, 0x453DFE81E46D67E3, 0x94C1E6953DA7621B, - 0x2C83685CFF491764, 0xF32C1197FC4DECA5, 0x2B24D6BD922E68F6, 0xB22B78449AC5113F, - 0x48F3B6EDD1217C31, 0x2E9EAD75BEB55AD6, 0x174FD8B45FD42D6B, 0x4ED4E4961238ABFA, - 0x92E6B4EEFEBEB5D0, 0x46A0D7320BEF8208, 0x47203BA8A5912A51, 0x24F75BF8E69E3E96, - 0xF0B1382413CF094E, 0xFEE259FBC901F777, 0x276A724B091CDB7D, 0xBDF8F501EE75475F, - 0x599B3C224DEC8691, 0x6D84018F99C1EAFE, 0x7498B8E41CDB39AC, 0xE0595E71217C5BB7, - 0x2AA43A273C50C0AF, 0xF50B43EC3F543B6E, 0x838E3E2162734F70, 0xC09492DB4507FF58, - 0x72BFEA9FDFC2EE67, 0x11688ACF9CCDFAA0, 0x1A8190D86A9836B9, 0x7ACBD93BC615C795, - 0xC7332C3A286080CA, 0x863445E94EE87D50, 0xF6966A5FD0D6DE85, 0xE9AD814F96D5DA1C, - 0x70A22FB69E3EA3D5, 0x0A69F68D582B6440, 0xB8428EC9C2EE757F, 0x604A49E3AC8DF12C, - 0x5B86F90B0C10CB23, 0xE1D9B2EB8F02F3EE, 0x29391394D3D22544, 0xC8E0A17F5CD0D6AA, - 0xB58CC6A5F7A26EAD, 0x8193FB08238F02C2, 0xD5C68F465B2F9F81, 0xFCFF9CD288FDBAC5, - 0x77059157F359DC47, 0x1D262E3907FF492B, 0xFB582233E59AC557, 0xDDB2BCE242F8B673, - 0x2577B76248E096CF, 0x6F99C4A6D83DA74C, 0xC1147E41EB795701, 0xF48BAF76912A9337 + +__constant__ uint2 T52[256] = { + {0x46E06A6D,0x8AB0A968}, {0x4BF0B33A,0x43C7E80B}, {0x6B161EE5,0x08C9B354}, {0xEBA990BE,0x39F1C235}, + {0x6606C7B2,0xC1BEF237}, {0x614569AA,0x2C209233}, {0x6FC3289A,0xEB01523B}, {0x935ACEDD,0x946953AB}, + {0x3E13340E,0x272838F6}, {0xA12BA052,0x8B0455EC}, {0x978FF8A2,0x77A1B2C4}, {0x13E54086,0xA55122CA}, + {0x62D3F1CD,0x22761358}, {0x08B76CFE,0xDB8DDFDE}, {0x9E4A178A,0x5D1E12C8}, {0x03969867,0x0E56816B}, + {0x3303ED59,0xEE5F7995}, {0xAB78D71D,0xAFED748B}, {0xF93E53EE,0x6D929F2D}, {0xBA798C2A,0xF5D8A8F8}, + {0x8E39CF6B,0xF619B169}, {0x749104E2,0x95DDAF2F}, {0xE0886427,0xEC2A9C80}, {0x825B95EA,0xCE5C8FD8}, + {0x3AC60271,0xC4E0D999}, {0x173076F9,0x4699C3A5}, {0x50A29F42,0x3D1B151F}, {0x2BC75946,0x9ED505EA}, + {0xDC7F4B98,0x34665ACF}, {0x292342F7,0x61B1FB53}, {0x0E864130,0xC721C008}, {0x96FD7B74,0x8693CD16}, + {0x7136B14B,0x87273192}, {0x63A1721B,0xD3446C8A}, {0xA6680E4A,0x669A35E8}, {0x39509A16,0xCAB658F2}, + {0xF42E8AB9,0xA4E5DE4E}, {0xE83F08D9,0x37A7435E}, {0xE26C7F96,0x134E6239}, {0x2DF67488,0x82791A3C}, + {0x8329163C,0x3F6EF00A}, {0xFDEB6591,0x8E5A7E42}, {0x7981DDB5,0x5CAAEE4C}, {0x5AF1E80D,0x19F23478}, + {0xED98BD70,0x255DDDE3}, {0xA99CCCAC,0x50898A32}, {0xDA4E6656,0x28CA4519}, {0x4CB31D22,0xAE59880F}, + {0x37D6DB26,0x0D9798FA}, {0xB4FFCD1A,0x32F968F0}, {0x4F258545,0xA00F0964}, {0x5E24DE72,0xFA3AD517}, + {0x5DB24615,0xF46C547C}, {0xFF0F7E20,0x713E80FB}, {0x73D2AAFA,0x7843CF2B}, {0xAEDF62B4,0xBD17EA36}, + {0xD16F92CF,0xFD111BAC}, {0xC72D67E0,0x4ABAA7DB}, {0xAD49FAD3,0xB3416B5D}, {0x4914A88B,0xBCA316B2}, + {0x8AECF914,0x15D15006}, {0xE31EFC40,0xE27C1DEB}, {0x9BEDA223,0x4FE48C75}, {0x1B522C78,0x7EDCFD14}, + {0x7C26681C,0x4E5070F1}, {0x5815F3BC,0xE696CAC1}, {0x3BB481A7,0x35D2A64B}, {0xFE7DFDF6,0x800CFF29}, + {0xD5BAA4B0,0x1ED9FAC3}, {0x1EF599D1,0x6C2663A9}, {0x34404341,0x03C11991}, {0x69F20554,0xF7AD4DED}, + {0xB61BD6AB,0xCD9D9649}, {0xEADB1368,0xC8C3BDE7}, {0xB02AFB65,0xD131899F}, {0xE1FAE7F1,0x1D18E352}, + {0xEF7CA6C1,0xDA39235A}, {0xA8EE4F7A,0xA1BBF5E0}, {0xCF9A0B1E,0x91377805}, {0x80BF8E5B,0x31387161}, + {0xDB3CE580,0xD9F83ACB}, {0xD38B897E,0x0275E515}, {0xF0FBBCC6,0x472D3F21}, {0x868EA395,0x2D946EB7}, + {0x21942E09,0xBA3C248D}, {0xBFDE3983,0xE7223645}, {0x02E41BB1,0xFF64FEB9}, {0x0D10D957,0xC9774163}, + {0xB58D4ECC,0xC3CB1722}, {0x9CAE0C3B,0xA27AEC71}, {0xA48C15FB,0x99FECB51}, {0x6D27332B,0x1465AC82}, + {0xD75EBF01,0xE1BD047A}, {0x941960C5,0x79F733AF}, {0x41A3C475,0x672EC96C}, {0x524684F3,0xC27FEBA6}, + {0x75E38734,0x64EFD0FD}, {0x0743AE18,0xED9E6004}, {0xB9EF144D,0xFB8E2993}, {0x0C625A81,0x38453EB1}, + {0x42355C12,0x69784807}, {0x14A6EE9E,0x48CF42CE}, {0x06312DCE,0x1CAC1FD6}, {0x4792E9BB,0x7B82D6BA}, + {0x1F871A07,0x9D141C7B}, {0xC11C4A2E,0x5616B80D}, {0xF21FA777,0xB849C198}, {0xC8D9A506,0x7CA91801}, + {0x7EC273AD,0xB1348E48}, {0x987B3A44,0x41B20D1E}, {0xA3CFBBE3,0x7460AB55}, {0x4576F20A,0x84E62803}, + {0x897A6173,0x1B87D16D}, {0xE45D5258,0x0FE27DEF}, {0xCA3DBEB7,0x83CDE6B8}, {0xD01D1119,0x0C23647E}, + {0xA0592384,0x7A362A3E}, {0xF1893F10,0xB61F40F3}, {0x440471DC,0x75D457D1}, {0x237035B8,0x4558DA34}, + {0x87FC2043,0xDCA61165}, {0xC9AB26D0,0x8D9B67D3}, {0xEE0E2517,0x2B0B5C88}, {0x2AB5DA90,0x6FE77A38}, + {0xD9D8FE31,0x269CC472}, {0xFAA8CB89,0x63C41E46}, {0x1642F52F,0xB7ABBC77}, {0x2F126F39,0x7D1DE485}, + {0x24339BA0,0xA8C6BA30}, {0xCEE888C8,0x600507D7}, {0x1A20AFAE,0x8FEE82C6}, {0x26D78011,0x57A24489}, + {0x36A458F0,0xFCA5E728}, {0x8F4B4CBD,0x072BCEBB}, {0xF36D24A1,0x497BBE4A}, {0xB769557D,0x3CAFE99B}, + {0x05A7B5A9,0x12FA9EBD}, {0x5B836BDB,0xE8C04BAA}, {0xAC3B7905,0x4273148F}, {0x2851C121,0x90838481}, + {0x6C55B0FD,0xE557D350}, {0xCB4F3D61,0x72FF996A}, {0x64E2DC03,0x3EDA0C8E}, {0xE6B949E9,0xF0868356}, + {0xBB0B0FFC,0x04EAD72A}, {0x5967706A,0x17A4B513}, {0x04D5367F,0xE3C8E16F}, {0x8DAF570C,0xF84F3002}, + {0xBD3A2232,0x1846C8FC}, {0xF6CA9108,0x5B8120F7}, {0xECEA3EA6,0xD46FA231}, {0x53340725,0x334D9474}, + {0xC28AD249,0x58403966}, {0x9A9F21F5,0xBED6F3A7}, {0xA5FE962D,0x68CCB483}, {0x57E1315A,0xD085751B}, + {0xE52FD18E,0xFED0023D}, {0x20E6ADDF,0x4B0E5B5F}, {0x6EB1AB4C,0x1A332DE9}, {0x7B65C604,0xA3CE10F5}, + {0xD62C3CD7,0x108F7BA8}, {0x1073D8E1,0xAB07A3A1}, {0x91BED56C,0x6B0DAD12}, {0x3532C097,0xF2F36643}, + {0xB2CEE0D4,0x2E557726}, {0x00000000,0x00000000}, {0xDE9B5029,0xCB02A476}, {0x8B9E7AC2,0xE4E32FD4}, + {0x2C84F75E,0x734B65EE}, {0xCD7E10AF,0x6E5386BC}, {0xE7CBCA3F,0x01B4FC84}, {0x65905FD5,0xCFE8735C}, + {0x0FF4C2E6,0x3613BFDA}, {0x31E7F6E8,0x113B872C}, {0x55052AEB,0x2FE18BA2}, {0xBC48A1E4,0xE974B72E}, + {0xB89D979B,0x0ABC5641}, {0x2202B66E,0xB46AA5E6}, {0xC4BBFF87,0x44EC26B0}, {0x27A503C7,0xA6903B5B}, + {0xFC99E647,0x7F680190}, {0xA71A8D9C,0x97A84A3A}, {0x6037EA7C,0xDD12EDE1}, {0xDD0DC84E,0xC554251D}, + {0x956BE313,0x88C54C7D}, {0x48662B5D,0x4D916960}, {0x9909B992,0xB08072CC}, {0xC5C97C51,0xB5DE5962}, + {0x19B637C9,0x81B803AD}, {0x4A8230EC,0xB2F597D9}, {0x5F565DA4,0x0B08AAC5}, {0x017283D6,0xF1327FD2}, + {0x78F35E63,0xAD98919E}, {0x76751F53,0x6AB95196}, {0x0A53774F,0x24E92167}, {0x15D46D48,0xB9FD3D1C}, + {0xFBDA485F,0x92F66194}, {0x11015B37,0x5A35DC73}, {0x5477A93D,0xDED3F470}, {0x81CD0D8D,0xC00A0EB3}, + {0xC65FE436,0xBB88D809}, {0xBEACBA55,0x16104997}, {0x5693B28C,0x21B70AC9}, {0x25411876,0x59F4C5E2}, + {0x0B21F499,0xD5DB5EB5}, {0xF55C096F,0x55D7A19C}, {0xC3F8519F,0xA97246B4}, {0xA2BD3835,0x8552D487}, + {0x1297C350,0x54635D18}, {0x85183BF2,0x23C2EFDC}, {0xCC0C9379,0x9F61F96E}, {0x9DDC8FED,0x534893A3}, + {0xAA0A54CB,0x5EDF0B59}, {0x9F38945C,0xAC2C6D1A}, {0xD8AA7DE7,0xD7AEBBA0}, {0x09C5EF28,0x2ABFA00C}, + {0x3CF72FBF,0xD84CC64F}, {0xB15878B3,0x2003F64D}, {0xC06EC9F8,0xA724C7DF}, {0x68808682,0x069F323F}, + {0x51D01C94,0xCC296ACD}, {0x5CC0C5C3,0x055E2BAE}, {0x1D6301B6,0x6270E2C2}, {0x382219C0,0x3B842720}, + {0x846AB824,0xD2F0900E}, {0x7A1745D2,0x52FC6F27}, {0xE94D8B0F,0xC6953C8C}, {0x3095753E,0xE009F8FE}, + {0x92284D0B,0x655B2C79}, {0x4347DFC4,0x984A37D5}, {0x8808E2A5,0xEAB5AEBF}, {0x90CC56BA,0x9A3FD2C0}, + {0xF84CD038,0x9CA0E0FF}, {0xAFADE162,0x4C2595E4}, {0xB3BC6302,0xDF6708F4}, {0x7D54EBCA,0xBF620F23}, + {0x1C118260,0x93429D10}, {0x8CDDD4DA,0x097D4FD0}, {0x2E60ECEF,0x8C2F9B57}, {0x18C4B41F,0x708A7C7F}, + {0xDFE9D3FF,0x3A30DBA4}, {0x7FB0F07B,0x4006F19A}, {0x4DC19EF4,0x5F6BF7DD}, {0x32716E8F,0x1F6D0647}, + {0x6A649D33,0xF9FBCC86}, {0x67744464,0x308C8DE5}, {0x72A0292C,0x8971B0F9}, {0x3F61B7D8,0xD61A4724}, + {0xD4C82766,0xEFEB8511}, {0x40D147A3,0x961CB6BE}, {0xF7B812DE,0xAAB35F25}, {0x7044329D,0x76154E40}, + {0x4E570693,0x513D76B6}, {0xD2F90AA8,0xF3479AC7}, {0x77079C85,0x9B8B2E44}, {0x3D85AC69,0x297EB99D}, }; -__device__ static uint64_t T4[256] = { - 0x3EF29D249B2C0A19, 0xE9E16322B6F8622F, 0x5536994047757F7A, 0x9F4D56D5A47B0B33, - 0x822567466AA1174C, 0xB8F5057DEB082FB2, 0xCC48C10BF4475F53, 0x373088D4275DEC3A, - 0x968F4325180AED10, 0x173D232CF7016151, 0xAE4ED09F946FCC13, 0xFD4B4741C4539873, - 0x1B5B3F0DD9933765, 0x2FFCB0967B644052, 0xE02376D20A89840C, 0xA3AE3A70329B18D7, - 0x419CBD2335DE8526, 0xFAFEBF115B7C3199, 0x0397074F85AA9B0D, 0xC58AD4FB4836B970, - 0xBEC60BE3FC4104A8, 0x1EFF36DC4B708772, 0x131FDC33ED8453B6, 0x0844E33E341764D3, - 0x0FF11B6EAB38CD39, 0x64351F0A7761B85A, 0x3B5694F509CFBA0E, 0x30857084B87245D0, - 0x47AFB3BD2297AE3C, 0xF2BA5C2F6F6B554A, 0x74BDC4761F4F70E1, 0xCFDFC64471EDC45E, - 0xE610784C1DC0AF16, 0x7ACA29D63C113F28, 0x2DED411776A859AF, 0xAC5F211E99A3D5EE, - 0xD484F949A87EF33B, 0x3CE36CA596E013E4, 0xD120F0983A9D432C, 0x6BC40464DC597563, - 0x69D5F5E5D1956C9E, 0x9AE95F043698BB24, 0xC9ECC8DA66A4EF44, 0xD69508C8A5B2EAC6, - 0xC40C2235C0503B80, 0x38C193BA8C652103, 0x1CEEC75D46BC9E8F, 0xD331011937515AD1, - 0xD8E2E56886ECA50F, 0xB137108D5779C991, 0x709F3B6905CA4206, 0x4FEB50831680CAEF, - 0xEC456AF3241BD238, 0x58D673AFE181ABBE, 0x242F54E7CAD9BF8C, 0x0211F1810DCC19FD, - 0x90BC4DBB0F43C60A, 0x9518446A9DA0761D, 0xA1BFCBF13F57012A, 0x2BDE4F8961E172B5, - 0x27B853A84F732481, 0xB0B1E643DF1F4B61, 0x18CC38425C39AC68, 0xD2B7F7D7BF37D821, - 0x3103864A3014C720, 0x14AA246372ABFA5C, 0x6E600DB54EBAC574, 0x394765740403A3F3, - 0x09C215F0BC71E623, 0x2A58B947E987F045, 0x7B4CDF18B477BDD8, 0x9709B5EB906C6FE0, - 0x73083C268060D90B, 0xFEDC400E41F9037E, 0x284948C6E44BE9B8, 0x728ECAE808065BFB, - 0x06330E9E17492B1A, 0x5950856169E7294E, 0xBAE4F4FCE6C4364F, 0xCA7BCF95E30E7449, - 0x7D7FD186A33E96C2, 0x52836110D85AD690, 0x4DFAA1021B4CD312, 0x913ABB75872544FA, - 0xDD46ECB9140F1518, 0x3D659A6B1E869114, 0xC23F2CABD719109A, 0xD713FE062DD46836, - 0xD0A60656B2FBC1DC, 0x221C5A79DD909496, 0xEFD26DBCA1B14935, 0x0E77EDA0235E4FC9, - 0xCBFD395B6B68F6B9, 0x0DE0EAEFA6F4D4C4, 0x0422FF1F1A8532E7, 0xF969B85EDED6AA94, - 0x7F6E2007AEF28F3F, 0x3AD0623B81A938FE, 0x6624EE8B7AADA1A7, 0xB682E8DDC856607B, - 0xA78CC56F281E2A30, 0xC79B257A45FAA08D, 0x5B4174E0642B30B3, 0x5F638BFF7EAE0254, - 0x4BC9AF9C0C05F808, 0xCE59308AF98B46AE, 0x8FC58DA9CC55C388, 0x803496C7676D0EB1, - 0xF33CAAE1E70DD7BA, 0xBB6202326EA2B4BF, 0xD5020F87201871CB, 0x9D5CA754A9B712CE, - 0x841669D87DE83C56, 0x8A6184785EB6739F, 0x420BBA6CB0741E2B, 0xF12D5B60EAC1CE47, - 0x76AC35F71283691C, 0x2C6BB7D9FECEDB5F, 0xFCCDB18F4C351A83, 0x1F79C012C3160582, - 0xF0ABADAE62A74CB7, 0xE1A5801C82EF06FC, 0x67A21845F2CB2357, 0x5114665F5DF04D9D, - 0xBF40FD2D74278658, 0xA0393D3FB73183DA, 0x05A409D192E3B017, 0xA9FB28CF0B4065F9, - 0x25A9A22942BF3D7C, 0xDB75E22703463E02, 0xB326E10C5AB5D06C, 0xE7968E8295A62DE6, - 0xB973F3B3636EAD42, 0xDF571D3819C30CE5, 0xEE549B7229D7CBC5, 0x12992AFD65E2D146, - 0xF8EF4E9056B02864, 0xB7041E134030E28B, 0xC02EDD2ADAD50967, 0x932B4AF48AE95D07, - 0x6FE6FB7BC6DC4784, 0x239AACB755F61666, 0x401A4BEDBDB807D6, 0x485EA8D389AF6305, - 0xA41BC220ADB4B13D, 0x753B32B89729F211, 0x997E584BB3322029, 0x1D683193CEDA1C7F, - 0xFF5AB6C0C99F818E, 0x16BBD5E27F67E3A1, 0xA59D34EE25D233CD, 0x98F8AE853B54A2D9, - 0x6DF70AFACB105E79, 0x795D2E99B9BBA425, 0x8E437B6744334178, 0x0186F6CE886682F0, - 0xEBF092A3BB347BD2, 0xBCD7FA62F18D1D55, 0xADD9D7D011C5571E, 0x0BD3E471B1BDFFDE, - 0xAA6C2F808EEAFEF4, 0x5EE57D31F6C880A4, 0xF50FA47FF044FCA0, 0x1ADDC9C351F5B595, - 0xEA76646D3352F922, 0x0000000000000000, 0x85909F16F58EBEA6, 0x46294573AAF12CCC, - 0x0A5512BF39DB7D2E, 0x78DBD85731DD26D5, 0x29CFBE086C2D6B48, 0x218B5D36583A0F9B, - 0x152CD2ADFACD78AC, 0x83A39188E2C795BC, 0xC3B9DA655F7F926A, 0x9ECBA01B2C1D89C3, - 0x07B5F8509F2FA9EA, 0x7EE8D6C926940DCF, 0x36B67E1AAF3B6ECA, 0x86079859702425AB, - 0xFB7849DFD31AB369, 0x4C7C57CC932A51E2, 0xD96413A60E8A27FF, 0x263EA566C715A671, - 0x6C71FC344376DC89, 0x4A4F595284637AF8, 0xDAF314E98B20BCF2, 0x572768C14AB96687, - 0x1088DB7C682EC8BB, 0x887075F9537A6A62, 0x2E7A4658F302C2A2, 0x619116DBE582084D, - 0xA87DDE018326E709, 0xDCC01A779C6997E8, 0xEDC39C3DAC7D50C8, 0xA60A33A1A078A8C0, - 0xC1A82BE452B38B97, 0x3F746BEA134A88E9, 0xA228CCBEBAFD9A27, 0xABEAD94E068C7C04, - 0xF48952B178227E50, 0x5CF48CB0FB049959, 0x6017E0156DE48ABD, 0x4438B4F2A73D3531, - 0x8C528AE649FF5885, 0xB515EF924DFCFB76, 0x0C661C212E925634, 0xB493195CC59A7986, - 0x9CDA519A21D1903E, 0x32948105B5BE5C2D, 0x194ACE8CD45F2E98, 0x438D4CA238129CDB, - 0x9B6FA9CABEFE39D4, 0x81B26009EF0B8C41, 0xDED1EBF691A58E15, 0x4E6DA64D9EE6481F, - 0x54B06F8ECF13FD8A, 0x49D85E1D01C9E1F5, 0xAFC826511C094EE3, 0xF698A33075EE67AD, - 0x5AC7822EEC4DB243, 0x8DD47C28C199DA75, 0x89F68337DB1CE892, 0xCDCE37C57C21DDA3, - 0x530597DE503C5460, 0x6A42F2AA543FF793, 0x5D727A7E73621BA9, 0xE232875307459DF1, - 0x56A19E0FC2DFE477, 0xC61DD3B4CD9C227D, 0xE5877F03986A341B, 0x949EB2A415C6F4ED, - 0x6206119460289340, 0x6380E75AE84E11B0, 0x8BE772B6D6D0F16F, 0x50929091D596CF6D, - 0xE86795EC3E9EE0DF, 0x7CF927482B581432, 0xC86A3E14EEC26DB4, 0x7119CDA78DACC0F6, - 0xE40189CD100CB6EB, 0x92ADBC3A028FDFF7, 0xB2A017C2D2D3529C, 0x200DABF8D05C8D6B, - 0x34A78F9BA2F77737, 0xE3B4719D8F231F01, 0x45BE423C2F5BB7C1, 0xF71E55FEFD88E55D, - 0x6853032B59F3EE6E, 0x65B3E9C4FF073AAA, 0x772AC3399AE5EBEC, 0x87816E97F842A75B, - 0x110E2DB2E0484A4B, 0x331277CB3DD8DEDD, 0xBD510CAC79EB9FA5, 0x352179552A91F5C7 + +__constant__ uint2 T62[256] = { + {0xFC7D40C3,0x7E37E62D}, {0xEE939E5B,0x776F25A4}, {0xDD8FB5AD,0xE045C850}, {0x11FF1952,0x86ED5BA7}, + {0xCF616B35,0xE91D0BD9}, {0x6E408FFB,0x37E0AB25}, {0x31025A7A,0x9607F6C0}, {0x16D23C9D,0x0B02F5E1}, + {0xFB50650C,0xF3D8486B}, {0xC40875F5,0x621CFF27}, {0xFA5FD34A,0x7D40CB71}, {0xDAA29062,0x6DAA6616}, + {0x23EC84E2,0x9F5F3549}, {0xC507C3B3,0xEC847C3D}, {0x043CE205,0x025A3668}, {0x4DAC0B19,0xA8BF9E6C}, + {0xE9BEBB94,0xFA808BE2}, {0x77C74FA3,0xB5B99C52}, {0xF0397BCC,0x78D9BC95}, {0xDBAD2624,0xE332E50C}, + {0x9332797E,0xC74FCE12}, {0x2EA709AB,0x1729ECEB}, {0x9954D1F8,0xC2D6B9F6}, {0xBAB8551A,0x5D898CBF}, + {0x17DD8ADB,0x859A76FB}, {0x362F7FB5,0x1BE85886}, {0xF136CD8A,0xF6413F8F}, {0xBBB7E35C,0xD3110FA5}, + {0x14CC4D11,0x0A2FEED5}, {0xCD7F1AB9,0xE83010ED}, {0x5F42D581,0xA1E75DE5}, {0xC13B21B6,0xEEDE4A55}, + {0xF94E1480,0xF2F5535F}, {0x1888761E,0x0CC1B46D}, {0x6529913B,0xBCE15FDB}, {0x5A7181C2,0x2D25E897}, + {0xE2D7A554,0x71817F1C}, {0x5C53124B,0x2E52C5CB}, {0xEF9C281D,0xF9F7A6BE}, {0x21F2F56E,0x9E722E7D}, + {0x81DCA7E6,0xCE170D9B}, {0x1CB4941B,0x0E9B8205}, {0x3C49D733,0x1E712F62}, {0x42F9F7DC,0x21E45CFA}, + {0x8BBA0F60,0xCB8E7A7F}, {0x010FB646,0x8E98831A}, {0x8E895B23,0x474CCF0D}, {0x4FB27A95,0xA9928558}, + {0x05335443,0x8CC2B572}, {0x84EFF3A5,0x42D5B8E9}, {0x021E718C,0x012D1B34}, {0xAE74180B,0x57A6626A}, + {0xE3D81312,0xFF19FC06}, {0x6A7C6DFE,0x35BA9D4D}, {0x8F86ED65,0xC9D44C17}, {0xA02E5288,0x506523E6}, + {0x06229389,0x03772D5C}, {0x0B691EC0,0x8B01F4FE}, {0xED825991,0xF8DABD8A}, {0x985B67BE,0x4C4E3AEC}, + {0x7FBF96A9,0xB10DF082}, {0xD4F8DAE1,0x6A69279A}, {0xD3D5FF2E,0xE78689DC}, {0x1FA553D1,0x812E1A2B}, + {0xEBA0CA18,0xFBAD90D6}, {0x34310E39,0x1AC543B2}, {0x2CB97827,0x1604F7DF}, {0x51189F02,0xA6241C69}, + {0xEAAF7C5E,0x753513CC}, {0xC84C4EFA,0x64F2A59F}, {0x489F5F5A,0x247D2B1E}, {0xAB474C48,0xDB64D718}, + {0xF2270A40,0x79F4A7A1}, {0x2A9BEBAE,0x1573DA83}, {0x68621C72,0x34978679}, {0xA2302304,0x514838D2}, + {0xFD72F685,0xF0AF6537}, {0x3A6B44BA,0x1D06023E}, {0xCE6EDD73,0x678588C3}, {0xCC70ACFF,0x66A893F7}, + {0xB5EDA9DF,0xD4D24E29}, {0x70EA6A6C,0x38563214}, {0x0E5A4A83,0x07C3418C}, {0x5635BACD,0x2BCBB22F}, + {0x0878D90A,0x04B46CD0}, {0x0C443B0F,0x06EE5AB8}, {0x76C8F9E5,0x3B211F48}, {0x12EEDE98,0x0958C389}, + {0xBF8B0159,0xD14B39CD}, {0x72F41BE0,0x397B2920}, {0x13E168DE,0x87C04093}, {0x47CAA39F,0xAD26E988}, + {0x9C6785BB,0x4E140C84}, {0xB7F3D853,0xD5FF551D}, {0x5D5CA40D,0xA0CA46D1}, {0x87FE346F,0xCD6020C7}, + {0x15C3FB57,0x84B76DCF}, {0xA121E4CE,0xDEFDA0FC}, {0x96012D3D,0x4B8D7B60}, {0x298A2C64,0x9AC642AD}, + {0x10F0AF14,0x0875D8BD}, {0x7B8374AC,0xB357C6EA}, {0x9A451632,0x4D6321D8}, {0xC719B23F,0xEDA96709}, + {0xF328BC06,0xF76C24BB}, {0x912C08F2,0xC662D526}, {0x7892B366,0x3CE25EC4}, {0x6F4F39BD,0xB978283F}, + {0x9D6833FD,0xC08C8F9E}, {0x9E79F437,0x4F3917B0}, {0xB2C08C10,0x593DE06F}, {0xB1D14BDA,0xD6887841}, + {0x32139DB0,0x19B26EEE}, {0x75D93E2F,0xB4948766}, {0x1987C058,0x82593777}, {0x3D466175,0x90E9AC78}, + {0xFF6C8709,0xF1827E03}, {0x353EB87F,0x945DC0A8}, {0x8AB5B926,0x4516F965}, {0x7EB020EF,0x3F957398}, + {0x6D514831,0xB855330B}, {0x542BCB41,0x2AE6A91B}, {0xC6160479,0x6331E413}, {0x80D311A0,0x408F8E81}, + {0xC325503A,0xEFF35161}, {0xBD9570D5,0xD06622F9}, {0x0D4B8D49,0x8876D9A2}, {0x573A0C8B,0xA5533135}, + {0xDF91C421,0xE168D364}, {0xF50A2F8F,0xF41B09E7}, {0x24C1A12D,0x12B09B0F}, {0xA9593DC4,0xDA49CC2C}, + {0x3E57A6BF,0x1F5C3456}, {0xA8568B82,0x54D14F36}, {0x43F6419A,0xAF7CDFE0}, {0xC943F8BC,0xEA6A2685}, + {0xD7E91D2B,0xE5DCBFB4}, {0x799D0520,0xB27ADDDE}, {0xD6E6AB6D,0x6B443CAE}, {0xF61BE845,0x7BAE91C9}, + {0x7CAE5163,0x3EB868AC}, {0x22E332A4,0x11C7B653}, {0xB9A992D0,0xD23C1491}, {0x0311C7CA,0x8FB5982E}, + {0xE0C9D4D8,0x70AC6428}, {0x0F55FCC5,0x895BC296}, {0xEC8DEFD7,0x76423E90}, {0xDE9E7267,0x6FF0507E}, + {0x7A8CC2EA,0x3DCF45F0}, {0x941F5CB1,0x4AA06054}, {0xB0DEFD9C,0x5810FB5B}, {0xBC9AC693,0x5EFEA1E3}, + {0xDC8003EB,0x6EDD4B4A}, {0xE8B10DD2,0x741808F8}, {0x28859A22,0x145EC1B7}, {0x50172944,0x28BC9F73}, + {0x4EBDCCD3,0x270A0642}, {0x331C2BF6,0x972AEDF4}, {0x0A66A886,0x059977E4}, {0x4A812ED6,0x2550302A}, + {0xA7037747,0xDD8A8DA0}, {0x970E9B7B,0xC515F87A}, {0x601AC578,0x3023EAA9}, {0x73FBADA6,0xB7E3AA3A}, + {0x1EAAE597,0x0FB69931}, {0x00000000,0x00000000}, {0x6204B4F4,0x310EF19D}, {0x44DB6455,0x229371A6}, + {0x1A960792,0x0DECAF59}, {0xB8A62496,0x5CA4978B}, {0x38753536,0x1C2B190A}, {0x82CD602C,0x41A295B5}, + {0x6426277D,0x3279DCC1}, {0x9F764271,0xC1A194AA}, {0x26DFD0A1,0x139D803B}, {0x41E83016,0xAE51C4D4}, + {0xAD65DFC1,0xD813FA44}, {0x45D4D213,0xAC0BF2BC}, {0x46C515D9,0x23BE6A92}, {0x923DCF38,0x49D74D08}, + {0x27D066E7,0x9D050321}, {0x5E4D63C7,0x2F7FDEFF}, {0x55247D07,0xA47E2A01}, {0x2FA8BFED,0x99B16FF1}, + {0x8C972AAF,0x4661D439}, {0xA33F9542,0xDFD0BBC8}, {0xA51D06CB,0xDCA79694}, {0x7DA1E725,0xB020EBB6}, + {0x696DAA34,0xBA0F0563}, {0xD5F76CA7,0xE4F1A480}, {0x9510EAF7,0xC438E34E}, {0x3B64F2FC,0x939E8124}, + {0x072D25CF,0x8DEFAE46}, {0x586FF04E,0x2C08F3A3}, {0xB3CF3A56,0xD7A56375}, {0x40E78650,0x20C947CE}, + {0x86F18229,0x43F8A3DD}, {0xAC6A6987,0x568B795E}, {0x1DBB225D,0x8003011F}, {0xF7145E03,0xF53612D3}, + {0x300DEC3C,0x189F75DA}, {0x3720C9F3,0x9570DB9C}, {0x6B73DBB8,0xBB221E57}, {0xE4F536DD,0x72F65240}, + {0x88ABC8AA,0x443BE251}, {0xD9B357A8,0xE21FFE38}, {0xE7E4F117,0xFD43CA6E}, {0x89A47EEC,0xCAA3614B}, + {0xE1C6629E,0xFE34E732}, {0x1B99B1D4,0x83742C43}, {0x83C2D66A,0xCF3A16AF}, {0x4990E91C,0xAAE5A804}, + {0x4CA3BD5F,0x26271D76}, {0x3F5810F9,0x91C4B74C}, {0xF841A2C6,0x7C6DD045}, {0xFE63314F,0x7F1AFD19}, + {0x8D989CE9,0xC8F95723}, {0x5306EE8E,0xA709075D}, {0xAA48FA0E,0x55FC5402}, {0x9023BEB4,0x48FA563C}, + {0xCA523F76,0x65DFBEAB}, {0xD8BCE1EE,0x6C877D22}, {0x85E045E3,0xCC4D3BF3}, {0x6115733E,0xBEBB69B3}, + {0x20FD4328,0x10EAAD67}, {0x71E5DC2A,0xB6CEB10E}, {0x6737E0B7,0xBDCC44EF}, {0xA412B08D,0x523F158E}, + {0x2DB6CE61,0x989C74C5}, {0x2B945DE8,0x9BEB5999}, {0x09776F4C,0x8A2CEFCA}, {0x5B7E3784,0xA3BD6B8D}, + {0xCB5D8930,0xEB473DB1}, {0x9B4AA074,0xC3FBA2C2}, {0x25CE176B,0x9C281815}, {0xD0C438E4,0x683311F2}, + {0xBE84B71F,0x5FD3BAD7}, {0xE5FA809B,0xFC6ED15A}, {0x6C5EFE77,0x36CDB011}, {0x520958C8,0x29918447}, + {0x59604608,0xA29070B9}, {0xA60CC101,0x53120EBA}, {0x74D68869,0x3A0C047C}, {0xD2DA4968,0x691E0AC6}, + {0xE6EB4751,0x73DB4974}, {0xF40599C9,0x7A838AFD}, {0xB4E21F99,0x5A4ACD33}, {0xC03497F0,0x6046C94F}, + {0xD1CB8EA2,0xE6AB92E8}, {0x663856F1,0x3354C7F5}, {0xAF7BAE4D,0xD93EE170}, {0xC22AE67C,0x616BD27B}, + {0x397A8370,0x92B39A10}, {0x4B8E9890,0xABC8B330}, {0x630B02B2,0xBF967287}, {0xB6FC6E15,0x5B67D607}, }; -__device__ static uint64_t T5[256] = { - 0x8AB0A96846E06A6D, 0x43C7E80B4BF0B33A, 0x08C9B3546B161EE5, 0x39F1C235EBA990BE, - 0xC1BEF2376606C7B2, 0x2C209233614569AA, 0xEB01523B6FC3289A, 0x946953AB935ACEDD, - 0x272838F63E13340E, 0x8B0455ECA12BA052, 0x77A1B2C4978FF8A2, 0xA55122CA13E54086, - 0x2276135862D3F1CD, 0xDB8DDFDE08B76CFE, 0x5D1E12C89E4A178A, 0x0E56816B03969867, - 0xEE5F79953303ED59, 0xAFED748BAB78D71D, 0x6D929F2DF93E53EE, 0xF5D8A8F8BA798C2A, - 0xF619B1698E39CF6B, 0x95DDAF2F749104E2, 0xEC2A9C80E0886427, 0xCE5C8FD8825B95EA, - 0xC4E0D9993AC60271, 0x4699C3A5173076F9, 0x3D1B151F50A29F42, 0x9ED505EA2BC75946, - 0x34665ACFDC7F4B98, 0x61B1FB53292342F7, 0xC721C0080E864130, 0x8693CD1696FD7B74, - 0x872731927136B14B, 0xD3446C8A63A1721B, 0x669A35E8A6680E4A, 0xCAB658F239509A16, - 0xA4E5DE4EF42E8AB9, 0x37A7435EE83F08D9, 0x134E6239E26C7F96, 0x82791A3C2DF67488, - 0x3F6EF00A8329163C, 0x8E5A7E42FDEB6591, 0x5CAAEE4C7981DDB5, 0x19F234785AF1E80D, - 0x255DDDE3ED98BD70, 0x50898A32A99CCCAC, 0x28CA4519DA4E6656, 0xAE59880F4CB31D22, - 0x0D9798FA37D6DB26, 0x32F968F0B4FFCD1A, 0xA00F09644F258545, 0xFA3AD5175E24DE72, - 0xF46C547C5DB24615, 0x713E80FBFF0F7E20, 0x7843CF2B73D2AAFA, 0xBD17EA36AEDF62B4, - 0xFD111BACD16F92CF, 0x4ABAA7DBC72D67E0, 0xB3416B5DAD49FAD3, 0xBCA316B24914A88B, - 0x15D150068AECF914, 0xE27C1DEBE31EFC40, 0x4FE48C759BEDA223, 0x7EDCFD141B522C78, - 0x4E5070F17C26681C, 0xE696CAC15815F3BC, 0x35D2A64B3BB481A7, 0x800CFF29FE7DFDF6, - 0x1ED9FAC3D5BAA4B0, 0x6C2663A91EF599D1, 0x03C1199134404341, 0xF7AD4DED69F20554, - 0xCD9D9649B61BD6AB, 0xC8C3BDE7EADB1368, 0xD131899FB02AFB65, 0x1D18E352E1FAE7F1, - 0xDA39235AEF7CA6C1, 0xA1BBF5E0A8EE4F7A, 0x91377805CF9A0B1E, 0x3138716180BF8E5B, - 0xD9F83ACBDB3CE580, 0x0275E515D38B897E, 0x472D3F21F0FBBCC6, 0x2D946EB7868EA395, - 0xBA3C248D21942E09, 0xE7223645BFDE3983, 0xFF64FEB902E41BB1, 0xC97741630D10D957, - 0xC3CB1722B58D4ECC, 0xA27AEC719CAE0C3B, 0x99FECB51A48C15FB, 0x1465AC826D27332B, - 0xE1BD047AD75EBF01, 0x79F733AF941960C5, 0x672EC96C41A3C475, 0xC27FEBA6524684F3, - 0x64EFD0FD75E38734, 0xED9E60040743AE18, 0xFB8E2993B9EF144D, 0x38453EB10C625A81, - 0x6978480742355C12, 0x48CF42CE14A6EE9E, 0x1CAC1FD606312DCE, 0x7B82D6BA4792E9BB, - 0x9D141C7B1F871A07, 0x5616B80DC11C4A2E, 0xB849C198F21FA777, 0x7CA91801C8D9A506, - 0xB1348E487EC273AD, 0x41B20D1E987B3A44, 0x7460AB55A3CFBBE3, 0x84E628034576F20A, - 0x1B87D16D897A6173, 0x0FE27DEFE45D5258, 0x83CDE6B8CA3DBEB7, 0x0C23647ED01D1119, - 0x7A362A3EA0592384, 0xB61F40F3F1893F10, 0x75D457D1440471DC, 0x4558DA34237035B8, - 0xDCA6116587FC2043, 0x8D9B67D3C9AB26D0, 0x2B0B5C88EE0E2517, 0x6FE77A382AB5DA90, - 0x269CC472D9D8FE31, 0x63C41E46FAA8CB89, 0xB7ABBC771642F52F, 0x7D1DE4852F126F39, - 0xA8C6BA3024339BA0, 0x600507D7CEE888C8, 0x8FEE82C61A20AFAE, 0x57A2448926D78011, - 0xFCA5E72836A458F0, 0x072BCEBB8F4B4CBD, 0x497BBE4AF36D24A1, 0x3CAFE99BB769557D, - 0x12FA9EBD05A7B5A9, 0xE8C04BAA5B836BDB, 0x4273148FAC3B7905, 0x908384812851C121, - 0xE557D3506C55B0FD, 0x72FF996ACB4F3D61, 0x3EDA0C8E64E2DC03, 0xF0868356E6B949E9, - 0x04EAD72ABB0B0FFC, 0x17A4B5135967706A, 0xE3C8E16F04D5367F, 0xF84F30028DAF570C, - 0x1846C8FCBD3A2232, 0x5B8120F7F6CA9108, 0xD46FA231ECEA3EA6, 0x334D947453340725, - 0x58403966C28AD249, 0xBED6F3A79A9F21F5, 0x68CCB483A5FE962D, 0xD085751B57E1315A, - 0xFED0023DE52FD18E, 0x4B0E5B5F20E6ADDF, 0x1A332DE96EB1AB4C, 0xA3CE10F57B65C604, - 0x108F7BA8D62C3CD7, 0xAB07A3A11073D8E1, 0x6B0DAD1291BED56C, 0xF2F366433532C097, - 0x2E557726B2CEE0D4, 0x0000000000000000, 0xCB02A476DE9B5029, 0xE4E32FD48B9E7AC2, - 0x734B65EE2C84F75E, 0x6E5386BCCD7E10AF, 0x01B4FC84E7CBCA3F, 0xCFE8735C65905FD5, - 0x3613BFDA0FF4C2E6, 0x113B872C31E7F6E8, 0x2FE18BA255052AEB, 0xE974B72EBC48A1E4, - 0x0ABC5641B89D979B, 0xB46AA5E62202B66E, 0x44EC26B0C4BBFF87, 0xA6903B5B27A503C7, - 0x7F680190FC99E647, 0x97A84A3AA71A8D9C, 0xDD12EDE16037EA7C, 0xC554251DDD0DC84E, - 0x88C54C7D956BE313, 0x4D91696048662B5D, 0xB08072CC9909B992, 0xB5DE5962C5C97C51, - 0x81B803AD19B637C9, 0xB2F597D94A8230EC, 0x0B08AAC55F565DA4, 0xF1327FD2017283D6, - 0xAD98919E78F35E63, 0x6AB9519676751F53, 0x24E921670A53774F, 0xB9FD3D1C15D46D48, - 0x92F66194FBDA485F, 0x5A35DC7311015B37, 0xDED3F4705477A93D, 0xC00A0EB381CD0D8D, - 0xBB88D809C65FE436, 0x16104997BEACBA55, 0x21B70AC95693B28C, 0x59F4C5E225411876, - 0xD5DB5EB50B21F499, 0x55D7A19CF55C096F, 0xA97246B4C3F8519F, 0x8552D487A2BD3835, - 0x54635D181297C350, 0x23C2EFDC85183BF2, 0x9F61F96ECC0C9379, 0x534893A39DDC8FED, - 0x5EDF0B59AA0A54CB, 0xAC2C6D1A9F38945C, 0xD7AEBBA0D8AA7DE7, 0x2ABFA00C09C5EF28, - 0xD84CC64F3CF72FBF, 0x2003F64DB15878B3, 0xA724C7DFC06EC9F8, 0x069F323F68808682, - 0xCC296ACD51D01C94, 0x055E2BAE5CC0C5C3, 0x6270E2C21D6301B6, 0x3B842720382219C0, - 0xD2F0900E846AB824, 0x52FC6F277A1745D2, 0xC6953C8CE94D8B0F, 0xE009F8FE3095753E, - 0x655B2C7992284D0B, 0x984A37D54347DFC4, 0xEAB5AEBF8808E2A5, 0x9A3FD2C090CC56BA, - 0x9CA0E0FFF84CD038, 0x4C2595E4AFADE162, 0xDF6708F4B3BC6302, 0xBF620F237D54EBCA, - 0x93429D101C118260, 0x097D4FD08CDDD4DA, 0x8C2F9B572E60ECEF, 0x708A7C7F18C4B41F, - 0x3A30DBA4DFE9D3FF, 0x4006F19A7FB0F07B, 0x5F6BF7DD4DC19EF4, 0x1F6D064732716E8F, - 0xF9FBCC866A649D33, 0x308C8DE567744464, 0x8971B0F972A0292C, 0xD61A47243F61B7D8, - 0xEFEB8511D4C82766, 0x961CB6BE40D147A3, 0xAAB35F25F7B812DE, 0x76154E407044329D, - 0x513D76B64E570693, 0xF3479AC7D2F90AA8, 0x9B8B2E4477079C85, 0x297EB99D3D85AC69 + +__constant__ uint2 T72[256] = { + {0xCE553FE6,0xD031C397}, {0xB006B525,0x16BA5B01}, {0x296E70C8,0xA89BADE6}, {0x77D3435B,0x6A1F525D}, + {0x573DFA0B,0x6E103570}, {0x17FC95AB,0x660EFB2A}, {0x97634BF6,0x76327A9E}, {0x62458BF5,0x4BAD9D64}, + {0xDBC3F748,0xF1830CAE}, {0x669131FF,0xC5C8F542}, {0xDC48B0CB,0x95044A1C}, {0x3CF8B866,0x892962DF}, + {0xE930C135,0xB0B9E208}, {0x611A767C,0xA14FB3F0}, {0x1C160136,0x8D2605F2}, {0xFECC549E,0xD6B71922}, + {0xA5907D8B,0x37089438}, {0x5803D49C,0x0B5DA38E}, {0xEA6F3CBC,0x5A5BCC9C}, {0x3B73FFE5,0xEDAE246D}, + {0xDE22EDCE,0xD2B87E0F}, {0xCA8185EC,0x5E54ABB1}, {0xE80561B9,0x1DE7F88F}, {0x0135A08C,0xAD5E1A87}, + {0x65CECC76,0x2F2ADBD6}, {0x82F58358,0x5780B5A7}, {0xEDE47B3F,0x3EDC8A2E}, {0x06BEE70F,0xC9D95C35}, + {0x6C4E05EE,0x83BE111D}, {0x59367410,0xA603B909}, {0x809FDE5D,0x103C81B4}, {0x7D0C774A,0x2C69B602}, + {0xD5C87953,0x399080D7}, {0x487406B4,0x09D41E16}, {0x26505E5F,0xCDD63B18}, {0x9B0298E8,0xF99DC2F4}, + {0x943CB67F,0x9CD0540A}, {0x891F17C5,0xBCA84B7F}, {0xB78DF2A6,0x723D1DB3}, {0xE73B4F2E,0x78AA6E71}, + {0xA071670D,0x1433E699}, {0x54620782,0x84F21BE4}, {0xB4D20F2F,0x98DF3327}, {0xD3769E5C,0xF049DCE2}, + {0x9656EB7A,0xDB6C6019}, {0x078B4783,0x648746B2}, {0x8DCBADCF,0x32CD2359}, {0xF0C7DA85,0x1EA4955B}, + {0x1B9D46B5,0xE9A14340}, {0xBBEC21B8,0xFD92A5D9}, {0x0E0B8E1B,0xC8138C79}, {0x6D7BA562,0x2EE00B9A}, + {0x93B7F1FC,0xF85712B8}, {0x0BEA949D,0xEB28FED8}, {0x8A40EA4C,0x564A65EB}, {0x474A2823,0x6C9988E8}, + {0x121D8F2D,0x4535898B}, {0x31ACCBF4,0xABD8C032}, {0xB9867CBD,0xBA2E91CA}, {0xEF8E263A,0x7960BE3D}, + {0x602FD6F0,0x0C11A977}, {0x16C93527,0xCB50E1AD}, {0x035FFD89,0xEAE22E94}, {0x5DE2CE1A,0x2866D12F}, + {0xAB9BF390,0xFF1B1841}, {0x8CFE0D43,0x9F9339DE}, {0xC48A0BF7,0x964727C8}, {0xAAAE531C,0x524502C6}, + {0xAC10B413,0x9B9C5EF3}, {0x42AB32A5,0x4FA2FA49}, {0xE551122B,0x3F165A62}, {0x76E6E3D7,0xC74148DA}, + {0xE464B2A7,0x924840E5}, {0xD69784DA,0xD372AE43}, {0x05E11A86,0x233B72A1}, {0x4941A638,0xA48A0491}, + {0xC9DE7865,0xB4B68525}, {0xA6CF8002,0xDDEABAAC}, {0x50B6BD88,0x0A9773C2}, {0x5EBD3393,0xC284FFBB}, + {0x2C8F6A4E,0x8BA0DF47}, {0x4D951C32,0x2AEF6CB7}, {0x2A318D41,0x42798372}, {0xBF389BB2,0x73F7CDFF}, + {0x382C026C,0x074C0AF9}, {0x243A035A,0x8A6A0F0B}, {0x5F88931F,0x6FDAE53C}, {0x7E538AC3,0xC68B9896}, + {0x1AA8E639,0x44FF59C7}, {0x439E9229,0xE2FCE0CE}, {0x79D8CD40,0xA20CDE24}, {0xC8EBD8E9,0x19E89FA2}, + {0xF398270C,0xF446BBCF}, {0x2284E455,0x43B3533E}, {0x8E945046,0xD82F0DCD}, {0xB26CE820,0x51066F12}, + {0x6BC5426D,0xE73957AF}, {0x40C16FA0,0x081ECE5A}, {0xC5BFAB7B,0x3B193D4F}, {0xDF174D42,0x7FE66488}, + {0x705804D8,0x0E9814EF}, {0x7C39D7C6,0x8137AC85}, {0xE185A821,0xB1733244}, {0x6F11F867,0x695C3F89}, + {0xE3EFF524,0xF6CF0657}, {0xD02963D5,0x1AABF276}, {0x75B91E5E,0x2DA3664E}, {0x1077D228,0x0289BD98}, + {0xF413608F,0x90C1FD7D}, {0xFD93A917,0x3C5537B6}, {0x3919A2E0,0xAA12107E}, {0x30996B78,0x0686DAB5}, + {0x9EE3826E,0xDAA6B055}, {0x56085A87,0xC34E2FF7}, {0x4FFF4137,0x6D5358A4}, {0xB35948AC,0xFC587595}, + {0xC7D5F67E,0x7CA5095C}, {0x8B754AC0,0xFB147F6C}, {0x91DDACF9,0xBFEB26AB}, {0x67A49173,0x6896EFC5}, + {0x1E7C5C33,0xCA9A31E1}, {0xB13315A9,0xBBE44186}, {0x689ABFE4,0x0DDB793B}, {0xA7FA208E,0x70B4A02B}, + {0x7307F951,0xE47A3A7B}, {0x14A36822,0x8CECD5BE}, {0x23B144D9,0xEEED49B9}, {0xB8B3DC31,0x17708B4D}, + {0x2765FED3,0x6088219F}, {0xF1F27A09,0xB3FA8FDC}, {0xFCA6099B,0x910B2D31}, {0x78ED6DCC,0x0F52C4A3}, + {0xBAD98134,0x50CCBF5E}, {0x7F662A4F,0x6BD58211}, {0xD4FDD9DF,0x94CE9A50}, {0x45207526,0x2B25BCFB}, + {0x1F49FCBF,0x67C42B66}, {0x723259DD,0x492420FC}, {0x18C2BB3C,0x03436DD4}, {0xF872B391,0x1F6E4517}, + {0x69AF1F68,0xA08563BC}, {0xEEBB86B6,0xD43EA4BA}, {0x08B56914,0x01CAD04C}, {0x0980C998,0xAC94CACB}, + {0x9A373864,0x54C3D873}, {0x2DBACAC2,0x26FEC5C0}, {0xBE0D3B3E,0xDEA9D778}, {0x20EEB950,0x040F672D}, + {0x7BB29045,0xE5B0EA37}, {0xCBB42560,0xF30AB136}, {0x37122CFB,0x62019C07}, {0x13282FA1,0xE86B930C}, + {0x2EE5374B,0xCC1CEB54}, {0xA21B3A08,0x538FD28A}, {0xD89C0AC1,0x1B61223A}, {0xAD25149F,0x36C24474}, + {0xF74C9D06,0x7A23D3E9}, {0x9968C5ED,0xBE21F6E7}, {0x36278C77,0xCF5F8680}, {0xEB5A9C30,0xF705D61B}, + {0x52DCE08D,0x4D2B47D1}, {0xC234ECF8,0x5F9E7BFD}, {0x3DCD18EA,0x24777858}, {0x4415D5AA,0x867BA67C}, + {0x5A698999,0x4CE1979D}, {0x00000000,0x00000000}, {0x33C696F1,0xEC64F421}, {0xC16B1171,0xB57C5569}, + {0x467F88AF,0xC1C7926F}, {0x0F3E2E97,0x654D96FE}, {0xA8C40E19,0x15F936D5}, {0xA9F1AE95,0xB8A72C52}, + {0x21DB19DC,0xA9517DAA}, {0xFA18EE94,0x58D27104}, {0xF2AD8780,0x5918A148}, {0xDAF657C4,0x5CDD1629}, + {0x64FB6CFA,0x8274C151}, {0xC6E056F2,0xD1FB13DB}, {0xCF609F6A,0x7D6FD910}, {0xD9A9AA4D,0xB63F38BD}, + {0xF526C003,0x3D9FE7FA}, {0x871499DE,0x74BBC706}, {0xB6B8522A,0xDF630734}, {0xCD0AC26F,0x3AD3ED03}, + {0x83C023D4,0xFADEAF20}, {0x4ECAE1BB,0xC00D4223}, {0x5CD76E96,0x8538CBA8}, {0x6E2458EB,0xC402250E}, + {0x026A5D05,0x47BC3413}, {0x114272A4,0xAFD7A71F}, {0xCC3F62E3,0x978DF784}, {0xA144C781,0xB96DFC1E}, + {0x1596C8AE,0x21B2CF39}, {0x950916F3,0x318E4E8D}, {0x3E92E563,0xCE9556CC}, {0xDD7D1047,0x385A509B}, + {0xB5E7AFA3,0x358129A0}, {0x63702B79,0xE6F387E3}, {0x53E94001,0xE0755D56}, {0xFFF9F412,0x7BE903A5}, + {0x90E80C75,0x12B53C2C}, {0x857EC4DB,0x3307F315}, {0x0C61D31E,0x8FAFB86A}, {0x86213952,0xD9E5DD81}, + {0x9FD622E2,0x77F8AAD2}, {0x357871FE,0x25BDA814}, {0x8FA1F0CA,0x7571174A}, {0x985D6561,0x137FEC60}, + {0x9DBC7FE7,0x30449EC1}, {0x41F4CF2C,0xA540D4DD}, {0xAE7AE916,0xDC206AE0}, {0xE2DA55A8,0x5B911CD0}, + {0xF947131D,0xB2305F90}, {0xBD52C6B7,0x344BF9EC}, {0xD2433ED0,0x5D17C665}, {0xC05EB1FD,0x18224FEE}, + {0x844B6457,0x9E59E992}, {0xA4A5DD07,0x9A568EBF}, {0x716DA454,0xA3C60E68}, {0xD7A22456,0x7E2CB4C4}, + {0x4CA0BCBE,0x87B17630}, {0x32F3367D,0x413AEEA6}, {0xBC67663B,0x9915E36B}, {0x3A465F69,0x40F03EEA}, + {0xE0B008AD,0x1C2D28C3}, {0x4A1E5BB1,0x4E682A05}, {0x285BD044,0x05C5B761}, {0x5B5C2915,0xE1BF8D1A}, + {0xC3014C74,0xF2C0617A}, {0xD11CC359,0xB7F5E8F1}, {0x3FA745EF,0x63CB4C4B}, {0x9C89DF6B,0x9D1A8446}, + {0x4B2BFB3D,0xE3363082}, {0xE60EEFA2,0xD5F474F6}, {0xFB2D4E18,0xF58C6B83}, {0x0ADF3411,0x4676E45F}, + {0x1D23A1BA,0x20781F75}, {0x81AA7ED1,0xBD629B33}, {0x19F71BB0,0xAE1D7753}, {0xA32E9A84,0xFED1C80D}, + {0x92825170,0x5509083F}, {0x5557A70E,0x29AC0163}, {0x51831D04,0xA7C96945}, {0x04D4BA0A,0x8E656826}, + {0x882AB749,0x11F651F8}, {0xF6793D8A,0xD77DC96E}, {0x2B042DCD,0xEF2799F5}, {0x7A8730C9,0x48EEF0B0}, + {0x0D547392,0x22F1A2ED}, {0x2FD097C7,0x6142F1D3}, {0x6AF0E2E1,0x4A674D28}, {0x748CBED2,0x80FD7CC9}, + {0xAF4F499A,0x717E7067}, {0xECD1DBB3,0x938290A9}, {0x344DD172,0x88E3B293}, {0x250FA3D6,0x2734158C}, }; -__device__ static uint64_t T6[256] = { - 0x7E37E62DFC7D40C3, 0x776F25A4EE939E5B, 0xE045C850DD8FB5AD, 0x86ED5BA711FF1952, - 0xE91D0BD9CF616B35, 0x37E0AB256E408FFB, 0x9607F6C031025A7A, 0x0B02F5E116D23C9D, - 0xF3D8486BFB50650C, 0x621CFF27C40875F5, 0x7D40CB71FA5FD34A, 0x6DAA6616DAA29062, - 0x9F5F354923EC84E2, 0xEC847C3DC507C3B3, 0x025A3668043CE205, 0xA8BF9E6C4DAC0B19, - 0xFA808BE2E9BEBB94, 0xB5B99C5277C74FA3, 0x78D9BC95F0397BCC, 0xE332E50CDBAD2624, - 0xC74FCE129332797E, 0x1729ECEB2EA709AB, 0xC2D6B9F69954D1F8, 0x5D898CBFBAB8551A, - 0x859A76FB17DD8ADB, 0x1BE85886362F7FB5, 0xF6413F8FF136CD8A, 0xD3110FA5BBB7E35C, - 0x0A2FEED514CC4D11, 0xE83010EDCD7F1AB9, 0xA1E75DE55F42D581, 0xEEDE4A55C13B21B6, - 0xF2F5535FF94E1480, 0x0CC1B46D1888761E, 0xBCE15FDB6529913B, 0x2D25E8975A7181C2, - 0x71817F1CE2D7A554, 0x2E52C5CB5C53124B, 0xF9F7A6BEEF9C281D, 0x9E722E7D21F2F56E, - 0xCE170D9B81DCA7E6, 0x0E9B82051CB4941B, 0x1E712F623C49D733, 0x21E45CFA42F9F7DC, - 0xCB8E7A7F8BBA0F60, 0x8E98831A010FB646, 0x474CCF0D8E895B23, 0xA99285584FB27A95, - 0x8CC2B57205335443, 0x42D5B8E984EFF3A5, 0x012D1B34021E718C, 0x57A6626AAE74180B, - 0xFF19FC06E3D81312, 0x35BA9D4D6A7C6DFE, 0xC9D44C178F86ED65, 0x506523E6A02E5288, - 0x03772D5C06229389, 0x8B01F4FE0B691EC0, 0xF8DABD8AED825991, 0x4C4E3AEC985B67BE, - 0xB10DF0827FBF96A9, 0x6A69279AD4F8DAE1, 0xE78689DCD3D5FF2E, 0x812E1A2B1FA553D1, - 0xFBAD90D6EBA0CA18, 0x1AC543B234310E39, 0x1604F7DF2CB97827, 0xA6241C6951189F02, - 0x753513CCEAAF7C5E, 0x64F2A59FC84C4EFA, 0x247D2B1E489F5F5A, 0xDB64D718AB474C48, - 0x79F4A7A1F2270A40, 0x1573DA832A9BEBAE, 0x3497867968621C72, 0x514838D2A2302304, - 0xF0AF6537FD72F685, 0x1D06023E3A6B44BA, 0x678588C3CE6EDD73, 0x66A893F7CC70ACFF, - 0xD4D24E29B5EDA9DF, 0x3856321470EA6A6C, 0x07C3418C0E5A4A83, 0x2BCBB22F5635BACD, - 0x04B46CD00878D90A, 0x06EE5AB80C443B0F, 0x3B211F4876C8F9E5, 0x0958C38912EEDE98, - 0xD14B39CDBF8B0159, 0x397B292072F41BE0, 0x87C0409313E168DE, 0xAD26E98847CAA39F, - 0x4E140C849C6785BB, 0xD5FF551DB7F3D853, 0xA0CA46D15D5CA40D, 0xCD6020C787FE346F, - 0x84B76DCF15C3FB57, 0xDEFDA0FCA121E4CE, 0x4B8D7B6096012D3D, 0x9AC642AD298A2C64, - 0x0875D8BD10F0AF14, 0xB357C6EA7B8374AC, 0x4D6321D89A451632, 0xEDA96709C719B23F, - 0xF76C24BBF328BC06, 0xC662D526912C08F2, 0x3CE25EC47892B366, 0xB978283F6F4F39BD, - 0xC08C8F9E9D6833FD, 0x4F3917B09E79F437, 0x593DE06FB2C08C10, 0xD6887841B1D14BDA, - 0x19B26EEE32139DB0, 0xB494876675D93E2F, 0x825937771987C058, 0x90E9AC783D466175, - 0xF1827E03FF6C8709, 0x945DC0A8353EB87F, 0x4516F9658AB5B926, 0x3F9573987EB020EF, - 0xB855330B6D514831, 0x2AE6A91B542BCB41, 0x6331E413C6160479, 0x408F8E8180D311A0, - 0xEFF35161C325503A, 0xD06622F9BD9570D5, 0x8876D9A20D4B8D49, 0xA5533135573A0C8B, - 0xE168D364DF91C421, 0xF41B09E7F50A2F8F, 0x12B09B0F24C1A12D, 0xDA49CC2CA9593DC4, - 0x1F5C34563E57A6BF, 0x54D14F36A8568B82, 0xAF7CDFE043F6419A, 0xEA6A2685C943F8BC, - 0xE5DCBFB4D7E91D2B, 0xB27ADDDE799D0520, 0x6B443CAED6E6AB6D, 0x7BAE91C9F61BE845, - 0x3EB868AC7CAE5163, 0x11C7B65322E332A4, 0xD23C1491B9A992D0, 0x8FB5982E0311C7CA, - 0x70AC6428E0C9D4D8, 0x895BC2960F55FCC5, 0x76423E90EC8DEFD7, 0x6FF0507EDE9E7267, - 0x3DCF45F07A8CC2EA, 0x4AA06054941F5CB1, 0x5810FB5BB0DEFD9C, 0x5EFEA1E3BC9AC693, - 0x6EDD4B4ADC8003EB, 0x741808F8E8B10DD2, 0x145EC1B728859A22, 0x28BC9F7350172944, - 0x270A06424EBDCCD3, 0x972AEDF4331C2BF6, 0x059977E40A66A886, 0x2550302A4A812ED6, - 0xDD8A8DA0A7037747, 0xC515F87A970E9B7B, 0x3023EAA9601AC578, 0xB7E3AA3A73FBADA6, - 0x0FB699311EAAE597, 0x0000000000000000, 0x310EF19D6204B4F4, 0x229371A644DB6455, - 0x0DECAF591A960792, 0x5CA4978BB8A62496, 0x1C2B190A38753536, 0x41A295B582CD602C, - 0x3279DCC16426277D, 0xC1A194AA9F764271, 0x139D803B26DFD0A1, 0xAE51C4D441E83016, - 0xD813FA44AD65DFC1, 0xAC0BF2BC45D4D213, 0x23BE6A9246C515D9, 0x49D74D08923DCF38, - 0x9D05032127D066E7, 0x2F7FDEFF5E4D63C7, 0xA47E2A0155247D07, 0x99B16FF12FA8BFED, - 0x4661D4398C972AAF, 0xDFD0BBC8A33F9542, 0xDCA79694A51D06CB, 0xB020EBB67DA1E725, - 0xBA0F0563696DAA34, 0xE4F1A480D5F76CA7, 0xC438E34E9510EAF7, 0x939E81243B64F2FC, - 0x8DEFAE46072D25CF, 0x2C08F3A3586FF04E, 0xD7A56375B3CF3A56, 0x20C947CE40E78650, - 0x43F8A3DD86F18229, 0x568B795EAC6A6987, 0x8003011F1DBB225D, 0xF53612D3F7145E03, - 0x189F75DA300DEC3C, 0x9570DB9C3720C9F3, 0xBB221E576B73DBB8, 0x72F65240E4F536DD, - 0x443BE25188ABC8AA, 0xE21FFE38D9B357A8, 0xFD43CA6EE7E4F117, 0xCAA3614B89A47EEC, - 0xFE34E732E1C6629E, 0x83742C431B99B1D4, 0xCF3A16AF83C2D66A, 0xAAE5A8044990E91C, - 0x26271D764CA3BD5F, 0x91C4B74C3F5810F9, 0x7C6DD045F841A2C6, 0x7F1AFD19FE63314F, - 0xC8F957238D989CE9, 0xA709075D5306EE8E, 0x55FC5402AA48FA0E, 0x48FA563C9023BEB4, - 0x65DFBEABCA523F76, 0x6C877D22D8BCE1EE, 0xCC4D3BF385E045E3, 0xBEBB69B36115733E, - 0x10EAAD6720FD4328, 0xB6CEB10E71E5DC2A, 0xBDCC44EF6737E0B7, 0x523F158EA412B08D, - 0x989C74C52DB6CE61, 0x9BEB59992B945DE8, 0x8A2CEFCA09776F4C, 0xA3BD6B8D5B7E3784, - 0xEB473DB1CB5D8930, 0xC3FBA2C29B4AA074, 0x9C28181525CE176B, 0x683311F2D0C438E4, - 0x5FD3BAD7BE84B71F, 0xFC6ED15AE5FA809B, 0x36CDB0116C5EFE77, 0x29918447520958C8, - 0xA29070B959604608, 0x53120EBAA60CC101, 0x3A0C047C74D68869, 0x691E0AC6D2DA4968, - 0x73DB4974E6EB4751, 0x7A838AFDF40599C9, 0x5A4ACD33B4E21F99, 0x6046C94FC03497F0, - 0xE6AB92E8D1CB8EA2, 0x3354C7F5663856F1, 0xD93EE170AF7BAE4D, 0x616BD27BC22AE67C, - 0x92B39A10397A8370, 0xABC8B3304B8E9890, 0xBF967287630B02B2, 0x5B67D607B6FC6E15 +// KeySchedule +__constant__ static uint64_t _ALIGN(32) CC[12][8] = { + {0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71, 0x0169679291e07c4b, 0x15d360a4082a42a2, 0x234d74cc36747605, 0x0745a6f2596580dd}, + {0x1a2f9da98ab5a36f, 0xd7b5700f469de34f, 0x982b230a72eafef3, 0x3101b5160f5ed561, 0x5899d6126b17b59a, 0xcaa70adbc261b55c, 0x56cdcbd71ba2dd55, 0xb79bb121700479e6}, + {0xc72fce2bacdc74f5, 0x35843d6a28fc390a, 0x8b1f9c525f5ef106, 0x7b7b29b11475eaf2, 0xb19e3590e40fe2d3, 0x09db6260373ac9c1, 0x31db7a8643f4b6c2, 0xb20aba0af5961e99}, + {0xd26615e8b3df1fef, 0xdde4715da0e148f9, 0x7d3c5c337e858e48, 0x3f355e68ad1c729d, 0x75d603ed822cd7a9, 0xbe0352933313b7d8, 0xf137e893a1ea5334, 0x2ed1e384bcbe0c22}, + {0x994747adac6bea4b, 0x6323a96c0c413f9a, 0x4a1086161f1c157f, 0xbdff0f80d7359e35, 0xa3f53a254717cdbf, 0x161a2723b700ffdf, 0xf563eaa97ea2567a, 0x57fe6c7cfd581760}, + {0xd9d33a1daeae4fae, 0xc039307a3bc3a46f, 0x6ca44251f9c4662d, 0xc68ef09ab49a7f18, 0xb4b79a1cb7a6facf, 0xb6c6bec2661ff20a, 0x354f903672c571bf, 0x6e7d64467a4068fa}, + {0xecc5aaee160ec7f4, 0x540924bffe86ac51, 0xc987bfe6c7c69e39, 0xc9937a19333e47d3, 0x372c822dc5ab9209, 0x04054a2883694706, 0xf34a3ca24c451735, 0x93d4143a4d568688}, + {0xa7c9934d425b1f9b, 0x41416e0c02aae703, 0x1ede369c71f8b74e, 0x9ac4db4d3b44b489, 0x90069b92cb2b89f4, 0x2fc4a5d12b8dd169, 0xd9a8515935c2ac36, 0x1ee702bfd40d7fa4}, + {0x9b223116545a8f37, 0xde5f16ecd89a4c94, 0x244289251b3a7d3a, 0x84090de0b755d93c, 0xb1ceb2db0b440a80, 0x549c07a69a8a2b7b, 0x602a1fcb92dc380e, 0xdb5a238351446172}, + {0x526f0580a6debeab, 0xf3f3e4b248e52a38, 0xdb788aff1ce74189, 0x0361331b8ae1ff1f, 0x4b3369af0267e79f, 0xf452763b306c1e7a, 0xc3b63b15d1fa9836, 0xed9c4598fbc7b474}, + {0xfb89c8efd09ecd7b, 0x94fe5a63cdc60230, 0x6107abebbb6bfad8, 0x7966841421800120, 0xcab948eaef711d8a, 0x986e477d1dcdbaef, 0x5dd86fc04a59a2de, 0x1b2df381cda4ca6b}, + {0xba3116f167e78e37, 0x7ab14904b08013d2, 0x771ddfbc323ca4cd, 0x9b9f2130d41220f8, 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48} }; -__device__ static uint64_t T7[256] = { - 0xD031C397CE553FE6, 0x16BA5B01B006B525, 0xA89BADE6296E70C8, 0x6A1F525D77D3435B, - 0x6E103570573DFA0B, 0x660EFB2A17FC95AB, 0x76327A9E97634BF6, 0x4BAD9D6462458BF5, - 0xF1830CAEDBC3F748, 0xC5C8F542669131FF, 0x95044A1CDC48B0CB, 0x892962DF3CF8B866, - 0xB0B9E208E930C135, 0xA14FB3F0611A767C, 0x8D2605F21C160136, 0xD6B71922FECC549E, - 0x37089438A5907D8B, 0x0B5DA38E5803D49C, 0x5A5BCC9CEA6F3CBC, 0xEDAE246D3B73FFE5, - 0xD2B87E0FDE22EDCE, 0x5E54ABB1CA8185EC, 0x1DE7F88FE80561B9, 0xAD5E1A870135A08C, - 0x2F2ADBD665CECC76, 0x5780B5A782F58358, 0x3EDC8A2EEDE47B3F, 0xC9D95C3506BEE70F, - 0x83BE111D6C4E05EE, 0xA603B90959367410, 0x103C81B4809FDE5D, 0x2C69B6027D0C774A, - 0x399080D7D5C87953, 0x09D41E16487406B4, 0xCDD63B1826505E5F, 0xF99DC2F49B0298E8, - 0x9CD0540A943CB67F, 0xBCA84B7F891F17C5, 0x723D1DB3B78DF2A6, 0x78AA6E71E73B4F2E, - 0x1433E699A071670D, 0x84F21BE454620782, 0x98DF3327B4D20F2F, 0xF049DCE2D3769E5C, - 0xDB6C60199656EB7A, 0x648746B2078B4783, 0x32CD23598DCBADCF, 0x1EA4955BF0C7DA85, - 0xE9A143401B9D46B5, 0xFD92A5D9BBEC21B8, 0xC8138C790E0B8E1B, 0x2EE00B9A6D7BA562, - 0xF85712B893B7F1FC, 0xEB28FED80BEA949D, 0x564A65EB8A40EA4C, 0x6C9988E8474A2823, - 0x4535898B121D8F2D, 0xABD8C03231ACCBF4, 0xBA2E91CAB9867CBD, 0x7960BE3DEF8E263A, - 0x0C11A977602FD6F0, 0xCB50E1AD16C93527, 0xEAE22E94035FFD89, 0x2866D12F5DE2CE1A, - 0xFF1B1841AB9BF390, 0x9F9339DE8CFE0D43, 0x964727C8C48A0BF7, 0x524502C6AAAE531C, - 0x9B9C5EF3AC10B413, 0x4FA2FA4942AB32A5, 0x3F165A62E551122B, 0xC74148DA76E6E3D7, - 0x924840E5E464B2A7, 0xD372AE43D69784DA, 0x233B72A105E11A86, 0xA48A04914941A638, - 0xB4B68525C9DE7865, 0xDDEABAACA6CF8002, 0x0A9773C250B6BD88, 0xC284FFBB5EBD3393, - 0x8BA0DF472C8F6A4E, 0x2AEF6CB74D951C32, 0x427983722A318D41, 0x73F7CDFFBF389BB2, - 0x074C0AF9382C026C, 0x8A6A0F0B243A035A, 0x6FDAE53C5F88931F, 0xC68B98967E538AC3, - 0x44FF59C71AA8E639, 0xE2FCE0CE439E9229, 0xA20CDE2479D8CD40, 0x19E89FA2C8EBD8E9, - 0xF446BBCFF398270C, 0x43B3533E2284E455, 0xD82F0DCD8E945046, 0x51066F12B26CE820, - 0xE73957AF6BC5426D, 0x081ECE5A40C16FA0, 0x3B193D4FC5BFAB7B, 0x7FE66488DF174D42, - 0x0E9814EF705804D8, 0x8137AC857C39D7C6, 0xB1733244E185A821, 0x695C3F896F11F867, - 0xF6CF0657E3EFF524, 0x1AABF276D02963D5, 0x2DA3664E75B91E5E, 0x0289BD981077D228, - 0x90C1FD7DF413608F, 0x3C5537B6FD93A917, 0xAA12107E3919A2E0, 0x0686DAB530996B78, - 0xDAA6B0559EE3826E, 0xC34E2FF756085A87, 0x6D5358A44FFF4137, 0xFC587595B35948AC, - 0x7CA5095CC7D5F67E, 0xFB147F6C8B754AC0, 0xBFEB26AB91DDACF9, 0x6896EFC567A49173, - 0xCA9A31E11E7C5C33, 0xBBE44186B13315A9, 0x0DDB793B689ABFE4, 0x70B4A02BA7FA208E, - 0xE47A3A7B7307F951, 0x8CECD5BE14A36822, 0xEEED49B923B144D9, 0x17708B4DB8B3DC31, - 0x6088219F2765FED3, 0xB3FA8FDCF1F27A09, 0x910B2D31FCA6099B, 0x0F52C4A378ED6DCC, - 0x50CCBF5EBAD98134, 0x6BD582117F662A4F, 0x94CE9A50D4FDD9DF, 0x2B25BCFB45207526, - 0x67C42B661F49FCBF, 0x492420FC723259DD, 0x03436DD418C2BB3C, 0x1F6E4517F872B391, - 0xA08563BC69AF1F68, 0xD43EA4BAEEBB86B6, 0x01CAD04C08B56914, 0xAC94CACB0980C998, - 0x54C3D8739A373864, 0x26FEC5C02DBACAC2, 0xDEA9D778BE0D3B3E, 0x040F672D20EEB950, - 0xE5B0EA377BB29045, 0xF30AB136CBB42560, 0x62019C0737122CFB, 0xE86B930C13282FA1, - 0xCC1CEB542EE5374B, 0x538FD28AA21B3A08, 0x1B61223AD89C0AC1, 0x36C24474AD25149F, - 0x7A23D3E9F74C9D06, 0xBE21F6E79968C5ED, 0xCF5F868036278C77, 0xF705D61BEB5A9C30, - 0x4D2B47D152DCE08D, 0x5F9E7BFDC234ECF8, 0x247778583DCD18EA, 0x867BA67C4415D5AA, - 0x4CE1979D5A698999, 0x0000000000000000, 0xEC64F42133C696F1, 0xB57C5569C16B1171, - 0xC1C7926F467F88AF, 0x654D96FE0F3E2E97, 0x15F936D5A8C40E19, 0xB8A72C52A9F1AE95, - 0xA9517DAA21DB19DC, 0x58D27104FA18EE94, 0x5918A148F2AD8780, 0x5CDD1629DAF657C4, - 0x8274C15164FB6CFA, 0xD1FB13DBC6E056F2, 0x7D6FD910CF609F6A, 0xB63F38BDD9A9AA4D, - 0x3D9FE7FAF526C003, 0x74BBC706871499DE, 0xDF630734B6B8522A, 0x3AD3ED03CD0AC26F, - 0xFADEAF2083C023D4, 0xC00D42234ECAE1BB, 0x8538CBA85CD76E96, 0xC402250E6E2458EB, - 0x47BC3413026A5D05, 0xAFD7A71F114272A4, 0x978DF784CC3F62E3, 0xB96DFC1EA144C781, - 0x21B2CF391596C8AE, 0x318E4E8D950916F3, 0xCE9556CC3E92E563, 0x385A509BDD7D1047, - 0x358129A0B5E7AFA3, 0xE6F387E363702B79, 0xE0755D5653E94001, 0x7BE903A5FFF9F412, - 0x12B53C2C90E80C75, 0x3307F315857EC4DB, 0x8FAFB86A0C61D31E, 0xD9E5DD8186213952, - 0x77F8AAD29FD622E2, 0x25BDA814357871FE, 0x7571174A8FA1F0CA, 0x137FEC60985D6561, - 0x30449EC19DBC7FE7, 0xA540D4DD41F4CF2C, 0xDC206AE0AE7AE916, 0x5B911CD0E2DA55A8, - 0xB2305F90F947131D, 0x344BF9ECBD52C6B7, 0x5D17C665D2433ED0, 0x18224FEEC05EB1FD, - 0x9E59E992844B6457, 0x9A568EBFA4A5DD07, 0xA3C60E68716DA454, 0x7E2CB4C4D7A22456, - 0x87B176304CA0BCBE, 0x413AEEA632F3367D, 0x9915E36BBC67663B, 0x40F03EEA3A465F69, - 0x1C2D28C3E0B008AD, 0x4E682A054A1E5BB1, 0x05C5B761285BD044, 0xE1BF8D1A5B5C2915, - 0xF2C0617AC3014C74, 0xB7F5E8F1D11CC359, 0x63CB4C4B3FA745EF, 0x9D1A84469C89DF6B, - 0xE33630824B2BFB3D, 0xD5F474F6E60EEFA2, 0xF58C6B83FB2D4E18, 0x4676E45F0ADF3411, - 0x20781F751D23A1BA, 0xBD629B3381AA7ED1, 0xAE1D775319F71BB0, 0xFED1C80DA32E9A84, - 0x5509083F92825170, 0x29AC01635557A70E, 0xA7C9694551831D04, 0x8E65682604D4BA0A, - 0x11F651F8882AB749, 0xD77DC96EF6793D8A, 0xEF2799F52B042DCD, 0x48EEF0B07A8730C9, - 0x22F1A2ED0D547392, 0x6142F1D32FD097C7, 0x4A674D286AF0E2E1, 0x80FD7CC9748CBED2, - 0x717E7067AF4F499A, 0x938290A9ECD1DBB3, 0x88E3B293344DD172, 0x2734158C250FA3D6 + +__constant__ static uint64_t _ALIGN(32) precomputed_values[12][8] = { + 0x8FD72F640708B0D0, 0x0DE874C7EBC3F213, 0xE92EEF3AD202E9E0, 0xC1E9DA0708013DA7, 0x9727DAB2F014BE88, 0x103051A02BCD6935, 0x33EC7E1DBD28F736, 0x1ECF460CF78AD1F4, + 0x0B2D9F89C775449D, 0x6B6EEFC6DAB7E8B0, 0xF1A0D31667F6EC44, 0x2A71132D5E108166, 0x0E9357C2EC87931A, 0xC99F5C1B4A01612D, 0x7E60B16E637D4EE2, 0xA9FCB827F9BA6D81, + 0x231FECA5AB3D285C, 0x70C6E1483C838C3B, 0x9C21C3C40CE4E2DA, 0x2FA796BD5688E573, 0x04C0E3FF55809FDF, 0x5FF978BFB8E3CDC8, 0xC54A19D6A3D07033, 0x0FCA83FDDE872478, + 0xBDF9312726339F10, 0x51A5BA1793BC9C56, 0xC4428DA14F96D2D4, 0xEC925222374EAB1F, 0x79477893747DD92F, 0xC495E19A46886304, 0x9C23F893BA7CFA36, 0x0C47268881FC5FEB, + 0xCF117966029B2CB3, 0x07179ABE77088A8F, 0x671EF4CC2650E257, 0x7474B8B170DAB5C6, 0x4224FEBECF35113E, 0x993D156C675C5537, 0x2DEE3A5782C39B45, 0xE7C586F2990DD385, + 0x8608FD95B1C1138A, 0x8BB0847D9E9849AC, 0x5E76623F4F0EB0C7, 0x34C2BDBAFC5060CE, 0xE9E814475907826C, 0x22C9ED94D6AAC7C9, 0xE6B75E28171EB0D6, 0xF1329E5534E60215, + 0x86BB4814B1C3CE52, 0xE8F226C9FBDDD017, 0xCEDED67991CB3087, 0x76C33E32FDBFACA5, 0xDBB13BE1A9F7474C, 0x3D0273470342C356, 0x8E7246C51CF07F61, 0xAC8C125DDEF8DF71, + 0x6D73E747795B8CF3, 0x4E4AA65EA0072050, 0xA14A1582CB43C2B9, 0x748EF2B7BB63B938, 0x126789534410D7D4, 0xD4D48FF40301D791, 0xC67DFBE315C41FC0, 0x35E7A1A1AF88601C, + 0x9BD33EA0FAB34007, 0xF51B7CDBE3D67D25, 0xD3ABDA0CE4186E6B, 0x8E61DDADCBCE1706, 0x58994565B41BE6A5, 0x7A87ABC1240CD31D, 0xFAFE6C28487968D0, 0x15B368609FF9EEA7, + 0xAE33263CCF115818, 0x93B2DBE9CADFCFC8, 0x0A91952BF91B0147, 0x458E67CA5F1ED73A, 0x94C2E5F288F074E3, 0x377895E85C69E996, 0xF11A4456AAB37B10, 0x163131934816821A, + 0xD07E4A2366BF469D, 0x5EF1A3D220213B6C, 0x3C5BB78971D8ED0F, 0x0DE05E6B9006F2D2, 0xC58CFB00B8EAA1C9, 0xEFCDB54D1F250B76, 0xFD135634FA527042, 0x4CEE791290516407, + 0xD800B9264010790F, 0x974C4823E2B668D7, 0xA605A4B385C5E361, 0x3F6C92DA5A56D8D2, 0x82B9D67C12EF8277, 0x0AB6B4582561BF90, 0x46954FD98FC2CBA3, 0x70BE45CB21B6760D }; + +#define T0(x) shared[0][x] +#define T1(x) shared[1][x] +#define T2(x) shared[2][x] +#define T3(x) shared[3][x] +#define T4(x) shared[4][x] +#define T5(x) shared[5][x] +#define T6(x) shared[6][x] +#define T7(x) shared[7][x] + __device__ __forceinline__ -void ADD_ASM_512_32(uint32_t* x, uint32_t* const a, uint32_t* const b) -{ - asm("add.cc.u32 %0, %1, %2;\n\t" : "=r"(x[0]) : "r"(a[0]),"r"(b[0])); - #pragma unroll 14 - for(int i=1; i < 15; i++) { - asm("addc.cc.u32 %0, %1, %2;\n\t" : "=r"(x[i]) : "r"(a[i]),"r"(b[i])); +void GOST_FS(const uint2* state,uint2* return_state, const uint2 shared[8][256]){ + + #pragma unroll 4 + for(uint32_t i=0;i<4;i++){ + return_state[i] = T0(__byte_perm(state[7].x,0,0x44440 + (i&3))) ^ T1(__byte_perm(state[6].x,0,0x44440 + (i&3))) + ^ T2(__byte_perm(state[5].x,0,0x44440 + (i&3))) ^ T3(__byte_perm(state[4].x,0,0x44440 + (i&3))) + ^ T4(__byte_perm(state[3].x,0,0x44440 + (i&3))) ^ T5(__byte_perm(state[2].x,0,0x44440 + (i&3))) + ^ T6(__byte_perm(state[1].x,0,0x44440 + (i&3))) ^ T7(__byte_perm(state[0].x,0,0x44440 + (i&3))); } - asm("addc.u32 %0, %1, %2;\n\t" : "=r"(x[15]) : "r"(a[15]),"r"(b[15])); -} -__device__ __forceinline__ -void GOST_Add512(void *x, void * const a, void * const b) -{ - ADD_ASM_512_32((uint32_t*)x, (uint32_t*)a, (uint32_t*)b); + #pragma unroll 4 + for(uint32_t i=0;i<4;i++){ + return_state[i+4] = T0(__byte_perm(state[7].y,0,0x44440 + (i&3))) ^ T1(__byte_perm(state[6].y,0,0x44440 + (i&3))) + ^ T2(__byte_perm(state[5].y,0,0x44440 + (i&3))) ^ T3(__byte_perm(state[4].y,0,0x44440 + (i&3))) + ^ T4(__byte_perm(state[3].y,0,0x44440 + (i&3))) ^ T5(__byte_perm(state[2].y,0,0x44440 + (i&3))) + ^ T6(__byte_perm(state[1].y,0,0x44440 + (i&3))) ^ T7(__byte_perm(state[0].y,0,0x44440 + (i&3))); + } } __device__ __forceinline__ -void GOST_Copy512(uint64_t* dst, uint64_t* const __restrict__ src) -{ - #pragma unroll - for (int i=0; i<8; i++) - dst[i] = src[i]; +static void GOST_E12(uint2* K, uint2* state,const uint2 shared[8][256]){ + + uint2 t[ 8]; + + #pragma unroll 2 + for(uint32_t i=0; i<12; i++){ + GOST_FS(state, t, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + state[ j] = t[ j]; + K[ j] ^= *(uint2*)&CC[i][j]; + } + + GOST_FS(K, t, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + K[ j] = t[ j]; + state[ j] = state[ j] ^ t[ j]; + } + } } -__device__ __forceinline__ -void GOST_Xor512(uint64_t* C, uint64_t* const A, const uint64_t* B) +#define TPB 256 +__global__ +__launch_bounds__(TPB, 4) +void streebog_gpu_hash_64(uint64_t *g_hash) { -#if 1 - C[0] = A[0] ^ B[0]; - C[1] = A[1] ^ B[1]; - C[2] = A[2] ^ B[2]; - C[3] = A[3] ^ B[3]; - C[4] = A[4] ^ B[4]; - C[5] = A[5] ^ B[5]; - C[6] = A[6] ^ B[6]; - C[7] = A[7] ^ B[7]; -#elif 0 - /* x = y ^ z */ + const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint2 buf[8], t[8], temp[8],K0[8], hash[8]; + + __shared__ uint2 shared[8][256]; + shared[0][threadIdx.x] = T02[threadIdx.x]; + shared[1][threadIdx.x] = T12[threadIdx.x]; + shared[2][threadIdx.x] = T22[threadIdx.x]; + shared[3][threadIdx.x] = T32[threadIdx.x]; + shared[4][threadIdx.x] = T42[threadIdx.x]; + shared[5][threadIdx.x] = T52[threadIdx.x]; + shared[6][threadIdx.x] = T62[threadIdx.x]; + shared[7][threadIdx.x] = T72[threadIdx.x]; + +// __syncthreads(); +// if (thread < threads) +// { + uint64_t* inout = &g_hash[thread<<3]; + + *(uint2x4*)&hash[0] = __ldg4((uint2x4*)&inout[0]); + *(uint2x4*)&hash[4] = __ldg4((uint2x4*)&inout[4]); + + K0[0] = K0[1] = K0[2] = K0[3] = K0[4] = K0[5] = K0[6] = K0[7] = vectorize(0x74a5d4ce2efc83b3); + #pragma unroll 8 - for(int i=0; i < 8; i++) { - asm("xor.b64 %0, %1, %2;\n\t" : "=l"(C[i]) : "l"(A[i]), "l"(B[i])); + for(uint32_t i=0;i<8;i++){ + buf[ i] = hash[ i] ^ K0[ i]; } -#else - for(int i=0; i<8; i++) { - C[i] = A[i] ^ B[i]; + + #pragma unroll 11 + for(uint32_t i=0; i<11; i++){ + GOST_FS(buf, temp, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + buf[ j] = temp[ j] ^ *(uint2*)&precomputed_values[i][j]; + } + } + GOST_FS(buf, temp, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + buf[ j] = hash[ j] ^ temp[ j] ^ *(uint2*)&precomputed_values[11][j]; + K0[ j] = buf[ j]; } -#endif -} -__device__ __forceinline__ -void GOST_Xor512_3(uint64_t* C, uint64_t* const A, uint64_t* const B) -{ -#if 0 + K0[7].y ^= 0x00020000; + + GOST_FS(K0, t, shared); + + #pragma unroll 8 + for(uint32_t i=0;i<8;i++) + K0[ i] = t[ i]; + + t[7].y ^= 0x01000000; + + #pragma unroll 1 + for(uint32_t i=0; i<11; i++){ + GOST_FS(t, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[i][j]; + } + + GOST_FS(K0, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + K0[ j] = temp[ j]; + t[ j] ^= temp[ j]; + } + } + GOST_FS(t, temp, shared); + #pragma unroll 8 - for(int i=0; i < 8; i++) { - asm( - "xor.b64 %0, %0, %1;\n\t" - "xor.b64 %0, %0, %2;\n\t" - : "+l"(C[i]) : "l"(A[i]), "l"(B[i]) - ); + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[11][j]; } -#else + + GOST_FS(K0, temp, shared); + #pragma unroll 8 - for(int i=0; i<8; i++) { - C[i] ^= A[i] ^ B[i]; + for(uint32_t j=0;j<8;j++){ + t[ j] ^= temp[ j]; } -#endif + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + buf[ j] ^= t[ j]; + + buf[7].y ^= 0x01000000; + + GOST_FS(buf,K0, shared); + + buf[7].y ^= 0x00020000; + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j]; + + t[7].y ^= 0x00020000; + + GOST_E12(K0, t, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + buf[ j] ^= t[ j]; + + GOST_FS(buf,K0, shared); // K = F(h) + + hash[7]+= vectorize(0x0100000000000000); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j] ^ hash[ j]; + + GOST_E12(K0, t, shared); + + *(ulonglong2to8*)&t[ 0] = *(ulonglong2to8*)&t[ 0] ^ *(ulonglong2to8*)&hash[ 0] ^ *(ulonglong2to8*)&buf[ 0]; + *(uint2x4*)&inout[ 0] = *(uint2x4*)&t[ 0]; + *(uint2x4*)&inout[ 4] = *(uint2x4*)&t[ 4]; } -__device__ __forceinline__ -void GOST_FS(uint64_t* const state64, uint64_t* return_state) +__host__ +void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash) { - uchar* state = (uchar*) state64; - uint64_t r; -#ifdef FULL_UNROLL - r = T0[state[56]]; - r ^= T1[state[48]]; - r ^= T2[state[40]]; - r ^= T3[state[32]]; - r ^= T4[state[24]]; - r ^= T5[state[16]]; - r ^= T6[state[8]]; - r ^= T7[state[0]]; - return_state[0] = r; - - r = T0[state[57]]; - r ^= T1[state[49]]; - r ^= T2[state[41]]; - r ^= T3[state[33]]; - r ^= T4[state[25]]; - r ^= T5[state[17]]; - r ^= T6[state[9]]; - r ^= T7[state[1]]; - return_state[1] = r; - - r = T0[state[58]]; - r ^= T1[state[50]]; - r ^= T2[state[42]]; - r ^= T3[state[34]]; - r ^= T4[state[26]]; - r ^= T5[state[18]]; - r ^= T6[state[10]]; - r ^= T7[state[2]]; - return_state[2] = r; - - r = T0[state[59]]; - r ^= T1[state[51]]; - r ^= T2[state[43]]; - r ^= T3[state[35]]; - r ^= T4[state[27]]; - r ^= T5[state[19]]; - r ^= T6[state[11]]; - r ^= T7[state[3]]; - return_state[3] = r; - - r = T0[state[60]]; - r ^= T1[state[52]]; - r ^= T2[state[44]]; - r ^= T3[state[36]]; - r ^= T4[state[28]]; - r ^= T5[state[20]]; - r ^= T6[state[12]]; - r ^= T7[state[4]]; - return_state[4] = r; - - r = T0[state[61]]; - r ^= T1[state[53]]; - r ^= T2[state[45]]; - r ^= T3[state[37]]; - r ^= T4[state[29]]; - r ^= T5[state[21]]; - r ^= T6[state[13]]; - r ^= T7[state[5]]; - return_state[5] = r; - - r = T0[state[62]]; - r ^= T1[state[54]]; - r ^= T2[state[46]]; - r ^= T3[state[38]]; - r ^= T4[state[30]]; - r ^= T5[state[22]]; - r ^= T6[state[14]]; - r ^= T7[state[6]]; - return_state[6] = r; - - r = T0[state[63]]; - r ^= T1[state[55]]; - r ^= T2[state[47]]; - r ^= T3[state[39]]; - r ^= T4[state[31]]; - r ^= T5[state[23]]; - r ^= T6[state[15]]; - r ^= T7[state[7]]; - return_state[7] = r; -#else - for (int b=0; b<8; b++) { - r = T0[state[b+56]]; - r ^= T1[state[b+48]]; - r ^= T2[state[b+40]]; - r ^= T3[state[b+32]]; - r ^= T4[state[b+24]]; - r ^= T5[state[b+16]]; - r ^= T6[state[b+8]]; - r ^= T7[state[b]]; - return_state[b] = r; + dim3 grid((threads + TPB-1) / TPB); + dim3 block(TPB); + if(((float)((threads + TPB-1) / TPB) - (int)((threads + TPB-1) / TPB))!=0.0){ + applog(LOG_WARNING,"Invalid intensity for streeebog"); } -#endif + + streebog_gpu_hash_64<<>>((uint64_t*)d_hash); } -__device__ __forceinline__ -static void GOST_F(uint64_t* state) -{ - uint64_t t[8]; - GOST_FS(state, t); - //memcpy(state, t, 64); - GOST_Copy512(state, t); +#undef T0 +#undef T1 +#undef T2 +#undef T3 +#undef T4 +#undef T5 +#undef T6 +#undef T7 + +#define T0(x) shared[0][x] +#define T1(x) shared[1][x] +#define T2(x) shared[2][x] +#define T3(x) shared[3][x] +#define T4(x) shared[4][x] +#define T5(x) shared[5][x] +#define T6(x) shared[6][x] +#define T7(x) shared[7][x] + +__constant__ uint64_t target64[4]; + +void streebog_set_target(const uint32_t* ptarget){ + cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice); } -__device__ -static void GOST_E12(uint64_t* K, uint64_t *state) +__global__ +__launch_bounds__(TPB, 4) +void streebog_gpu_hash_64_final(uint64_t *g_hash, uint32_t* resNonce) { -#if 0 - // First Round - { - GOST_F(state); - K[0] ^= 0xe9daca1eda5b08b1; - K[1] ^= 0x1f7c65c0812fcbeb; - K[2] ^= 0x16d0452e43766a2f; - K[3] ^= 0xfcc485758db84e71; - K[4] ^= 0x0169679291e07c4b; - K[5] ^= 0x15d360a4082a42a2; - K[6] ^= 0x234d74cc36747605; - K[7] ^= 0x0745a6f2596580dd; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 2nd Round - { - GOST_F(state); - K[0] ^= 0x1a2f9da98ab5a36f; - K[1] ^= 0xd7b5700f469de34f; - K[2] ^= 0x982b230a72eafef3; - K[3] ^= 0x3101b5160f5ed561; - K[4] ^= 0x5899d6126b17b59a; - K[5] ^= 0xcaa70adbc261b55c; - K[6] ^= 0x56cdcbd71ba2dd55; - K[7] ^= 0xb79bb121700479e6; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 3rd Round - { - GOST_F(state); - K[0] ^= 0xc72fce2bacdc74f5; - K[1] ^= 0x35843d6a28fc390a; - K[2] ^= 0x8b1f9c525f5ef106; - K[3] ^= 0x7b7b29b11475eaf2; - K[4] ^= 0xb19e3590e40fe2d3; - K[5] ^= 0x09db6260373ac9c1; - K[6] ^= 0x31db7a8643f4b6c2; - K[7] ^= 0xb20aba0af5961e99; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 4th Round - { - GOST_F(state); - K[0] ^= 0xd26615e8b3df1fef; - K[1] ^= 0xdde4715da0e148f9; - K[2] ^= 0x7d3c5c337e858e48; - K[3] ^= 0x3f355e68ad1c729d; - K[4] ^= 0x75d603ed822cd7a9; - K[5] ^= 0xbe0352933313b7d8; - K[6] ^= 0xf137e893a1ea5334; - K[7] ^= 0x2ed1e384bcbe0c22; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 5th Round - { - GOST_F(state); - K[0] ^= 0x994747adac6bea4b; - K[1] ^= 0x6323a96c0c413f9a; - K[2] ^= 0x4a1086161f1c157f; - K[3] ^= 0xbdff0f80d7359e35; - K[4] ^= 0xa3f53a254717cdbf; - K[5] ^= 0x161a2723b700ffdf; - K[6] ^= 0xf563eaa97ea2567a; - K[7] ^= 0x57fe6c7cfd581760; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 6th Round - { - GOST_F(state); - K[0] ^= 0xd9d33a1daeae4fae; - K[1] ^= 0xc039307a3bc3a46f; - K[2] ^= 0x6ca44251f9c4662d; - K[3] ^= 0xc68ef09ab49a7f18; - K[4] ^= 0xb4b79a1cb7a6facf; - K[5] ^= 0xb6c6bec2661ff20a; - K[6] ^= 0x354f903672c571bf; - K[7] ^= 0x6e7d64467a4068fa; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 7th Round - { - GOST_F(state); - K[0] ^= 0xecc5aaee160ec7f4; - K[1] ^= 0x540924bffe86ac51; - K[2] ^= 0xc987bfe6c7c69e39; - K[3] ^= 0xc9937a19333e47d3; - K[4] ^= 0x372c822dc5ab9209; - K[5] ^= 0x04054a2883694706; - K[6] ^= 0xf34a3ca24c451735; - K[7] ^= 0x93d4143a4d568688; - GOST_F(K); - GOST_Xor512(state, state, K); - } - // 8th Round - { - GOST_F(state); - K[0] ^= 0xa7c9934d425b1f9b; - K[1] ^= 0x41416e0c02aae703; - K[2] ^= 0x1ede369c71f8b74e; - K[3] ^= 0x9ac4db4d3b44b489; - K[4] ^= 0x90069b92cb2b89f4; - K[5] ^= 0x2fc4a5d12b8dd169; - K[6] ^= 0xd9a8515935c2ac36; - K[7] ^= 0x1ee702bfd40d7fa4; - GOST_F(K); - GOST_Xor512(state, state, K); + const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint2 buf[8], t[8], temp[8],K0[8], hash[8]; + + __shared__ uint2 shared[8][256]; + shared[0][threadIdx.x] = T02[threadIdx.x]; + shared[1][threadIdx.x] = T12[threadIdx.x]; + shared[2][threadIdx.x] = T22[threadIdx.x]; + shared[3][threadIdx.x] = T32[threadIdx.x]; + shared[4][threadIdx.x] = T42[threadIdx.x]; + shared[5][threadIdx.x] = T52[threadIdx.x]; + shared[6][threadIdx.x] = T62[threadIdx.x]; + shared[7][threadIdx.x] = T72[threadIdx.x]; + +// __syncthreads(); +// if (thread < threads) +// { + uint64_t* inout = &g_hash[thread<<3]; + + *(uint2x4*)&hash[0] = __ldg4((uint2x4*)&inout[0]); + *(uint2x4*)&hash[4] = __ldg4((uint2x4*)&inout[4]); + + K0[0] = K0[1] = K0[2] = K0[3] = K0[4] = K0[5] = K0[6] = K0[7] = vectorize(0x74a5d4ce2efc83b3); + + #pragma unroll 8 + for(uint32_t i=0;i<8;i++){ + buf[ i] = hash[ i] ^ K0[ i]; } - // 9th Round - { - GOST_F(state); - K[0] ^= 0x9b223116545a8f37; - K[1] ^= 0xde5f16ecd89a4c94; - K[2] ^= 0x244289251b3a7d3a; - K[3] ^= 0x84090de0b755d93c; - K[4] ^= 0xb1ceb2db0b440a80; - K[5] ^= 0x549c07a69a8a2b7b; - K[6] ^= 0x602a1fcb92dc380e; - K[7] ^= 0xdb5a238351446172; - GOST_F(K); - GOST_Xor512(state, state, K); + + #pragma unroll 11 + for(uint32_t i=0; i<11; i++){ + GOST_FS(buf, temp, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + buf[ j] = temp[ j] ^ *(uint2*)&precomputed_values[i][j]; + } } - // 10th Round - { - GOST_F(state); - K[0] ^= 0x526f0580a6debeab; - K[1] ^= 0xf3f3e4b248e52a38; - K[2] ^= 0xdb788aff1ce74189; - K[3] ^= 0x0361331b8ae1ff1f; - K[4] ^= 0x4b3369af0267e79f; - K[5] ^= 0xf452763b306c1e7a; - K[6] ^= 0xc3b63b15d1fa9836; - K[7] ^= 0xed9c4598fbc7b474; - GOST_F(K); - GOST_Xor512(state, state, K); + GOST_FS(buf, temp, shared); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + buf[ j] = hash[ j] ^ temp[ j] ^ *(uint2*)&precomputed_values[11][j]; + K0[ j] = buf[ j]; } - // 11th Round - { - GOST_F(state); - K[0] ^= 0xfb89c8efd09ecd7b; - K[1] ^= 0x94fe5a63cdc60230; - K[2] ^= 0x6107abebbb6bfad8; - K[3] ^= 0x7966841421800120; - K[4] ^= 0xcab948eaef711d8a; - K[5] ^= 0x986e477d1dcdbaef; - K[6] ^= 0x5dd86fc04a59a2de; - K[7] ^= 0x1b2df381cda4ca6b; - GOST_F(K); - GOST_Xor512(state, state, K); + + K0[7].y ^= 0x00020000; + + GOST_FS(K0, t, shared); + + #pragma unroll 8 + for(uint32_t i=0;i<8;i++) + K0[ i] = t[ i]; + + t[7].y ^= 0x01000000; + + #pragma unroll 1 + for(uint32_t i=0; i<11; i++){ + GOST_FS(t, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[i][j]; + } + + GOST_FS(K0, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + K0[ j] = temp[ j]; + t[ j] ^= temp[ j]; + } } - // 12th Round - { - GOST_F(state); - K[0] ^= 0xba3116f167e78e37; - K[1] ^= 0x7ab14904b08013d2; - K[2] ^= 0x771ddfbc323ca4cd; - K[3] ^= 0x9b9f2130d41220f8; - K[4] ^= 0x86cc91189def805d; - K[5] ^= 0x5228e188aaa41de7; - K[6] ^= 0x991bb2d9d517f4fa; - K[7] ^= 0x20d71bf14a92bc48; - GOST_F(K); - GOST_Xor512(state, state, K); + GOST_FS(t, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[11][j]; } -#else - // KeySchedule - uint64_t const CC[12][8] = {{ - 0xe9daca1eda5b08b1, 0x1f7c65c0812fcbeb, 0x16d0452e43766a2f, 0xfcc485758db84e71, - 0x0169679291e07c4b, 0x15d360a4082a42a2, 0x234d74cc36747605, 0x0745a6f2596580dd - }, { - 0x1a2f9da98ab5a36f, 0xd7b5700f469de34f, 0x982b230a72eafef3, 0x3101b5160f5ed561, - 0x5899d6126b17b59a, 0xcaa70adbc261b55c, 0x56cdcbd71ba2dd55, 0xb79bb121700479e6 - }, { - 0xc72fce2bacdc74f5, 0x35843d6a28fc390a, 0x8b1f9c525f5ef106, 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, 0x09db6260373ac9c1, 0x31db7a8643f4b6c2, 0xb20aba0af5961e99 - }, { - 0xd26615e8b3df1fef, 0xdde4715da0e148f9, 0x7d3c5c337e858e48, 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, 0xbe0352933313b7d8, 0xf137e893a1ea5334, 0x2ed1e384bcbe0c22 - }, { - 0x994747adac6bea4b, 0x6323a96c0c413f9a, 0x4a1086161f1c157f, 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, 0x161a2723b700ffdf, 0xf563eaa97ea2567a, 0x57fe6c7cfd581760 - }, { - 0xd9d33a1daeae4fae, 0xc039307a3bc3a46f, 0x6ca44251f9c4662d, 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, 0xb6c6bec2661ff20a, 0x354f903672c571bf, 0x6e7d64467a4068fa - }, { - 0xecc5aaee160ec7f4, 0x540924bffe86ac51, 0xc987bfe6c7c69e39, 0xc9937a19333e47d3, - 0x372c822dc5ab9209, 0x04054a2883694706, 0xf34a3ca24c451735, 0x93d4143a4d568688 - }, { - 0xa7c9934d425b1f9b, 0x41416e0c02aae703, 0x1ede369c71f8b74e, 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, 0x2fc4a5d12b8dd169, 0xd9a8515935c2ac36, 0x1ee702bfd40d7fa4 - }, { - 0x9b223116545a8f37, 0xde5f16ecd89a4c94, 0x244289251b3a7d3a, 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, 0x549c07a69a8a2b7b, 0x602a1fcb92dc380e, 0xdb5a238351446172 - }, { - 0x526f0580a6debeab, 0xf3f3e4b248e52a38, 0xdb788aff1ce74189, 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, 0xf452763b306c1e7a, 0xc3b63b15d1fa9836, 0xed9c4598fbc7b474 - }, { - 0xfb89c8efd09ecd7b, 0x94fe5a63cdc60230, 0x6107abebbb6bfad8, 0x7966841421800120, - 0xcab948eaef711d8a, 0x986e477d1dcdbaef, 0x5dd86fc04a59a2de, 0x1b2df381cda4ca6b - }, { - 0xba3116f167e78e37, 0x7ab14904b08013d2, 0x771ddfbc323ca4cd, 0x9b9f2130d41220f8, - 0x86cc91189def805d, 0x5228e188aaa41de7, 0x991bb2d9d517f4fa, 0x20d71bf14a92bc48 - }}; - - //#pragma unroll 1 - for(int i=0; i<12; i++) - { - GOST_F(state); - GOST_Xor512(K, K, CC[i]); - GOST_F(K); - GOST_Xor512(state, state, K); + + GOST_FS(K0, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] ^= temp[ j]; } -#endif -} -__device__ -void GOST_E(uint64_t* K, uint64_t* const m, uint64_t *state /* out only */) -{ - GOST_Xor512(state, m, K); // state = m ^ K - GOST_E12(K, state); -} + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + buf[ j] ^= t[ j]; -__device__ -void GOST_g_0(uint64_t* h, uint64_t* const M) -{ - uint64_t K0[8]; - // GOST_F(0); - K0[0] = K0[1] = K0[2] = K0[3] = 0x74a5d4ce2efc83b3; - K0[4] = K0[5] = K0[6] = K0[7] = 0x74a5d4ce2efc83b3; - - uint64_t t[8]; - GOST_Xor512(t, M, K0); // t = M ^ K0 - GOST_E12(K0, t); - GOST_Xor512(h, t, M); -} + buf[7].y ^= 0x01000000; -__device__ -void GOST_g_N(uint64_t* h, uint64_t* const M, uint64_t* const N) -{ - uint64_t K[8]; + GOST_FS(buf,K0, shared); - GOST_Xor512(K, N, h); // K = N ^ h - GOST_F(K); + buf[7].y ^= 0x00020000; - uint64_t t[8]; - GOST_E(K, M, t); + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j]; -// GOST_Xor512(t, t, h); -// GOST_Xor512(h, t, M); - GOST_Xor512_3(h, t, M); // h = h ^ t ^ M -} + t[7].y ^= 0x00020000; -__device__ -void GOST_g_1(uint64_t* h, uint64_t* N) -{ - uint64_t K[8]; - GOST_FS(h, K); // K = F(h) - GOST_Xor512(h, h, N); + GOST_E12(K0, t, shared); - uint64_t t[8]; - GOST_E(K, N, t); - GOST_Xor512(h, h, t); -} + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + buf[ j] ^= t[ j]; -__device__ -void GOST_g_F(uint64_t* out, uint64_t* h, uint64_t* M) -{ - uint64_t K[8]; - GOST_FS(h, K); // K = F(h) - GOST_Add512(M, M, out); + GOST_FS(buf,K0, shared); // K = F(h) - uint64_t t[8]; - GOST_E(K, M, t); - GOST_Xor512(t, t, M); + hash[7]+= vectorize(0x0100000000000000); - GOST_Xor512(out, t, h); -} + #pragma unroll 8 + for(uint32_t j=0;j<8;j++) + t[ j] = K0[ j] ^ hash[ j]; + +// #pragma unroll + for(uint32_t i=0; i<10; i++){ + GOST_FS(t, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[ i][ j]; + } + + GOST_FS(K0, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + K0[ j] = temp[ j]; + t[ j]^= temp[ j]; + } + } -__device__ __forceinline__ -void GOST_hash_X(uint64_t *hash, uchar * const message, uint64_t len) -{ - uint64_t Sigma[8] = { 0 }; - uint64_t N[8] = { 0 }; - uchar v512[64] = { 0 }; - v512[62] = 0x02; - - // Stage 2 - while (len >= 512) - { - uint64_t X[8]; - memcpy(X, message + len/8 - 63 - ( (len & 0x7) == 0 ), 64); - - GOST_g_N(hash, X, N); - GOST_Add512(N, N, v512); - GOST_Add512(Sigma, Sigma, X); - len -= 512; + GOST_FS(t, temp, shared); + + #pragma unroll 8 + for(uint32_t j=0;j<8;j++){ + t[ j] = temp[ j]; + K0[ j] = K0[ j] ^ *(uint2*)&CC[10][ j]; } - uint64_t M[8]; - uchar* m = (uchar*) M; - memset(m, 0, 64); - memcpy(m + 63 - len/8 + ( (len & 0x7) == 0 ), message, len/8 + 1 - ( (len & 0x7) == 0 )); + GOST_FS(K0, temp, shared); - // Stage 3 (Close) - m[ 63 - len/8 ] |= (1 << (len & 0x7)); + #pragma unroll 8 + for(int i=7;i>=0;i--){ + t[i].x = t[i].x ^ temp[i].x; + temp[i].x = temp[i].x ^ ((uint32_t*)&CC[11])[i<<1]; + } - GOST_g_N(hash, M, N); - v512[63] = len & 0xFF; - v512[62] = len >> 8; - GOST_Add512(N, N, v512); - GOST_Add512(Sigma, Sigma, M); + uint2 last[2]; - memset(M, 0, 64); - GOST_g_N(hash, N, M); - GOST_g_N(hash, Sigma, M); -} + last[ 0] = T0(__byte_perm(t[7].x,0,0x44443)) ^ T1(__byte_perm(t[6].x,0,0x44443)) + ^ T2(__byte_perm(t[5].x,0,0x44443)) ^ T3(__byte_perm(t[4].x,0,0x44443)) + ^ T4(__byte_perm(t[3].x,0,0x44443)) ^ T5(__byte_perm(t[2].x,0,0x44443)) + ^ T6(__byte_perm(t[1].x,0,0x44443)) ^ T7(__byte_perm(t[0].x,0,0x44443)); -__global__ -__launch_bounds__(128, 3) -void streebog_gpu_hash_64(uint32_t threads, uint64_t *g_hash) -{ - uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); - if (thread < threads) - { - uint64_t* inout = (&g_hash[thread * 8U]); -#if 0 - uint64_t hash[8] = { 0 }; - GOST_hash_X(hash, (uchar*) inout, 512); - GOST_Copy512(inout, hash); -#else - uint64_t hash[8]; - GOST_g_0(hash, inout); // init hash - - uint64_t N[8] = { 0 }; - ((uchar*)N)[62] = 0x2; // len 0x200 bits - uint64_t M[8] = { 0 }; - ((uchar*)M)[63] = 0x1; // end tag - - GOST_g_N(hash, M, N); - GOST_g_1(hash, N); - GOST_g_F(inout, hash, M); -#endif + last[ 1] = T0(__byte_perm(temp[7].x,0,0x44443)) ^ T1(__byte_perm(temp[6].x,0,0x44443)) + ^ T2(__byte_perm(temp[5].x,0,0x44443)) ^ T3(__byte_perm(temp[4].x,0,0x44443)) + ^ T4(__byte_perm(temp[3].x,0,0x44443)) ^ T5(__byte_perm(temp[2].x,0,0x44443)) + ^ T6(__byte_perm(temp[1].x,0,0x44443)) ^ T7(__byte_perm(temp[0].x,0,0x44443)); + + if(devectorize(buf[3] ^ hash[3] ^ last[ 0] ^ last[ 1]) <= target64[3]){ + uint32_t tmp = atomicExch(&resNonce[0], thread); + if (tmp != UINT32_MAX) + resNonce[1] = tmp; } } __host__ -void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash) +void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash,uint32_t* d_resNonce) { - const int threadsperblock = 128; - dim3 grid((threads + threadsperblock-1) / threadsperblock); - dim3 block(threadsperblock); + dim3 grid((threads + TPB-1) / TPB); + dim3 block(TPB); + if(((float)((threads + TPB-1) / TPB) - (int)((threads + TPB-1) / TPB))!=0.0){ + applog(LOG_WARNING,"Invalid intensity for streeebog final"); + } - streebog_gpu_hash_64<<>>(threads, (uint64_t*)d_hash); + streebog_gpu_hash_64_final<<>>((uint64_t*)d_hash, d_resNonce); } diff --git a/x11/sib.cu b/x11/sib.cu index 5629cc2..8804821 100644 --- a/x11/sib.cu +++ b/x11/sib.cu @@ -17,7 +17,7 @@ extern "C" { #include "cuda_helper.h" #include "cuda_x11.h" -extern void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); +extern void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash); #include #include @@ -104,9 +104,11 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 19 : 18; - uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8; - //if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); + const int dev_id = device_map[thr_id]; + int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 19 : 18; // 2^18 = 262144 cuda threads + if (device_sm[dev_id] >= 600) intensity = 20; + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ptarget[7] = 0xf; @@ -132,9 +134,9 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u x11_shavite512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput); if (x11_simd512_cpu_init(thr_id, throughput) != 0) { - return 0; + return -1; } - CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1); cuda_check_cpu_init(thr_id, throughput); @@ -165,7 +167,7 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u TRACE("jh512 :"); quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); TRACE("keccak :"); - streebog_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); + streebog_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); TRACE("gost :"); x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); TRACE("luffa+c:"); @@ -186,7 +188,6 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { int res = 1; - // check if there was some other ones... uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash64); *hashes_done = pdata[19] - first_nonce + throughput; @@ -200,7 +201,7 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u } pdata[19] = foundNonce; return res; - } else { + } else if (vhash64[7] > Htarg && !opt_quiet) { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = foundNonce + 1; continue; diff --git a/x11/veltor.cu b/x11/veltor.cu index ddcb9ce..452a346 100644 --- a/x11/veltor.cu +++ b/x11/veltor.cu @@ -9,29 +9,29 @@ extern "C" { #include "cuda_helper.h" #include "cuda_x11.h" -extern void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); - -extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); -extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); - -extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); extern void skein512_cpu_setBlock_80(void *pdata); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce); +extern void streebog_set_target(const uint32_t* ptarget); #include #include +#define NBN 2 static uint32_t *d_hash[MAX_GPUS]; +static uint32_t *d_resNonce[MAX_GPUS]; -// Veltor CPU Hash +// veltorcoin CPU Hash extern "C" void veltorhash(void *output, const void *input) { - uint8_t _ALIGN(64) hash[128] = { 0 }; + unsigned char _ALIGN(128) hash[128] = { 0 }; sph_skein512_context ctx_skein; - sph_shavite512_context ctx_shavite; - sph_shabal512_context ctx_shabal; sph_gost512_context ctx_gost; + sph_shabal512_context ctx_shabal; + sph_shavite512_context ctx_shavite; sph_skein512_init(&ctx_skein); sph_skein512(&ctx_skein, input, 80); @@ -52,19 +52,18 @@ extern "C" void veltorhash(void *output, const void *input) memcpy(output, hash, 32); } -//#define _DEBUG -#define _DEBUG_PREFIX "veltor" -#include "cuda_debug.cuh" - static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) { + int dev_id = device_map[thr_id]; + uint32_t *pdata = work->data; uint32_t *ptarget = work->target; const uint32_t first_nonce = pdata[19]; - int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 18; - uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8; + int intensity = (device_sm[device_map[thr_id]] > 500) ? 20 : 18; + if (strstr(device_name[dev_id], "GTX 10")) intensity = 21; + uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); //if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) @@ -79,58 +78,59 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); CUDA_LOG_ERROR(); } + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); quark_skein512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput); - x14_shabal512_cpu_init(thr_id, throughput); CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0); - - cuda_check_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1); init[thr_id] = true; } - uint32_t endiandata[20]; + uint32_t _ALIGN(64) h_resNonce[NBN]; + uint32_t _ALIGN(64) endiandata[20]; for (int k=0; k < 20; k++) be32enc(&endiandata[k], pdata[k]); skein512_cpu_setBlock_80(endiandata); - cuda_check_cpu_setTarget(ptarget); + + cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); + streebog_set_target(ptarget); do { int order = 0; - uint32_t foundNonce; - - // Hash with CUDA skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++; - TRACE("blake :"); x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - TRACE("shavite:"); x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - TRACE("shabal :"); - streebog_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); - TRACE("gost :"); + streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]); + + cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost); *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); - if (foundNonce != UINT32_MAX) + if (h_resNonce[0] != UINT32_MAX) { - const uint32_t Htarg = ptarget[7]; uint32_t _ALIGN(64) vhash[8]; - be32enc(&endiandata[19], foundNonce); - veltorhash(vhash, endiandata); + const uint32_t Htarg = ptarget[7]; + const uint32_t startNounce = pdata[19]; - if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { + be32enc(&endiandata[19], startNounce + h_resNonce[0]); + veltorhash(vhash, endiandata); + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) + { int res = 1; - // check if there was another one... - uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], res); work_set_target_ratio(work, vhash); - if (secNonce != 0) { + work->nonces[0] = startNounce + h_resNonce[0]; + if (h_resNonce[1] != UINT32_MAX) + { + uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1]; + gpulog(LOG_DEBUG, thr_id, "Found 2nd nonce: %08x", secNonce); be32enc(&endiandata[19], secNonce); veltorhash(vhash, endiandata); work->nonces[1] = secNonce; + if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { work_set_target_ratio(work, vhash); xchg(work->nonces[1], work->nonces[0]); @@ -139,24 +139,25 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce } res++; } - pdata[19] = work->nonces[0] = foundNonce; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; // next scan return res; - } else { - gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); - pdata[19] = foundNonce + 1; - continue; + } + else if (vhash[7] > Htarg && !opt_quiet) { + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]); + cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); } } - if ((uint64_t) throughput + pdata[19] >= max_nonce) { pdata[19] = max_nonce; break; } + pdata[19] += throughput; } while (!work_restart[thr_id].restart); *hashes_done = pdata[19] - first_nonce; + return 0; }