From 6c60ce23524a5bff6b60499b758bafc61c995964 Mon Sep 17 00:00:00 2001 From: Elbandi Date: Fri, 27 Jun 2014 18:52:31 +0200 Subject: [PATCH] Fix kernel code style: every hash has own codeblocks --- kernel/animecoin.cl | 472 ++++++++--------- kernel/darkcoin.cl | 1098 ++++++++++++++++++++-------------------- kernel/groestlcoin.cl | 163 +++--- kernel/inkcoin.cl | 225 ++++----- kernel/marucoin.cl | 1122 +++++++++++++++++++++-------------------- kernel/quarkcoin.cl | 546 ++++++++++---------- kernel/qubitcoin.cl | 664 ++++++++++++------------ kernel/sifcoin.cl | 435 ++++++++-------- kernel/twecoin.cl | 94 ++-- 9 files changed, 2425 insertions(+), 2394 deletions(-) diff --git a/kernel/animecoin.cl b/kernel/animecoin.cl index db2694df..f201440e 100644 --- a/kernel/animecoin.cl +++ b/kernel/animecoin.cl @@ -107,36 +107,37 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp } hash; // bmw - sph_u64 BMW_H[16]; - for(unsigned u = 0; u < 16; u++) - BMW_H[u] = BMW_IV512[u]; - - sph_u64 BMW_h1[16], BMW_h2[16]; - sph_u64 mv[16]; - - mv[0] = DEC64LE(block + 0); - mv[1] = DEC64LE(block + 8); - mv[2] = DEC64LE(block + 16); - mv[3] = DEC64LE(block + 24); - mv[4] = DEC64LE(block + 32); - mv[5] = DEC64LE(block + 40); - mv[6] = DEC64LE(block + 48); - mv[7] = DEC64LE(block + 56); - mv[8] = DEC64LE(block + 64); - mv[9] = DEC64LE(block + 72); - mv[9] &= 0x00000000FFFFFFFF; - mv[9] ^= ((sph_u64) gid) << 32; - mv[10] = 0x80; - mv[11] = 0; - mv[12] = 0; - mv[13] = 0; - mv[14] = 0; - mv[15] = 0x280; + { + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[0] = DEC64LE(block + 0); + mv[1] = DEC64LE(block + 8); + mv[2] = DEC64LE(block + 16); + mv[3] = DEC64LE(block + 24); + mv[4] = DEC64LE(block + 32); + mv[5] = DEC64LE(block + 40); + mv[6] = DEC64LE(block + 48); + mv[7] = DEC64LE(block + 56); + mv[8] = DEC64LE(block + 64); + mv[9] = DEC64LE(block + 72); + mv[9] &= 0x00000000FFFFFFFF; + mv[9] ^= ((sph_u64) gid) << 32; + mv[10] = 0x80; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x280; #define M(x) (mv[x]) #define H(x) (BMW_H[x]) #define dH(x) (BMW_h2[x]) - FOLDb; + FOLDb; #undef M #undef H @@ -146,69 +147,70 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define H(x) (final_b[x]) #define dH(x) (BMW_h1[x]) - FOLDb; + FOLDb; #undef M #undef H #undef dH - hash.h8[0] = SWAP8(BMW_h1[8]); - hash.h8[1] = SWAP8(BMW_h1[9]); - hash.h8[2] = SWAP8(BMW_h1[10]); - hash.h8[3] = SWAP8(BMW_h1[11]); - hash.h8[4] = SWAP8(BMW_h1[12]); - hash.h8[5] = SWAP8(BMW_h1[13]); - hash.h8[6] = SWAP8(BMW_h1[14]); - hash.h8[7] = SWAP8(BMW_h1[15]); + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } // blake -{ - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (64 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) { - T1 = SPH_T64(T1 + 1); + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (64 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = hash.h8[0]; + M1 = hash.h8[1]; + M2 = hash.h8[2]; + M3 = hash.h8[3]; + M4 = hash.h8[4]; + M5 = hash.h8[5]; + M6 = hash.h8[6]; + M7 = hash.h8[7]; + M8 = 0x8000000000000000; + M9 = 0; + MA = 0; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x200; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = hash.h8[0]; - M1 = hash.h8[1]; - M2 = hash.h8[2]; - M3 = hash.h8[3]; - M4 = hash.h8[4]; - M5 = hash.h8[5]; - M6 = hash.h8[6]; - M7 = hash.h8[7]; - M8 = 0x8000000000000000; - M9 = 0; - MA = 0; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x200; - - COMPRESS64; - - hash.h8[0] = H0; - hash.h8[1] = H1; - hash.h8[2] = H2; - hash.h8[3] = H3; - hash.h8[4] = H4; - hash.h8[5] = H5; - hash.h8[6] = H6; - hash.h8[7] = H7; -} + bool dec = ((hash.h1[7] & 0x8) != 0); { - // groestl sph_u64 H[16]; @@ -251,7 +253,6 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp H[u] ^= xH[u]; for (unsigned int u = 0; u < 8; u ++) hash.h8[u] = (dec ? DEC64E(H[u + 8]) : hash.h8[u]); - } { @@ -284,95 +285,96 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp } // groestl - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; #if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); #else - H[15] = (sph_u64)512; + H[15] = (sph_u64)512; #endif - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash.h8[0]); - m[1] = DEC64E(hash.h8[1]); - m[2] = DEC64E(hash.h8[2]); - m[3] = DEC64E(hash.h8[3]); - m[4] = DEC64E(hash.h8[4]); - m[5] = DEC64E(hash.h8[5]); - m[6] = DEC64E(hash.h8[6]); - m[7] = DEC64E(hash.h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = DEC64E(H[u + 8]); + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } // jh + { + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); - for(int i = 0; i < 2; i++) - { - if (i == 0) { - h0h ^= DEC64E(hash.h8[0]); - h0l ^= DEC64E(hash.h8[1]); - h1h ^= DEC64E(hash.h8[2]); - h1l ^= DEC64E(hash.h8[3]); - h2h ^= DEC64E(hash.h8[4]); - h2l ^= DEC64E(hash.h8[5]); - h3h ^= DEC64E(hash.h8[6]); - h3l ^= DEC64E(hash.h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash.h8[0]); - h4l ^= DEC64E(hash.h8[1]); - h5h ^= DEC64E(hash.h8[2]); - h5l ^= DEC64E(hash.h8[3]); - h6h ^= DEC64E(hash.h8[4]); - h6l ^= DEC64E(hash.h8[5]); - h7h ^= DEC64E(hash.h8[6]); - h7l ^= DEC64E(hash.h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; } - E8; + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash.h8[0] = DEC64E(h4h); - hash.h8[1] = DEC64E(h4l); - hash.h8[2] = DEC64E(h5h); - hash.h8[3] = DEC64E(h5l); - hash.h8[4] = DEC64E(h6h); - hash.h8[5] = DEC64E(h6l); - hash.h8[6] = DEC64E(h7h); - hash.h8[7] = DEC64E(h7l); dec = ((hash.h1[7] & 0x8) != 0); { - // blake sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); @@ -417,7 +419,6 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp hash.h8[5] = (dec ? H5 : hash.h8[5]); hash.h8[6] = (dec ? H6 : hash.h8[6]); hash.h8[7] = (dec ? H7 : hash.h8[7]); - } { @@ -477,75 +478,75 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp } // keccak + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash.h8[0]); - a10 ^= SWAP8(hash.h8[1]); - a20 ^= SWAP8(hash.h8[2]); - a30 ^= SWAP8(hash.h8[3]); - a40 ^= SWAP8(hash.h8[4]); - a01 ^= SWAP8(hash.h8[5]); - a11 ^= SWAP8(hash.h8[6]); - a21 ^= SWAP8(hash.h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash.h8[0] = SWAP8(a00); - hash.h8[1] = SWAP8(a10); - hash.h8[2] = SWAP8(a20); - hash.h8[3] = SWAP8(a30); - hash.h8[4] = SWAP8(a40); - hash.h8[5] = SWAP8(a01); - hash.h8[6] = SWAP8(a11); - hash.h8[7] = SWAP8(a21); + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - // skein + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } - dec = ((hash.h1[7] & 0x8) != 0); + // skein { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; - // keccak + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } + if ((hash.h1[7] & 0x8) != 0) + { + // keccak sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; @@ -573,16 +574,16 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp a10 = ~a10; a20 = ~a20; - hash.h8[0] = (dec ? SWAP8(a00) : hash.h8[0]); - hash.h8[1] = (dec ? SWAP8(a10) : hash.h8[1]); - hash.h8[2] = (dec ? SWAP8(a20) : hash.h8[2]); - hash.h8[3] = (dec ? SWAP8(a30) : hash.h8[3]); - hash.h8[4] = (dec ? SWAP8(a40) : hash.h8[4]); - hash.h8[5] = (dec ? SWAP8(a01) : hash.h8[5]); - hash.h8[6] = (dec ? SWAP8(a11) : hash.h8[6]); - hash.h8[7] = (dec ? SWAP8(a21) : hash.h8[7]); - + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); } + else { // jh @@ -620,15 +621,14 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp h4h ^= 0x80; h7l ^= 0x2000000000000; - hash.h8[0] = (!dec ? DEC64E(h4h) : hash.h8[0]); - hash.h8[1] = (!dec ? DEC64E(h4l) : hash.h8[1]); - hash.h8[2] = (!dec ? DEC64E(h5h) : hash.h8[2]); - hash.h8[3] = (!dec ? DEC64E(h5l) : hash.h8[3]); - hash.h8[4] = (!dec ? DEC64E(h6h) : hash.h8[4]); - hash.h8[5] = (!dec ? DEC64E(h6l) : hash.h8[5]); - hash.h8[6] = (!dec ? DEC64E(h7h) : hash.h8[6]); - hash.h8[7] = (!dec ? DEC64E(h7l) : hash.h8[7]); - + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } bool result = (SWAP8(hash.h8[3]) <= target); diff --git a/kernel/darkcoin.cl b/kernel/darkcoin.cl index c661aec8..9fe40565 100644 --- a/kernel/darkcoin.cl +++ b/kernel/darkcoin.cl @@ -122,81 +122,82 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp barrier(CLK_LOCAL_MEM_FENCE); // blake -{ - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) { - T1 = SPH_T64(T1 + 1); + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = DEC64BE(block + 0); - M1 = DEC64BE(block + 8); - M2 = DEC64BE(block + 16); - M3 = DEC64BE(block + 24); - M4 = DEC64BE(block + 32); - M5 = DEC64BE(block + 40); - M6 = DEC64BE(block + 48); - M7 = DEC64BE(block + 56); - M8 = DEC64BE(block + 64); - M9 = DEC64BE(block + 72); - M9 &= 0xFFFFFFFF00000000; - M9 ^= SWAP4(gid); - MA = 0x8000000000000000; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x280; - - COMPRESS64; - - hash.h8[0] = H0; - hash.h8[1] = H1; - hash.h8[2] = H2; - hash.h8[3] = H3; - hash.h8[4] = H4; - hash.h8[5] = H5; - hash.h8[6] = H6; - hash.h8[7] = H7; -} // bmw - sph_u64 BMW_H[16]; - for(unsigned u = 0; u < 16; u++) - BMW_H[u] = BMW_IV512[u]; - - sph_u64 BMW_h1[16], BMW_h2[16]; - sph_u64 mv[16]; - - mv[ 0] = SWAP8(hash.h8[0]); - mv[ 1] = SWAP8(hash.h8[1]); - mv[ 2] = SWAP8(hash.h8[2]); - mv[ 3] = SWAP8(hash.h8[3]); - mv[ 4] = SWAP8(hash.h8[4]); - mv[ 5] = SWAP8(hash.h8[5]); - mv[ 6] = SWAP8(hash.h8[6]); - mv[ 7] = SWAP8(hash.h8[7]); - mv[ 8] = 0x80; - mv[ 9] = 0; - mv[10] = 0; - mv[11] = 0; - mv[12] = 0; - mv[13] = 0; - mv[14] = 0; - mv[15] = 0x200; + { + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; #define M(x) (mv[x]) #define H(x) (BMW_H[x]) #define dH(x) (BMW_h2[x]) - FOLDb; + FOLDb; #undef M #undef H @@ -206,516 +207,527 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define H(x) (final_b[x]) #define dH(x) (BMW_h1[x]) - FOLDb; + FOLDb; #undef M #undef H #undef dH - hash.h8[0] = SWAP8(BMW_h1[8]); - hash.h8[1] = SWAP8(BMW_h1[9]); - hash.h8[2] = SWAP8(BMW_h1[10]); - hash.h8[3] = SWAP8(BMW_h1[11]); - hash.h8[4] = SWAP8(BMW_h1[12]); - hash.h8[5] = SWAP8(BMW_h1[13]); - hash.h8[6] = SWAP8(BMW_h1[14]); - hash.h8[7] = SWAP8(BMW_h1[15]); + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } // groestl - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else - H[15] = (sph_u64)512; -#endif - - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash.h8[0]); - m[1] = DEC64E(hash.h8[1]); - m[2] = DEC64E(hash.h8[2]); - m[3] = DEC64E(hash.h8[3]); - m[4] = DEC64E(hash.h8[4]); - m[5] = DEC64E(hash.h8[5]); - m[6] = DEC64E(hash.h8[6]); - m[7] = DEC64E(hash.h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = DEC64E(H[u + 8]); + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } // skein - - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); + { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } // jh - - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; - - for(int i = 0; i < 2; i++) { - if (i == 0) { - h0h ^= DEC64E(hash.h8[0]); - h0l ^= DEC64E(hash.h8[1]); - h1h ^= DEC64E(hash.h8[2]); - h1l ^= DEC64E(hash.h8[3]); - h2h ^= DEC64E(hash.h8[4]); - h2l ^= DEC64E(hash.h8[5]); - h3h ^= DEC64E(hash.h8[6]); - h3l ^= DEC64E(hash.h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash.h8[0]); - h4l ^= DEC64E(hash.h8[1]); - h5h ^= DEC64E(hash.h8[2]); - h5l ^= DEC64E(hash.h8[3]); - h6h ^= DEC64E(hash.h8[4]); - h6l ^= DEC64E(hash.h8[5]); - h7h ^= DEC64E(hash.h8[6]); - h7l ^= DEC64E(hash.h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; } - E8; + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash.h8[0] = DEC64E(h4h); - hash.h8[1] = DEC64E(h4l); - hash.h8[2] = DEC64E(h5h); - hash.h8[3] = DEC64E(h5l); - hash.h8[4] = DEC64E(h6h); - hash.h8[5] = DEC64E(h6l); - hash.h8[6] = DEC64E(h7h); - hash.h8[7] = DEC64E(h7l); // keccak - - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash.h8[0]); - a10 ^= SWAP8(hash.h8[1]); - a20 ^= SWAP8(hash.h8[2]); - a30 ^= SWAP8(hash.h8[3]); - a40 ^= SWAP8(hash.h8[4]); - a01 ^= SWAP8(hash.h8[5]); - a11 ^= SWAP8(hash.h8[6]); - a21 ^= SWAP8(hash.h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash.h8[0] = SWAP8(a00); - hash.h8[1] = SWAP8(a10); - hash.h8[2] = SWAP8(a20); - hash.h8[3] = SWAP8(a30); - hash.h8[4] = SWAP8(a40); - hash.h8[5] = SWAP8(a01); - hash.h8[6] = SWAP8(a11); - hash.h8[7] = SWAP8(a21); + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } // luffa - - sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); - sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); - sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); - sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); - sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); - - DECL_TMP8(M); - - M0 = hash.h4[1]; - M1 = hash.h4[0]; - M2 = hash.h4[3]; - M3 = hash.h4[2]; - M4 = hash.h4[5]; - M5 = hash.h4[4]; - M6 = hash.h4[7]; - M7 = hash.h4[6]; - - for(uint i = 0; i < 5; i++) { - MI5; - LUFFA_P5; - - if(i == 0) { - M0 = hash.h4[9]; - M1 = hash.h4[8]; - M2 = hash.h4[11]; - M3 = hash.h4[10]; - M4 = hash.h4[13]; - M5 = hash.h4[12]; - M6 = hash.h4[15]; - M7 = hash.h4[14]; - } else if(i == 1) { - M0 = 0x80000000; - M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; - } else if(i == 2) { - M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; - } else if(i == 3) { - hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash.h4[1]; + M1 = hash.h4[0]; + M2 = hash.h4[3]; + M3 = hash.h4[2]; + M4 = hash.h4[5]; + M5 = hash.h4[4]; + M6 = hash.h4[7]; + M7 = hash.h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = hash.h4[9]; + M1 = hash.h4[8]; + M2 = hash.h4[11]; + M3 = hash.h4[10]; + M4 = hash.h4[13]; + M5 = hash.h4[12]; + M6 = hash.h4[15]; + M7 = hash.h4[14]; + } else if(i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } } + hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; } - hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; // cubehash.h1 - - sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); - sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); - sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); - sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); - sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); - sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); - sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); - sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); - - x0 ^= SWAP4(hash.h4[1]); - x1 ^= SWAP4(hash.h4[0]); - x2 ^= SWAP4(hash.h4[3]); - x3 ^= SWAP4(hash.h4[2]); - x4 ^= SWAP4(hash.h4[5]); - x5 ^= SWAP4(hash.h4[4]); - x6 ^= SWAP4(hash.h4[7]); - x7 ^= SWAP4(hash.h4[6]); - - for (int i = 0; i < 13; i ++) { - SIXTEEN_ROUNDS; - - if (i == 0) { - x0 ^= SWAP4(hash.h4[9]); - x1 ^= SWAP4(hash.h4[8]); - x2 ^= SWAP4(hash.h4[11]); - x3 ^= SWAP4(hash.h4[10]); - x4 ^= SWAP4(hash.h4[13]); - x5 ^= SWAP4(hash.h4[12]); - x6 ^= SWAP4(hash.h4[15]); - x7 ^= SWAP4(hash.h4[14]); - } else if(i == 1) { - x0 ^= 0x80; - } else if (i == 2) { - xv ^= SPH_C32(1); + { + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash.h4[1]); + x1 ^= SWAP4(hash.h4[0]); + x2 ^= SWAP4(hash.h4[3]); + x3 ^= SWAP4(hash.h4[2]); + x4 ^= SWAP4(hash.h4[5]); + x5 ^= SWAP4(hash.h4[4]); + x6 ^= SWAP4(hash.h4[7]); + x7 ^= SWAP4(hash.h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash.h4[9]); + x1 ^= SWAP4(hash.h4[8]); + x2 ^= SWAP4(hash.h4[11]); + x3 ^= SWAP4(hash.h4[10]); + x4 ^= SWAP4(hash.h4[13]); + x5 ^= SWAP4(hash.h4[12]); + x6 ^= SWAP4(hash.h4[15]); + x7 ^= SWAP4(hash.h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } } - } - hash.h4[0] = x0; - hash.h4[1] = x1; - hash.h4[2] = x2; - hash.h4[3] = x3; - hash.h4[4] = x4; - hash.h4[5] = x5; - hash.h4[6] = x6; - hash.h4[7] = x7; - hash.h4[8] = x8; - hash.h4[9] = x9; - hash.h4[10] = xa; - hash.h4[11] = xb; - hash.h4[12] = xc; - hash.h4[13] = xd; - hash.h4[14] = xe; - hash.h4[15] = xf; + hash.h4[0] = x0; + hash.h4[1] = x1; + hash.h4[2] = x2; + hash.h4[3] = x3; + hash.h4[4] = x4; + hash.h4[5] = x5; + hash.h4[6] = x6; + hash.h4[7] = x7; + hash.h4[8] = x8; + hash.h4[9] = x9; + hash.h4[10] = xa; + hash.h4[11] = xb; + hash.h4[12] = xc; + hash.h4[13] = xd; + hash.h4[14] = xe; + hash.h4[15] = xf; + } // shavite { - // IV - sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); - sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); - sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); - sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); - - // state - sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; - sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; - sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; - sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; - - rk00 = hash.h4[0]; - rk01 = hash.h4[1]; - rk02 = hash.h4[2]; - rk03 = hash.h4[3]; - rk04 = hash.h4[4]; - rk05 = hash.h4[5]; - rk06 = hash.h4[6]; - rk07 = hash.h4[7]; - rk08 = hash.h4[8]; - rk09 = hash.h4[9]; - rk0A = hash.h4[10]; - rk0B = hash.h4[11]; - rk0C = hash.h4[12]; - rk0D = hash.h4[13]; - rk0E = hash.h4[14]; - rk0F = hash.h4[15]; - rk10 = 0x80; - rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; - rk1B = 0x2000000; - rk1C = rk1D = rk1E = 0; - rk1F = 0x2000000; - - c512(buf); - - hash.h4[0] = h0; - hash.h4[1] = h1; - hash.h4[2] = h2; - hash.h4[3] = h3; - hash.h4[4] = h4; - hash.h4[5] = h5; - hash.h4[6] = h6; - hash.h4[7] = h7; - hash.h4[8] = h8; - hash.h4[9] = h9; - hash.h4[10] = hA; - hash.h4[11] = hB; - hash.h4[12] = hC; - hash.h4[13] = hD; - hash.h4[14] = hE; - hash.h4[15] = hF; + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; } // simd - s32 q[256]; - unsigned char x[128]; - for(unsigned int i = 0; i < 64; i++) - x[i] = hash.h1[i]; - for(unsigned int i = 64; i < 128; i++) - x[i] = 0; - - u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); - u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); - u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); - u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); - - FFT256(0, 1, 0, ll1); - for (int i = 0; i < 256; i ++) { - s32 tq; - - tq = q[i] + yoff_b_n[i]; - tq = REDS2(tq); - tq = REDS1(tq); - tq = REDS1(tq); - q[i] = (tq <= 128 ? tq : tq - 257); - } + { + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash.h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } - A0 ^= hash.h4[0]; - A1 ^= hash.h4[1]; - A2 ^= hash.h4[2]; - A3 ^= hash.h4[3]; - A4 ^= hash.h4[4]; - A5 ^= hash.h4[5]; - A6 ^= hash.h4[6]; - A7 ^= hash.h4[7]; - B0 ^= hash.h4[8]; - B1 ^= hash.h4[9]; - B2 ^= hash.h4[10]; - B3 ^= hash.h4[11]; - B4 ^= hash.h4[12]; - B5 ^= hash.h4[13]; - B6 ^= hash.h4[14]; - B7 ^= hash.h4[15]; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - - STEP_BIG( - C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), - C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), - IF, 4, 13, PP8_4_); - STEP_BIG( - C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), - C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), - IF, 13, 10, PP8_5_); - STEP_BIG( - C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), - C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), - IF, 10, 25, PP8_6_); - STEP_BIG( - C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), - C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), - IF, 25, 4, PP8_0_); - - u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; - u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; - u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; - u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; - - #define q SIMD_Q - - A0 ^= 0x200; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - STEP_BIG( - COPY_A0, COPY_A1, COPY_A2, COPY_A3, - COPY_A4, COPY_A5, COPY_A6, COPY_A7, - IF, 4, 13, PP8_4_); - STEP_BIG( - COPY_B0, COPY_B1, COPY_B2, COPY_B3, - COPY_B4, COPY_B5, COPY_B6, COPY_B7, - IF, 13, 10, PP8_5_); - STEP_BIG( - COPY_C0, COPY_C1, COPY_C2, COPY_C3, - COPY_C4, COPY_C5, COPY_C6, COPY_C7, - IF, 10, 25, PP8_6_); - STEP_BIG( - COPY_D0, COPY_D1, COPY_D2, COPY_D3, - COPY_D4, COPY_D5, COPY_D6, COPY_D7, - IF, 25, 4, PP8_0_); - #undef q - - hash.h4[0] = A0; - hash.h4[1] = A1; - hash.h4[2] = A2; - hash.h4[3] = A3; - hash.h4[4] = A4; - hash.h4[5] = A5; - hash.h4[6] = A6; - hash.h4[7] = A7; - hash.h4[8] = B0; - hash.h4[9] = B1; - hash.h4[10] = B2; - hash.h4[11] = B3; - hash.h4[12] = B4; - hash.h4[13] = B5; - hash.h4[14] = B6; - hash.h4[15] = B7; + A0 ^= hash.h4[0]; + A1 ^= hash.h4[1]; + A2 ^= hash.h4[2]; + A3 ^= hash.h4[3]; + A4 ^= hash.h4[4]; + A5 ^= hash.h4[5]; + A6 ^= hash.h4[6]; + A7 ^= hash.h4[7]; + B0 ^= hash.h4[8]; + B1 ^= hash.h4[9]; + B2 ^= hash.h4[10]; + B3 ^= hash.h4[11]; + B4 ^= hash.h4[12]; + B5 ^= hash.h4[13]; + B6 ^= hash.h4[14]; + B7 ^= hash.h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash.h4[0] = A0; + hash.h4[1] = A1; + hash.h4[2] = A2; + hash.h4[3] = A3; + hash.h4[4] = A4; + hash.h4[5] = A5; + hash.h4[6] = A6; + hash.h4[7] = A7; + hash.h4[8] = B0; + hash.h4[9] = B1; + hash.h4[10] = B2; + hash.h4[11] = B3; + hash.h4[12] = B4; + hash.h4[13] = B5; + hash.h4[14] = B6; + hash.h4[15] = B7; + } // echo - sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; - sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; - Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; - Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; - - sph_u32 K0 = 512; - sph_u32 K1 = 0; - sph_u32 K2 = 0; - sph_u32 K3 = 0; - - W00 = Vb00; - W01 = Vb01; - W10 = Vb10; - W11 = Vb11; - W20 = Vb20; - W21 = Vb21; - W30 = Vb30; - W31 = Vb31; - W40 = Vb40; - W41 = Vb41; - W50 = Vb50; - W51 = Vb51; - W60 = Vb60; - W61 = Vb61; - W70 = Vb70; - W71 = Vb71; - W80 = hash.h8[0]; - W81 = hash.h8[1]; - W90 = hash.h8[2]; - W91 = hash.h8[3]; - WA0 = hash.h8[4]; - WA1 = hash.h8[5]; - WB0 = hash.h8[6]; - WB1 = hash.h8[7]; - WC0 = 0x80; - WC1 = 0; - WD0 = 0; - WD1 = 0; - WE0 = 0; - WE1 = 0x200000000000000; - WF0 = 0x200; - WF1 = 0; - - for (unsigned u = 0; u < 10; u ++) { - BIG_ROUND; - } + { + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } - Vb00 ^= hash.h8[0] ^ W00 ^ W80; - Vb01 ^= hash.h8[1] ^ W01 ^ W81; - Vb10 ^= hash.h8[2] ^ W10 ^ W90; - Vb11 ^= hash.h8[3] ^ W11 ^ W91; - Vb20 ^= hash.h8[4] ^ W20 ^ WA0; - Vb21 ^= hash.h8[5] ^ W21 ^ WA1; - Vb30 ^= hash.h8[6] ^ W30 ^ WB0; - Vb31 ^= hash.h8[7] ^ W31 ^ WB1; - - bool result = (Vb11 <= target); - if (result) - output[output[0xFF]++] = SWAP4(gid); + Vb00 ^= hash.h8[0] ^ W00 ^ W80; + Vb01 ^= hash.h8[1] ^ W01 ^ W81; + Vb10 ^= hash.h8[2] ^ W10 ^ W90; + Vb11 ^= hash.h8[3] ^ W11 ^ W91; + Vb20 ^= hash.h8[4] ^ W20 ^ WA0; + Vb21 ^= hash.h8[5] ^ W21 ^ WA1; + Vb30 ^= hash.h8[6] ^ W30 ^ WB0; + Vb31 ^= hash.h8[7] ^ W31 ^ WB1; + + bool result = (Vb11 <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); + } } #endif // DARKCOIN_CL diff --git a/kernel/groestlcoin.cl b/kernel/groestlcoin.cl index f8803869..9ef4435e 100644 --- a/kernel/groestlcoin.cl +++ b/kernel/groestlcoin.cl @@ -161,87 +161,88 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define T7 T7_L // groestl - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else - H[15] = (sph_u64)512; -#endif - - sph_u64 g[16], m[16]; - m[0] = DEC64E(block + 0 * 8); - m[1] = DEC64E(block + 1 * 8); - m[2] = DEC64E(block + 2 * 8); - m[3] = DEC64E(block + 3 * 8); - m[4] = DEC64E(block + 4 * 8); - m[5] = DEC64E(block + 5 * 8); - m[6] = DEC64E(block + 6 * 8); - m[7] = DEC64E(block + 7 * 8); - m[8] = DEC64E(block + 8 * 8); - m[9] = DEC64E(block + 9 * 8); - m[9] &= 0x00000000FFFFFFFF; - m[9] |= ((sph_u64) gid << 32); - m[10] = 0x80; - m[11] = 0; - m[12] = 0; - m[13] = 0; - m[14] = 0; - m[15] = 0x100000000000000; - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = ENC64E(H[u + 8]); - - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else - H[15] = (sph_u64)512; -#endif - - m[0] = hash.h8[0]; - m[1] = hash.h8[1]; - m[2] = hash.h8[2]; - m[3] = hash.h8[3]; - m[4] = hash.h8[4]; - m[5] = hash.h8[5]; - m[6] = hash.h8[6]; - m[7] = hash.h8[7]; - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = H[u + 8]; + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(block + 0 * 8); + m[1] = DEC64E(block + 1 * 8); + m[2] = DEC64E(block + 2 * 8); + m[3] = DEC64E(block + 3 * 8); + m[4] = DEC64E(block + 4 * 8); + m[5] = DEC64E(block + 5 * 8); + m[6] = DEC64E(block + 6 * 8); + m[7] = DEC64E(block + 7 * 8); + m[8] = DEC64E(block + 8 * 8); + m[9] = DEC64E(block + 9 * 8); + m[9] &= 0x00000000FFFFFFFF; + m[9] |= ((sph_u64) gid << 32); + m[10] = 0x80; + m[11] = 0; + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 0x100000000000000; + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = ENC64E(H[u + 8]); + + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + m[0] = hash.h8[0]; + m[1] = hash.h8[1]; + m[2] = hash.h8[2]; + m[3] = hash.h8[3]; + m[4] = hash.h8[4]; + m[5] = hash.h8[5]; + m[6] = hash.h8[6]; + m[7] = hash.h8[7]; + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = H[u + 8]; + } bool result = (hash.h8[3] <= target); if (result) diff --git a/kernel/inkcoin.cl b/kernel/inkcoin.cl index f2b31a7b..0fc25e65 100644 --- a/kernel/inkcoin.cl +++ b/kernel/inkcoin.cl @@ -115,123 +115,122 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp // shavite { - // IV - sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); - sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); - sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); - sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); - - // state - sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; - sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; - sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; - sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - - sph_u32 sc_count0 = (80 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; - - rk00 = DEC32LE(block + 0 * 4);; - rk01 = DEC32LE(block + 1 * 4);; - rk02 = DEC32LE(block + 2 * 4);; - rk03 = DEC32LE(block + 3 * 4);; - rk04 = DEC32LE(block + 4 * 4);; - rk05 = DEC32LE(block + 5 * 4);; - rk06 = DEC32LE(block + 6 * 4);; - rk07 = DEC32LE(block + 7 * 4);; - rk08 = DEC32LE(block + 8 * 4);; - rk09 = DEC32LE(block + 9 * 4);; - rk0A = DEC32LE(block + 10 * 4);; - rk0B = DEC32LE(block + 11 * 4);; - rk0C = DEC32LE(block + 12 * 4);; - rk0D = DEC32LE(block + 13 * 4);; - rk0E = DEC32LE(block + 14 * 4);; - rk0F = DEC32LE(block + 15 * 4);; - rk10 = DEC32LE(block + 16 * 4);; - rk11 = DEC32LE(block + 17 * 4);; - rk12 = DEC32LE(block + 18 * 4);; - rk13 = gid; - rk14 = 0x80; - rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; - rk1B = 0x2800000; - rk1C = rk1D = rk1E = 0; - rk1F = 0x2000000; - - c512(buf); - - hash.h4[0] = h0; - hash.h4[1] = h1; - hash.h4[2] = h2; - hash.h4[3] = h3; - hash.h4[4] = h4; - hash.h4[5] = h5; - hash.h4[6] = h6; - hash.h4[7] = h7; - hash.h4[8] = h8; - hash.h4[9] = h9; - hash.h4[10] = hA; - hash.h4[11] = hB; - hash.h4[12] = hC; - hash.h4[13] = hD; - hash.h4[14] = hE; - hash.h4[15] = hF; + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (80 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = DEC32LE(block + 0 * 4);; + rk01 = DEC32LE(block + 1 * 4);; + rk02 = DEC32LE(block + 2 * 4);; + rk03 = DEC32LE(block + 3 * 4);; + rk04 = DEC32LE(block + 4 * 4);; + rk05 = DEC32LE(block + 5 * 4);; + rk06 = DEC32LE(block + 6 * 4);; + rk07 = DEC32LE(block + 7 * 4);; + rk08 = DEC32LE(block + 8 * 4);; + rk09 = DEC32LE(block + 9 * 4);; + rk0A = DEC32LE(block + 10 * 4);; + rk0B = DEC32LE(block + 11 * 4);; + rk0C = DEC32LE(block + 12 * 4);; + rk0D = DEC32LE(block + 13 * 4);; + rk0E = DEC32LE(block + 14 * 4);; + rk0F = DEC32LE(block + 15 * 4);; + rk10 = DEC32LE(block + 16 * 4);; + rk11 = DEC32LE(block + 17 * 4);; + rk12 = DEC32LE(block + 18 * 4);; + rk13 = gid; + rk14 = 0x80; + rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2800000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; } - // shavite { - // IV - sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); - sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); - sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); - sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); - - // state - sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; - sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; - sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; - sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; - - rk00 = hash.h4[0]; - rk01 = hash.h4[1]; - rk02 = hash.h4[2]; - rk03 = hash.h4[3]; - rk04 = hash.h4[4]; - rk05 = hash.h4[5]; - rk06 = hash.h4[6]; - rk07 = hash.h4[7]; - rk08 = hash.h4[8]; - rk09 = hash.h4[9]; - rk0A = hash.h4[10]; - rk0B = hash.h4[11]; - rk0C = hash.h4[12]; - rk0D = hash.h4[13]; - rk0E = hash.h4[14]; - rk0F = hash.h4[15]; - rk10 = 0x80; - rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; - rk1B = 0x2000000; - rk1C = rk1D = rk1E = 0; - rk1F = 0x2000000; - - c512(buf); - - hash.h4[0] = h0; - hash.h4[1] = h1; - hash.h4[2] = h2; - hash.h4[3] = h3; - hash.h4[4] = h4; - hash.h4[5] = h5; - hash.h4[6] = h6; - hash.h4[7] = h7; - hash.h4[8] = h8; - hash.h4[9] = h9; - hash.h4[10] = hA; - hash.h4[11] = hB; - hash.h4[12] = hC; - hash.h4[13] = hD; - hash.h4[14] = hE; - hash.h4[15] = hF; + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; } bool result = (hash.h8[3] <= target); diff --git a/kernel/marucoin.cl b/kernel/marucoin.cl index 101e3734..76342615 100644 --- a/kernel/marucoin.cl +++ b/kernel/marucoin.cl @@ -126,82 +126,83 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp barrier(CLK_LOCAL_MEM_FENCE); // blake -{ - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) { - T1 = SPH_T64(T1 + 1); + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = DEC64BE(block + 0); - M1 = DEC64BE(block + 8); - M2 = DEC64BE(block + 16); - M3 = DEC64BE(block + 24); - M4 = DEC64BE(block + 32); - M5 = DEC64BE(block + 40); - M6 = DEC64BE(block + 48); - M7 = DEC64BE(block + 56); - M8 = DEC64BE(block + 64); - M9 = DEC64BE(block + 72); - M9 &= 0xFFFFFFFF00000000; - M9 ^= SWAP4(gid); - MA = 0x8000000000000000; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x280; - - COMPRESS64; - - hash.h8[0] = H0; - hash.h8[1] = H1; - hash.h8[2] = H2; - hash.h8[3] = H3; - hash.h8[4] = H4; - hash.h8[5] = H5; - hash.h8[6] = H6; - hash.h8[7] = H7; -} // bmw - sph_u64 BMW_H[16]; - for(unsigned u = 0; u < 16; u++) - BMW_H[u] = BMW_IV512[u]; - - sph_u64 BMW_h1[16], BMW_h2[16]; - sph_u64 mv[16]; - - mv[ 0] = SWAP8(hash.h8[0]); - mv[ 1] = SWAP8(hash.h8[1]); - mv[ 2] = SWAP8(hash.h8[2]); - mv[ 3] = SWAP8(hash.h8[3]); - mv[ 4] = SWAP8(hash.h8[4]); - mv[ 5] = SWAP8(hash.h8[5]); - mv[ 6] = SWAP8(hash.h8[6]); - mv[ 7] = SWAP8(hash.h8[7]); - mv[ 8] = 0x80; - mv[ 9] = 0; - mv[10] = 0; - mv[11] = 0; - mv[12] = 0; - mv[13] = 0; - mv[14] = 0; - mv[15] = 0x200; + { + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; #define M(x) (mv[x]) #define H(x) (BMW_H[x]) #define dH(x) (BMW_h2[x]) - FOLDb; + FOLDb; #undef M #undef H @@ -211,524 +212,535 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define H(x) (final_b[x]) #define dH(x) (BMW_h1[x]) - FOLDb; + FOLDb; #undef M #undef H #undef dH - hash.h8[0] = SWAP8(BMW_h1[8]); - hash.h8[1] = SWAP8(BMW_h1[9]); - hash.h8[2] = SWAP8(BMW_h1[10]); - hash.h8[3] = SWAP8(BMW_h1[11]); - hash.h8[4] = SWAP8(BMW_h1[12]); - hash.h8[5] = SWAP8(BMW_h1[13]); - hash.h8[6] = SWAP8(BMW_h1[14]); - hash.h8[7] = SWAP8(BMW_h1[15]); + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } // groestl - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else - H[15] = (sph_u64)512; -#endif - - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash.h8[0]); - m[1] = DEC64E(hash.h8[1]); - m[2] = DEC64E(hash.h8[2]); - m[3] = DEC64E(hash.h8[3]); - m[4] = DEC64E(hash.h8[4]); - m[5] = DEC64E(hash.h8[5]); - m[6] = DEC64E(hash.h8[6]); - m[7] = DEC64E(hash.h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = DEC64E(H[u + 8]); + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } // skein + { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); - - // jh - - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; - - for(int i = 0; i < 2; i++) + // jh { - if (i == 0) { - h0h ^= DEC64E(hash.h8[0]); - h0l ^= DEC64E(hash.h8[1]); - h1h ^= DEC64E(hash.h8[2]); - h1l ^= DEC64E(hash.h8[3]); - h2h ^= DEC64E(hash.h8[4]); - h2l ^= DEC64E(hash.h8[5]); - h3h ^= DEC64E(hash.h8[6]); - h3l ^= DEC64E(hash.h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash.h8[0]); - h4l ^= DEC64E(hash.h8[1]); - h5h ^= DEC64E(hash.h8[2]); - h5l ^= DEC64E(hash.h8[3]); - h6h ^= DEC64E(hash.h8[4]); - h6l ^= DEC64E(hash.h8[5]); - h7h ^= DEC64E(hash.h8[6]); - h7l ^= DEC64E(hash.h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; } - E8; + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash.h8[0] = DEC64E(h4h); - hash.h8[1] = DEC64E(h4l); - hash.h8[2] = DEC64E(h5h); - hash.h8[3] = DEC64E(h5l); - hash.h8[4] = DEC64E(h6h); - hash.h8[5] = DEC64E(h6l); - hash.h8[6] = DEC64E(h7h); - hash.h8[7] = DEC64E(h7l); // keccak - - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash.h8[0]); - a10 ^= SWAP8(hash.h8[1]); - a20 ^= SWAP8(hash.h8[2]); - a30 ^= SWAP8(hash.h8[3]); - a40 ^= SWAP8(hash.h8[4]); - a01 ^= SWAP8(hash.h8[5]); - a11 ^= SWAP8(hash.h8[6]); - a21 ^= SWAP8(hash.h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash.h8[0] = SWAP8(a00); - hash.h8[1] = SWAP8(a10); - hash.h8[2] = SWAP8(a20); - hash.h8[3] = SWAP8(a30); - hash.h8[4] = SWAP8(a40); - hash.h8[5] = SWAP8(a01); - hash.h8[6] = SWAP8(a11); - hash.h8[7] = SWAP8(a21); + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } // luffa - - sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); - sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); - sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); - sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); - sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); - - DECL_TMP8(M); - - M0 = hash.h4[1]; - M1 = hash.h4[0]; - M2 = hash.h4[3]; - M3 = hash.h4[2]; - M4 = hash.h4[5]; - M5 = hash.h4[4]; - M6 = hash.h4[7]; - M7 = hash.h4[6]; - - for(uint i = 0; i < 5; i++) - { - MI5; - LUFFA_P5; - - if(i == 0) { - M0 = hash.h4[9]; - M1 = hash.h4[8]; - M2 = hash.h4[11]; - M3 = hash.h4[10]; - M4 = hash.h4[13]; - M5 = hash.h4[12]; - M6 = hash.h4[15]; - M7 = hash.h4[14]; - } else if(i == 1) { - M0 = 0x80000000; - M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; - } else if(i == 2) { - M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; - } else if(i == 3) { - hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; +{ + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash.h4[1]; + M1 = hash.h4[0]; + M2 = hash.h4[3]; + M3 = hash.h4[2]; + M4 = hash.h4[5]; + M5 = hash.h4[4]; + M6 = hash.h4[7]; + M7 = hash.h4[6]; + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = hash.h4[9]; + M1 = hash.h4[8]; + M2 = hash.h4[11]; + M3 = hash.h4[10]; + M4 = hash.h4[13]; + M5 = hash.h4[12]; + M6 = hash.h4[15]; + M7 = hash.h4[14]; + } else if(i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } } + hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; } - hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; // cubehash.h1 - - sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); - sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); - sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); - sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); - sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); - sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); - sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); - sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); - - x0 ^= SWAP4(hash.h4[1]); - x1 ^= SWAP4(hash.h4[0]); - x2 ^= SWAP4(hash.h4[3]); - x3 ^= SWAP4(hash.h4[2]); - x4 ^= SWAP4(hash.h4[5]); - x5 ^= SWAP4(hash.h4[4]); - x6 ^= SWAP4(hash.h4[7]); - x7 ^= SWAP4(hash.h4[6]); - - for (int i = 0; i < 13; i ++) { - SIXTEEN_ROUNDS; - - if (i == 0) { - x0 ^= SWAP4(hash.h4[9]); - x1 ^= SWAP4(hash.h4[8]); - x2 ^= SWAP4(hash.h4[11]); - x3 ^= SWAP4(hash.h4[10]); - x4 ^= SWAP4(hash.h4[13]); - x5 ^= SWAP4(hash.h4[12]); - x6 ^= SWAP4(hash.h4[15]); - x7 ^= SWAP4(hash.h4[14]); - } else if(i == 1) { - x0 ^= 0x80; - } else if (i == 2) { - xv ^= SPH_C32(1); + { + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash.h4[1]); + x1 ^= SWAP4(hash.h4[0]); + x2 ^= SWAP4(hash.h4[3]); + x3 ^= SWAP4(hash.h4[2]); + x4 ^= SWAP4(hash.h4[5]); + x5 ^= SWAP4(hash.h4[4]); + x6 ^= SWAP4(hash.h4[7]); + x7 ^= SWAP4(hash.h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash.h4[9]); + x1 ^= SWAP4(hash.h4[8]); + x2 ^= SWAP4(hash.h4[11]); + x3 ^= SWAP4(hash.h4[10]); + x4 ^= SWAP4(hash.h4[13]); + x5 ^= SWAP4(hash.h4[12]); + x6 ^= SWAP4(hash.h4[15]); + x7 ^= SWAP4(hash.h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } } - } - hash.h4[0] = x0; - hash.h4[1] = x1; - hash.h4[2] = x2; - hash.h4[3] = x3; - hash.h4[4] = x4; - hash.h4[5] = x5; - hash.h4[6] = x6; - hash.h4[7] = x7; - hash.h4[8] = x8; - hash.h4[9] = x9; - hash.h4[10] = xa; - hash.h4[11] = xb; - hash.h4[12] = xc; - hash.h4[13] = xd; - hash.h4[14] = xe; - hash.h4[15] = xf; + hash.h4[0] = x0; + hash.h4[1] = x1; + hash.h4[2] = x2; + hash.h4[3] = x3; + hash.h4[4] = x4; + hash.h4[5] = x5; + hash.h4[6] = x6; + hash.h4[7] = x7; + hash.h4[8] = x8; + hash.h4[9] = x9; + hash.h4[10] = xa; + hash.h4[11] = xb; + hash.h4[12] = xc; + hash.h4[13] = xd; + hash.h4[14] = xe; + hash.h4[15] = xf; + } // shavite { - // IV - sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); - sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); - sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); - sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); - - // state - sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; - sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; - sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; - sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; - - rk00 = hash.h4[0]; - rk01 = hash.h4[1]; - rk02 = hash.h4[2]; - rk03 = hash.h4[3]; - rk04 = hash.h4[4]; - rk05 = hash.h4[5]; - rk06 = hash.h4[6]; - rk07 = hash.h4[7]; - rk08 = hash.h4[8]; - rk09 = hash.h4[9]; - rk0A = hash.h4[10]; - rk0B = hash.h4[11]; - rk0C = hash.h4[12]; - rk0D = hash.h4[13]; - rk0E = hash.h4[14]; - rk0F = hash.h4[15]; - rk10 = 0x80; - rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; - rk1B = 0x2000000; - rk1C = rk1D = rk1E = 0; - rk1F = 0x2000000; - - c512(buf); - - hash.h4[0] = h0; - hash.h4[1] = h1; - hash.h4[2] = h2; - hash.h4[3] = h3; - hash.h4[4] = h4; - hash.h4[5] = h5; - hash.h4[6] = h6; - hash.h4[7] = h7; - hash.h4[8] = h8; - hash.h4[9] = h9; - hash.h4[10] = hA; - hash.h4[11] = hB; - hash.h4[12] = hC; - hash.h4[13] = hD; - hash.h4[14] = hE; - hash.h4[15] = hF; + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; } // simd { - s32 q[256]; - unsigned char x[128]; - for(unsigned int i = 0; i < 64; i++) - x[i] = hash.h1[i]; - for(unsigned int i = 64; i < 128; i++) - x[i] = 0; - - u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); - u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); - u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); - u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); - - FFT256(0, 1, 0, ll1); - for (int i = 0; i < 256; i ++) { - s32 tq; - - tq = q[i] + yoff_b_n[i]; - tq = REDS2(tq); - tq = REDS1(tq); - tq = REDS1(tq); - q[i] = (tq <= 128 ? tq : tq - 257); - } + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash.h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } - A0 ^= hash.h4[0]; - A1 ^= hash.h4[1]; - A2 ^= hash.h4[2]; - A3 ^= hash.h4[3]; - A4 ^= hash.h4[4]; - A5 ^= hash.h4[5]; - A6 ^= hash.h4[6]; - A7 ^= hash.h4[7]; - B0 ^= hash.h4[8]; - B1 ^= hash.h4[9]; - B2 ^= hash.h4[10]; - B3 ^= hash.h4[11]; - B4 ^= hash.h4[12]; - B5 ^= hash.h4[13]; - B6 ^= hash.h4[14]; - B7 ^= hash.h4[15]; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - - STEP_BIG( - C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), - C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), - IF, 4, 13, PP8_4_); - STEP_BIG( - C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), - C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), - IF, 13, 10, PP8_5_); - STEP_BIG( - C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), - C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), - IF, 10, 25, PP8_6_); - STEP_BIG( - C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), - C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), - IF, 25, 4, PP8_0_); - - u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; - u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; - u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; - u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; - - #define q SIMD_Q - - A0 ^= 0x200; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - STEP_BIG( - COPY_A0, COPY_A1, COPY_A2, COPY_A3, - COPY_A4, COPY_A5, COPY_A6, COPY_A7, - IF, 4, 13, PP8_4_); - STEP_BIG( - COPY_B0, COPY_B1, COPY_B2, COPY_B3, - COPY_B4, COPY_B5, COPY_B6, COPY_B7, - IF, 13, 10, PP8_5_); - STEP_BIG( - COPY_C0, COPY_C1, COPY_C2, COPY_C3, - COPY_C4, COPY_C5, COPY_C6, COPY_C7, - IF, 10, 25, PP8_6_); - STEP_BIG( - COPY_D0, COPY_D1, COPY_D2, COPY_D3, - COPY_D4, COPY_D5, COPY_D6, COPY_D7, - IF, 25, 4, PP8_0_); - #undef q - - hash.h4[0] = A0; - hash.h4[1] = A1; - hash.h4[2] = A2; - hash.h4[3] = A3; - hash.h4[4] = A4; - hash.h4[5] = A5; - hash.h4[6] = A6; - hash.h4[7] = A7; - hash.h4[8] = B0; - hash.h4[9] = B1; - hash.h4[10] = B2; - hash.h4[11] = B3; - hash.h4[12] = B4; - hash.h4[13] = B5; - hash.h4[14] = B6; - hash.h4[15] = B7; + A0 ^= hash.h4[0]; + A1 ^= hash.h4[1]; + A2 ^= hash.h4[2]; + A3 ^= hash.h4[3]; + A4 ^= hash.h4[4]; + A5 ^= hash.h4[5]; + A6 ^= hash.h4[6]; + A7 ^= hash.h4[7]; + B0 ^= hash.h4[8]; + B1 ^= hash.h4[9]; + B2 ^= hash.h4[10]; + B3 ^= hash.h4[11]; + B4 ^= hash.h4[12]; + B5 ^= hash.h4[13]; + B6 ^= hash.h4[14]; + B7 ^= hash.h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash.h4[0] = A0; + hash.h4[1] = A1; + hash.h4[2] = A2; + hash.h4[3] = A3; + hash.h4[4] = A4; + hash.h4[5] = A5; + hash.h4[6] = A6; + hash.h4[7] = A7; + hash.h4[8] = B0; + hash.h4[9] = B1; + hash.h4[10] = B2; + hash.h4[11] = B3; + hash.h4[12] = B4; + hash.h4[13] = B5; + hash.h4[14] = B6; + hash.h4[15] = B7; } // echo - sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; - sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; - Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; - Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; - - sph_u32 K0 = 512; - sph_u32 K1 = 0; - sph_u32 K2 = 0; - sph_u32 K3 = 0; - - W00 = Vb00; - W01 = Vb01; - W10 = Vb10; - W11 = Vb11; - W20 = Vb20; - W21 = Vb21; - W30 = Vb30; - W31 = Vb31; - W40 = Vb40; - W41 = Vb41; - W50 = Vb50; - W51 = Vb51; - W60 = Vb60; - W61 = Vb61; - W70 = Vb70; - W71 = Vb71; - W80 = hash.h8[0]; - W81 = hash.h8[1]; - W90 = hash.h8[2]; - W91 = hash.h8[3]; - WA0 = hash.h8[4]; - WA1 = hash.h8[5]; - WB0 = hash.h8[6]; - WB1 = hash.h8[7]; - WC0 = 0x80; - WC1 = 0; - WD0 = 0; - WD1 = 0; - WE0 = 0; - WE1 = 0x200000000000000; - WF0 = 0x200; - WF1 = 0; - - for (unsigned u = 0; u < 10; u ++) { - BIG_ROUND; - } + { + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } - Vb00 ^= hash.h8[0] ^ W00 ^ W80; - Vb01 ^= hash.h8[1] ^ W01 ^ W81; - Vb10 ^= hash.h8[2] ^ W10 ^ W90; - Vb11 ^= hash.h8[3] ^ W11 ^ W91; - Vb20 ^= hash.h8[4] ^ W20 ^ WA0; - Vb21 ^= hash.h8[5] ^ W21 ^ WA1; - Vb30 ^= hash.h8[6] ^ W30 ^ WB0; - Vb31 ^= hash.h8[7] ^ W31 ^ WB1; - - hash.h8[0] = Vb00; - hash.h8[1] = Vb01; - hash.h8[2] = Vb10; - hash.h8[3] = Vb11; - hash.h8[4] = Vb20; - hash.h8[5] = Vb21; - hash.h8[6] = Vb30; - hash.h8[7] = Vb31; + Vb00 ^= hash.h8[0] ^ W00 ^ W80; + Vb01 ^= hash.h8[1] ^ W01 ^ W81; + Vb10 ^= hash.h8[2] ^ W10 ^ W90; + Vb11 ^= hash.h8[3] ^ W11 ^ W91; + Vb20 ^= hash.h8[4] ^ W20 ^ WA0; + Vb21 ^= hash.h8[5] ^ W21 ^ WA1; + Vb30 ^= hash.h8[6] ^ W30 ^ WB0; + Vb31 ^= hash.h8[7] ^ W31 ^ WB1; + + hash.h8[0] = Vb00; + hash.h8[1] = Vb01; + hash.h8[2] = Vb10; + hash.h8[3] = Vb11; + hash.h8[4] = Vb20; + hash.h8[5] = Vb21; + hash.h8[6] = Vb30; + hash.h8[7] = Vb31; + } + // hamsi { sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; @@ -759,12 +771,12 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp hash.h4[u] = h[u]; } + // fugue { - // fugue - sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; - sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; - sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; - sph_u32 S30, S31, S32, S33, S34, S35; + sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; + sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; + sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; + sph_u32 S30, S31, S32, S33, S34, S35; ulong fc_bit_count = (sph_u64) 64 << 3; diff --git a/kernel/quarkcoin.cl b/kernel/quarkcoin.cl index f4f46bbc..76b21968 100644 --- a/kernel/quarkcoin.cl +++ b/kernel/quarkcoin.cl @@ -105,81 +105,83 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp } hash; // blake -{ - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) { - T1 = SPH_T64(T1 + 1); + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = DEC64BE(block + 0); - M1 = DEC64BE(block + 8); - M2 = DEC64BE(block + 16); - M3 = DEC64BE(block + 24); - M4 = DEC64BE(block + 32); - M5 = DEC64BE(block + 40); - M6 = DEC64BE(block + 48); - M7 = DEC64BE(block + 56); - M8 = DEC64BE(block + 64); - M9 = DEC64BE(block + 72); - M9 &= 0xFFFFFFFF00000000; - M9 ^= SWAP4(gid); - MA = 0x8000000000000000; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x280; - - COMPRESS64; - - hash.h8[0] = H0; - hash.h8[1] = H1; - hash.h8[2] = H2; - hash.h8[3] = H3; - hash.h8[4] = H4; - hash.h8[5] = H5; - hash.h8[6] = H6; - hash.h8[7] = H7; -} + // bmw - sph_u64 BMW_H[16]; - for(unsigned u = 0; u < 16; u++) - BMW_H[u] = BMW_IV512[u]; - - sph_u64 BMW_h1[16], BMW_h2[16]; - sph_u64 mv[16]; - - mv[ 0] = SWAP8(hash.h8[0]); - mv[ 1] = SWAP8(hash.h8[1]); - mv[ 2] = SWAP8(hash.h8[2]); - mv[ 3] = SWAP8(hash.h8[3]); - mv[ 4] = SWAP8(hash.h8[4]); - mv[ 5] = SWAP8(hash.h8[5]); - mv[ 6] = SWAP8(hash.h8[6]); - mv[ 7] = SWAP8(hash.h8[7]); - mv[ 8] = 0x80; - mv[ 9] = 0; - mv[10] = 0; - mv[11] = 0; - mv[12] = 0; - mv[13] = 0; - mv[14] = 0; - mv[15] = 0x200; + { + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; #define M(x) (mv[x]) #define H(x) (BMW_H[x]) #define dH(x) (BMW_h2[x]) - FOLDb; + FOLDb; #undef M #undef H @@ -189,26 +191,25 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define H(x) (final_b[x]) #define dH(x) (BMW_h1[x]) - FOLDb; + FOLDb; #undef M #undef H #undef dH - hash.h8[0] = SWAP8(BMW_h1[8]); - hash.h8[1] = SWAP8(BMW_h1[9]); - hash.h8[2] = SWAP8(BMW_h1[10]); - hash.h8[3] = SWAP8(BMW_h1[11]); - hash.h8[4] = SWAP8(BMW_h1[12]); - hash.h8[5] = SWAP8(BMW_h1[13]); - hash.h8[6] = SWAP8(BMW_h1[14]); - hash.h8[7] = SWAP8(BMW_h1[15]); + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } - bool dec = ((hash.h1[7] & 0x8) != 0); + if ((hash.h1[7] & 0x8) != 0) { - // groestl - sph_u64 H[16]; for (unsigned int u = 0; u < 15; u ++) H[u] = 0; @@ -248,13 +249,11 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp for (unsigned int u = 0; u < 16; u ++) H[u] ^= xH[u]; for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = (dec ? DEC64E(H[u + 8]) : hash.h8[u]); - + hash.h8[u] = DEC64E(H[u + 8]); } + else { - // skein - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; sph_u64 bcount = 0; @@ -271,108 +270,108 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp bcount = 0; m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; UBI_BIG(510, 8); - hash.h8[0] = (!dec ? SWAP8(h0) : hash.h8[0]); - hash.h8[1] = (!dec ? SWAP8(h1) : hash.h8[1]); - hash.h8[2] = (!dec ? SWAP8(h2) : hash.h8[2]); - hash.h8[3] = (!dec ? SWAP8(h3) : hash.h8[3]); - hash.h8[4] = (!dec ? SWAP8(h4) : hash.h8[4]); - hash.h8[5] = (!dec ? SWAP8(h5) : hash.h8[5]); - hash.h8[6] = (!dec ? SWAP8(h6) : hash.h8[6]); - hash.h8[7] = (!dec ? SWAP8(h7) : hash.h8[7]); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); } // groestl + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; -#if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); -#else - H[15] = (sph_u64)512; -#endif - - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash.h8[0]); - m[1] = DEC64E(hash.h8[1]); - m[2] = DEC64E(hash.h8[2]); - m[3] = DEC64E(hash.h8[3]); - m[4] = DEC64E(hash.h8[4]); - m[5] = DEC64E(hash.h8[5]); - m[6] = DEC64E(hash.h8[6]); - m[7] = DEC64E(hash.h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = DEC64E(H[u + 8]); + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } // jh + { + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); - for(int i = 0; i < 2; i++) - { - if (i == 0) { - h0h ^= DEC64E(hash.h8[0]); - h0l ^= DEC64E(hash.h8[1]); - h1h ^= DEC64E(hash.h8[2]); - h1l ^= DEC64E(hash.h8[3]); - h2h ^= DEC64E(hash.h8[4]); - h2l ^= DEC64E(hash.h8[5]); - h3h ^= DEC64E(hash.h8[6]); - h3l ^= DEC64E(hash.h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash.h8[0]); - h4l ^= DEC64E(hash.h8[1]); - h5h ^= DEC64E(hash.h8[2]); - h5l ^= DEC64E(hash.h8[3]); - h6h ^= DEC64E(hash.h8[4]); - h6l ^= DEC64E(hash.h8[5]); - h7h ^= DEC64E(hash.h8[6]); - h7l ^= DEC64E(hash.h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; } - E8; + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash.h8[0] = DEC64E(h4h); - hash.h8[1] = DEC64E(h4l); - hash.h8[2] = DEC64E(h5h); - hash.h8[3] = DEC64E(h5l); - hash.h8[4] = DEC64E(h6h); - hash.h8[5] = DEC64E(h6l); - hash.h8[6] = DEC64E(h7h); - hash.h8[7] = DEC64E(h7l); - - dec = ((hash.h1[7] & 0x8) != 0); - { + if ((hash.h1[7] & 0x8) != 0) + { // blake - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); @@ -407,18 +406,17 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp COMPRESS64; - hash.h8[0] = (dec ? H0 : hash.h8[0]); - hash.h8[1] = (dec ? H1 : hash.h8[1]); - hash.h8[2] = (dec ? H2 : hash.h8[2]); - hash.h8[3] = (dec ? H3 : hash.h8[3]); - hash.h8[4] = (dec ? H4 : hash.h8[4]); - hash.h8[5] = (dec ? H5 : hash.h8[5]); - hash.h8[6] = (dec ? H6 : hash.h8[6]); - hash.h8[7] = (dec ? H7 : hash.h8[7]); - + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } + else { - // bmw sph_u64 BMW_H[16]; for(unsigned u = 0; u < 16; u++) @@ -463,87 +461,86 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #undef H #undef dH - hash.h8[0] = (!dec ? SWAP8(BMW_h1[8]) : hash.h8[0]); - hash.h8[1] = (!dec ? SWAP8(BMW_h1[9]) : hash.h8[1]); - hash.h8[2] = (!dec ? SWAP8(BMW_h1[10]) : hash.h8[2]); - hash.h8[3] = (!dec ? SWAP8(BMW_h1[11]) : hash.h8[3]); - hash.h8[4] = (!dec ? SWAP8(BMW_h1[12]) : hash.h8[4]); - hash.h8[5] = (!dec ? SWAP8(BMW_h1[13]) : hash.h8[5]); - hash.h8[6] = (!dec ? SWAP8(BMW_h1[14]) : hash.h8[6]); - hash.h8[7] = (!dec ? SWAP8(BMW_h1[15]) : hash.h8[7]); - + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); } // keccak + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash.h8[0]); - a10 ^= SWAP8(hash.h8[1]); - a20 ^= SWAP8(hash.h8[2]); - a30 ^= SWAP8(hash.h8[3]); - a40 ^= SWAP8(hash.h8[4]); - a01 ^= SWAP8(hash.h8[5]); - a11 ^= SWAP8(hash.h8[6]); - a21 ^= SWAP8(hash.h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash.h8[0] = SWAP8(a00); - hash.h8[1] = SWAP8(a10); - hash.h8[2] = SWAP8(a20); - hash.h8[3] = SWAP8(a30); - hash.h8[4] = SWAP8(a40); - hash.h8[5] = SWAP8(a01); - hash.h8[6] = SWAP8(a11); - hash.h8[7] = SWAP8(a21); + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - // skein + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); - - dec = ((hash.h1[7] & 0x8) != 0); + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } + + // skein { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; - // keccak + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } + if ((hash.h1[7] & 0x8) != 0) + { + // keccak sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; @@ -571,20 +568,18 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp a10 = ~a10; a20 = ~a20; - hash.h8[0] = (dec ? SWAP8(a00) : hash.h8[0]); - hash.h8[1] = (dec ? SWAP8(a10) : hash.h8[1]); - hash.h8[2] = (dec ? SWAP8(a20) : hash.h8[2]); - hash.h8[3] = (dec ? SWAP8(a30) : hash.h8[3]); - hash.h8[4] = (dec ? SWAP8(a40) : hash.h8[4]); - hash.h8[5] = (dec ? SWAP8(a01) : hash.h8[5]); - hash.h8[6] = (dec ? SWAP8(a11) : hash.h8[6]); - hash.h8[7] = (dec ? SWAP8(a21) : hash.h8[7]); - + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); } + else { - // jh - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); sph_u64 tmp; @@ -618,15 +613,14 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp h4h ^= 0x80; h7l ^= 0x2000000000000; - hash.h8[0] = (!dec ? DEC64E(h4h) : hash.h8[0]); - hash.h8[1] = (!dec ? DEC64E(h4l) : hash.h8[1]); - hash.h8[2] = (!dec ? DEC64E(h5h) : hash.h8[2]); - hash.h8[3] = (!dec ? DEC64E(h5l) : hash.h8[3]); - hash.h8[4] = (!dec ? DEC64E(h6h) : hash.h8[4]); - hash.h8[5] = (!dec ? DEC64E(h6l) : hash.h8[5]); - hash.h8[6] = (!dec ? DEC64E(h7h) : hash.h8[6]); - hash.h8[7] = (!dec ? DEC64E(h7l) : hash.h8[7]); - + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } bool result = (SWAP8(hash.h8[3]) <= target); diff --git a/kernel/qubitcoin.cl b/kernel/qubitcoin.cl index a5bc2ba2..398eb58e 100644 --- a/kernel/qubitcoin.cl +++ b/kernel/qubitcoin.cl @@ -108,352 +108,358 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp barrier(CLK_LOCAL_MEM_FENCE); // luffa - - sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); - sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); - sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); - sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); - sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); - - DECL_TMP8(M); - - M0 = DEC32BE(block + 0); - M1 = DEC32BE(block + 4); - M2 = DEC32BE(block + 8); - M3 = DEC32BE(block + 12); - M4 = DEC32BE(block + 16); - M5 = DEC32BE(block + 20); - M6 = DEC32BE(block + 24); - M7 = DEC32BE(block + 28); - - for(uint i = 0; i < 5; i++) { - MI5; - LUFFA_P5; - - if(i == 0) { - M0 = DEC32BE(block + 32); - M1 = DEC32BE(block + 36); - M2 = DEC32BE(block + 40); - M3 = DEC32BE(block + 44); - M4 = DEC32BE(block + 48); - M5 = DEC32BE(block + 52); - M6 = DEC32BE(block + 56); - M7 = DEC32BE(block + 60); - } else if(i == 1) { - M0 = DEC32BE(block + 64); - M1 = DEC32BE(block + 68); - M2 = DEC32BE(block + 72); - M3 = SWAP4(gid); - M4 = 0x80000000; - M5 = M6 = M7 = 0; - } else if(i == 2) { - M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; - } else if(i == 3) { - hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = DEC32BE(block + 0); + M1 = DEC32BE(block + 4); + M2 = DEC32BE(block + 8); + M3 = DEC32BE(block + 12); + M4 = DEC32BE(block + 16); + M5 = DEC32BE(block + 20); + M6 = DEC32BE(block + 24); + M7 = DEC32BE(block + 28); + + for(uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if(i == 0) { + M0 = DEC32BE(block + 32); + M1 = DEC32BE(block + 36); + M2 = DEC32BE(block + 40); + M3 = DEC32BE(block + 44); + M4 = DEC32BE(block + 48); + M5 = DEC32BE(block + 52); + M6 = DEC32BE(block + 56); + M7 = DEC32BE(block + 60); + } else if(i == 1) { + M0 = DEC32BE(block + 64); + M1 = DEC32BE(block + 68); + M2 = DEC32BE(block + 72); + M3 = SWAP4(gid); + M4 = 0x80000000; + M5 = M6 = M7 = 0; + } else if(i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } else if(i == 3) { + hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } } + hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; } - hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; - hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; - hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; - hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; - hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; - hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; - hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; - hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; // cubehash.h1 - - sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); - sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); - sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); - sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); - sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); - sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); - sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); - sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); - - x0 ^= SWAP4(hash.h4[1]); - x1 ^= SWAP4(hash.h4[0]); - x2 ^= SWAP4(hash.h4[3]); - x3 ^= SWAP4(hash.h4[2]); - x4 ^= SWAP4(hash.h4[5]); - x5 ^= SWAP4(hash.h4[4]); - x6 ^= SWAP4(hash.h4[7]); - x7 ^= SWAP4(hash.h4[6]); - - for (int i = 0; i < 13; i ++) { - SIXTEEN_ROUNDS; - - if (i == 0) { - x0 ^= SWAP4(hash.h4[9]); - x1 ^= SWAP4(hash.h4[8]); - x2 ^= SWAP4(hash.h4[11]); - x3 ^= SWAP4(hash.h4[10]); - x4 ^= SWAP4(hash.h4[13]); - x5 ^= SWAP4(hash.h4[12]); - x6 ^= SWAP4(hash.h4[15]); - x7 ^= SWAP4(hash.h4[14]); - } else if(i == 1) { - x0 ^= 0x80; - } else if (i == 2) { - xv ^= SPH_C32(1); + { + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash.h4[1]); + x1 ^= SWAP4(hash.h4[0]); + x2 ^= SWAP4(hash.h4[3]); + x3 ^= SWAP4(hash.h4[2]); + x4 ^= SWAP4(hash.h4[5]); + x5 ^= SWAP4(hash.h4[4]); + x6 ^= SWAP4(hash.h4[7]); + x7 ^= SWAP4(hash.h4[6]); + + for (int i = 0; i < 13; i ++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash.h4[9]); + x1 ^= SWAP4(hash.h4[8]); + x2 ^= SWAP4(hash.h4[11]); + x3 ^= SWAP4(hash.h4[10]); + x4 ^= SWAP4(hash.h4[13]); + x5 ^= SWAP4(hash.h4[12]); + x6 ^= SWAP4(hash.h4[15]); + x7 ^= SWAP4(hash.h4[14]); + } else if(i == 1) { + x0 ^= 0x80; + } else if (i == 2) { + xv ^= SPH_C32(1); + } } - } - hash.h4[0] = x0; - hash.h4[1] = x1; - hash.h4[2] = x2; - hash.h4[3] = x3; - hash.h4[4] = x4; - hash.h4[5] = x5; - hash.h4[6] = x6; - hash.h4[7] = x7; - hash.h4[8] = x8; - hash.h4[9] = x9; - hash.h4[10] = xa; - hash.h4[11] = xb; - hash.h4[12] = xc; - hash.h4[13] = xd; - hash.h4[14] = xe; - hash.h4[15] = xf; + hash.h4[0] = x0; + hash.h4[1] = x1; + hash.h4[2] = x2; + hash.h4[3] = x3; + hash.h4[4] = x4; + hash.h4[5] = x5; + hash.h4[6] = x6; + hash.h4[7] = x7; + hash.h4[8] = x8; + hash.h4[9] = x9; + hash.h4[10] = xa; + hash.h4[11] = xb; + hash.h4[12] = xc; + hash.h4[13] = xd; + hash.h4[14] = xe; + hash.h4[15] = xf; + } // shavite { - // IV - sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); - sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); - sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); - sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); - - // state - sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; - sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; - sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; - sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - - sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; - - rk00 = hash.h4[0]; - rk01 = hash.h4[1]; - rk02 = hash.h4[2]; - rk03 = hash.h4[3]; - rk04 = hash.h4[4]; - rk05 = hash.h4[5]; - rk06 = hash.h4[6]; - rk07 = hash.h4[7]; - rk08 = hash.h4[8]; - rk09 = hash.h4[9]; - rk0A = hash.h4[10]; - rk0B = hash.h4[11]; - rk0C = hash.h4[12]; - rk0D = hash.h4[13]; - rk0E = hash.h4[14]; - rk0F = hash.h4[15]; - rk10 = 0x80; - rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; - rk1B = 0x2000000; - rk1C = rk1D = rk1E = 0; - rk1F = 0x2000000; - - c512(buf); - - hash.h4[0] = h0; - hash.h4[1] = h1; - hash.h4[2] = h2; - hash.h4[3] = h3; - hash.h4[4] = h4; - hash.h4[5] = h5; - hash.h4[6] = h6; - hash.h4[7] = h7; - hash.h4[8] = h8; - hash.h4[9] = h9; - hash.h4[10] = hA; - hash.h4[11] = hB; - hash.h4[12] = hC; - hash.h4[13] = hD; - hash.h4[14] = hE; - hash.h4[15] = hF; + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; } // simd - s32 q[256]; - unsigned char x[128]; - for(unsigned int i = 0; i < 64; i++) - x[i] = hash.h1[i]; - for(unsigned int i = 64; i < 128; i++) - x[i] = 0; - - u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); - u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); - u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); - u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); - - FFT256(0, 1, 0, ll1); - for (int i = 0; i < 256; i ++) { - s32 tq; - - tq = q[i] + yoff_b_n[i]; - tq = REDS2(tq); - tq = REDS1(tq); - tq = REDS1(tq); - q[i] = (tq <= 128 ? tq : tq - 257); - } + { + s32 q[256]; + unsigned char x[128]; + for(unsigned int i = 0; i < 64; i++) + x[i] = hash.h1[i]; + for(unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i ++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } - A0 ^= hash.h4[0]; - A1 ^= hash.h4[1]; - A2 ^= hash.h4[2]; - A3 ^= hash.h4[3]; - A4 ^= hash.h4[4]; - A5 ^= hash.h4[5]; - A6 ^= hash.h4[6]; - A7 ^= hash.h4[7]; - B0 ^= hash.h4[8]; - B1 ^= hash.h4[9]; - B2 ^= hash.h4[10]; - B3 ^= hash.h4[11]; - B4 ^= hash.h4[12]; - B5 ^= hash.h4[13]; - B6 ^= hash.h4[14]; - B7 ^= hash.h4[15]; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - - STEP_BIG( - C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), - C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), - IF, 4, 13, PP8_4_); - STEP_BIG( - C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), - C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), - IF, 13, 10, PP8_5_); - STEP_BIG( - C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), - C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), - IF, 10, 25, PP8_6_); - STEP_BIG( - C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), - C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), - IF, 25, 4, PP8_0_); - - u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; - u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; - u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; - u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; - - #define q SIMD_Q - - A0 ^= 0x200; - - ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); - ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); - ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); - ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); - STEP_BIG( - COPY_A0, COPY_A1, COPY_A2, COPY_A3, - COPY_A4, COPY_A5, COPY_A6, COPY_A7, - IF, 4, 13, PP8_4_); - STEP_BIG( - COPY_B0, COPY_B1, COPY_B2, COPY_B3, - COPY_B4, COPY_B5, COPY_B6, COPY_B7, - IF, 13, 10, PP8_5_); - STEP_BIG( - COPY_C0, COPY_C1, COPY_C2, COPY_C3, - COPY_C4, COPY_C5, COPY_C6, COPY_C7, - IF, 10, 25, PP8_6_); - STEP_BIG( - COPY_D0, COPY_D1, COPY_D2, COPY_D3, - COPY_D4, COPY_D5, COPY_D6, COPY_D7, - IF, 25, 4, PP8_0_); - #undef q - - hash.h4[0] = A0; - hash.h4[1] = A1; - hash.h4[2] = A2; - hash.h4[3] = A3; - hash.h4[4] = A4; - hash.h4[5] = A5; - hash.h4[6] = A6; - hash.h4[7] = A7; - hash.h4[8] = B0; - hash.h4[9] = B1; - hash.h4[10] = B2; - hash.h4[11] = B3; - hash.h4[12] = B4; - hash.h4[13] = B5; - hash.h4[14] = B6; - hash.h4[15] = B7; + A0 ^= hash.h4[0]; + A1 ^= hash.h4[1]; + A2 ^= hash.h4[2]; + A3 ^= hash.h4[3]; + A4 ^= hash.h4[4]; + A5 ^= hash.h4[5]; + A6 ^= hash.h4[6]; + A7 ^= hash.h4[7]; + B0 ^= hash.h4[8]; + B1 ^= hash.h4[9]; + B2 ^= hash.h4[10]; + B3 ^= hash.h4[11]; + B4 ^= hash.h4[12]; + B5 ^= hash.h4[13]; + B6 ^= hash.h4[14]; + B7 ^= hash.h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + + #define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); + #undef q + + hash.h4[0] = A0; + hash.h4[1] = A1; + hash.h4[2] = A2; + hash.h4[3] = A3; + hash.h4[4] = A4; + hash.h4[5] = A5; + hash.h4[6] = A6; + hash.h4[7] = A7; + hash.h4[8] = B0; + hash.h4[9] = B1; + hash.h4[10] = B2; + hash.h4[11] = B3; + hash.h4[12] = B4; + hash.h4[13] = B5; + hash.h4[14] = B6; + hash.h4[15] = B7; + } // echo - sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; - sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; - Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; - Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; - - sph_u32 K0 = 512; - sph_u32 K1 = 0; - sph_u32 K2 = 0; - sph_u32 K3 = 0; - - W00 = Vb00; - W01 = Vb01; - W10 = Vb10; - W11 = Vb11; - W20 = Vb20; - W21 = Vb21; - W30 = Vb30; - W31 = Vb31; - W40 = Vb40; - W41 = Vb41; - W50 = Vb50; - W51 = Vb51; - W60 = Vb60; - W61 = Vb61; - W70 = Vb70; - W71 = Vb71; - W80 = hash.h8[0]; - W81 = hash.h8[1]; - W90 = hash.h8[2]; - W91 = hash.h8[3]; - WA0 = hash.h8[4]; - WA1 = hash.h8[5]; - WB0 = hash.h8[6]; - WB1 = hash.h8[7]; - WC0 = 0x80; - WC1 = 0; - WD0 = 0; - WD1 = 0; - WE0 = 0; - WE1 = 0x200000000000000; - WF0 = 0x200; - WF1 = 0; - - for (unsigned u = 0; u < 10; u ++) { - BIG_ROUND; - } + { + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u ++) { + BIG_ROUND; + } - Vb00 ^= hash.h8[0] ^ W00 ^ W80; - Vb01 ^= hash.h8[1] ^ W01 ^ W81; - Vb10 ^= hash.h8[2] ^ W10 ^ W90; - Vb11 ^= hash.h8[3] ^ W11 ^ W91; - Vb20 ^= hash.h8[4] ^ W20 ^ WA0; - Vb21 ^= hash.h8[5] ^ W21 ^ WA1; - Vb30 ^= hash.h8[6] ^ W30 ^ WB0; - Vb31 ^= hash.h8[7] ^ W31 ^ WB1; - - bool result = (Vb11 <= target); - if (result) - output[output[0xFF]++] = SWAP4(gid); + Vb00 ^= hash.h8[0] ^ W00 ^ W80; + Vb01 ^= hash.h8[1] ^ W01 ^ W81; + Vb10 ^= hash.h8[2] ^ W10 ^ W90; + Vb11 ^= hash.h8[3] ^ W11 ^ W91; + Vb20 ^= hash.h8[4] ^ W20 ^ WA0; + Vb21 ^= hash.h8[5] ^ W21 ^ WA1; + Vb30 ^= hash.h8[6] ^ W30 ^ WB0; + Vb31 ^= hash.h8[7] ^ W31 ^ WB1; + + bool result = (Vb11 <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); + } } #endif // QUBITCOIN_CL diff --git a/kernel/sifcoin.cl b/kernel/sifcoin.cl index fbb186a3..8c5c8fbe 100644 --- a/kernel/sifcoin.cl +++ b/kernel/sifcoin.cl @@ -105,81 +105,83 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp } hash; // blake -{ - sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); - sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); - sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); - sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); - sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; - sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; - - if ((T0 = SPH_T64(T0 + 1024)) < 1024) { - T1 = SPH_T64(T1 + 1); + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; } - sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; - sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; - sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; - sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; - M0 = DEC64BE(block + 0); - M1 = DEC64BE(block + 8); - M2 = DEC64BE(block + 16); - M3 = DEC64BE(block + 24); - M4 = DEC64BE(block + 32); - M5 = DEC64BE(block + 40); - M6 = DEC64BE(block + 48); - M7 = DEC64BE(block + 56); - M8 = DEC64BE(block + 64); - M9 = DEC64BE(block + 72); - M9 &= 0xFFFFFFFF00000000; - M9 ^= SWAP4(gid); - MA = 0x8000000000000000; - MB = 0; - MC = 0; - MD = 1; - ME = 0; - MF = 0x280; - - COMPRESS64; - - hash.h8[0] = H0; - hash.h8[1] = H1; - hash.h8[2] = H2; - hash.h8[3] = H3; - hash.h8[4] = H4; - hash.h8[5] = H5; - hash.h8[6] = H6; - hash.h8[7] = H7; -} + // bmw - sph_u64 BMW_H[16]; - for(unsigned u = 0; u < 16; u++) - BMW_H[u] = BMW_IV512[u]; - - sph_u64 BMW_h1[16], BMW_h2[16]; - sph_u64 mv[16]; - - mv[ 0] = SWAP8(hash.h8[0]); - mv[ 1] = SWAP8(hash.h8[1]); - mv[ 2] = SWAP8(hash.h8[2]); - mv[ 3] = SWAP8(hash.h8[3]); - mv[ 4] = SWAP8(hash.h8[4]); - mv[ 5] = SWAP8(hash.h8[5]); - mv[ 6] = SWAP8(hash.h8[6]); - mv[ 7] = SWAP8(hash.h8[7]); - mv[ 8] = 0x80; - mv[ 9] = 0; - mv[10] = 0; - mv[11] = 0; - mv[12] = 0; - mv[13] = 0; - mv[14] = 0; - mv[15] = 0x200; + { + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; #define M(x) (mv[x]) #define H(x) (BMW_H[x]) #define dH(x) (BMW_h2[x]) - FOLDb; + FOLDb; #undef M #undef H @@ -189,172 +191,177 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp #define H(x) (final_b[x]) #define dH(x) (BMW_h1[x]) - FOLDb; + FOLDb; #undef M #undef H #undef dH - hash.h8[0] = SWAP8(BMW_h1[8]); - hash.h8[1] = SWAP8(BMW_h1[9]); - hash.h8[2] = SWAP8(BMW_h1[10]); - hash.h8[3] = SWAP8(BMW_h1[11]); - hash.h8[4] = SWAP8(BMW_h1[12]); - hash.h8[5] = SWAP8(BMW_h1[13]); - hash.h8[6] = SWAP8(BMW_h1[14]); - hash.h8[7] = SWAP8(BMW_h1[15]); + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + } // groestl - - sph_u64 H[16]; - for (unsigned int u = 0; u < 15; u ++) - H[u] = 0; + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; #if USE_LE - H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); #else - H[15] = (sph_u64)512; + H[15] = (sph_u64)512; #endif - sph_u64 g[16], m[16]; - m[0] = DEC64E(hash.h8[0]); - m[1] = DEC64E(hash.h8[1]); - m[2] = DEC64E(hash.h8[2]); - m[3] = DEC64E(hash.h8[3]); - m[4] = DEC64E(hash.h8[4]); - m[5] = DEC64E(hash.h8[5]); - m[6] = DEC64E(hash.h8[6]); - m[7] = DEC64E(hash.h8[7]); - for (unsigned int u = 0; u < 16; u ++) - g[u] = m[u] ^ H[u]; - m[8] = 0x80; g[8] = m[8] ^ H[8]; - m[9] = 0; g[9] = m[9] ^ H[9]; - m[10] = 0; g[10] = m[10] ^ H[10]; - m[11] = 0; g[11] = m[11] ^ H[11]; - m[12] = 0; g[12] = m[12] ^ H[12]; - m[13] = 0; g[13] = m[13] ^ H[13]; - m[14] = 0; g[14] = m[14] ^ H[14]; - m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; - PERM_BIG_P(g); - PERM_BIG_Q(m); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= g[u] ^ m[u]; - sph_u64 xH[16]; - for (unsigned int u = 0; u < 16; u ++) - xH[u] = H[u]; - PERM_BIG_P(xH); - for (unsigned int u = 0; u < 16; u ++) - H[u] ^= xH[u]; - for (unsigned int u = 0; u < 8; u ++) - hash.h8[u] = DEC64E(H[u + 8]); + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + } // jh - - sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); - sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); - sph_u64 tmp; - - for(int i = 0; i < 2; i++) { - if (i == 0) { - h0h ^= DEC64E(hash.h8[0]); - h0l ^= DEC64E(hash.h8[1]); - h1h ^= DEC64E(hash.h8[2]); - h1l ^= DEC64E(hash.h8[3]); - h2h ^= DEC64E(hash.h8[4]); - h2l ^= DEC64E(hash.h8[5]); - h3h ^= DEC64E(hash.h8[6]); - h3l ^= DEC64E(hash.h8[7]); - } else if(i == 1) { - h4h ^= DEC64E(hash.h8[0]); - h4l ^= DEC64E(hash.h8[1]); - h5h ^= DEC64E(hash.h8[2]); - h5l ^= DEC64E(hash.h8[3]); - h6h ^= DEC64E(hash.h8[4]); - h6l ^= DEC64E(hash.h8[5]); - h7h ^= DEC64E(hash.h8[6]); - h7l ^= DEC64E(hash.h8[7]); - - h0h ^= 0x80; - h3l ^= 0x2000000000000; + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; } - E8; + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); } - h4h ^= 0x80; - h7l ^= 0x2000000000000; - - hash.h8[0] = DEC64E(h4h); - hash.h8[1] = DEC64E(h4l); - hash.h8[2] = DEC64E(h5h); - hash.h8[3] = DEC64E(h5l); - hash.h8[4] = DEC64E(h6h); - hash.h8[5] = DEC64E(h6l); - hash.h8[6] = DEC64E(h7h); - hash.h8[7] = DEC64E(h7l); // keccak - - sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; - sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; - sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; - sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; - sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; - - a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); - a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); - - a00 ^= SWAP8(hash.h8[0]); - a10 ^= SWAP8(hash.h8[1]); - a20 ^= SWAP8(hash.h8[2]); - a30 ^= SWAP8(hash.h8[3]); - a40 ^= SWAP8(hash.h8[4]); - a01 ^= SWAP8(hash.h8[5]); - a11 ^= SWAP8(hash.h8[6]); - a21 ^= SWAP8(hash.h8[7]); - a31 ^= 0x8000000000000001; - KECCAK_F_1600; - // Finalize the "lane complement" - a10 = ~a10; - a20 = ~a20; - - hash.h8[0] = SWAP8(a00); - hash.h8[1] = SWAP8(a10); - hash.h8[2] = SWAP8(a20); - hash.h8[3] = SWAP8(a30); - hash.h8[4] = SWAP8(a40); - hash.h8[5] = SWAP8(a01); - hash.h8[6] = SWAP8(a11); - hash.h8[7] = SWAP8(a21); + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + } // skein - - sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); - sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; - sph_u64 bcount = 0; - - m0 = SWAP8(hash.h8[0]); - m1 = SWAP8(hash.h8[1]); - m2 = SWAP8(hash.h8[2]); - m3 = SWAP8(hash.h8[3]); - m4 = SWAP8(hash.h8[4]); - m5 = SWAP8(hash.h8[5]); - m6 = SWAP8(hash.h8[6]); - m7 = SWAP8(hash.h8[7]); - UBI_BIG(480, 64); - bcount = 0; - m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; - UBI_BIG(510, 8); - hash.h8[0] = SWAP8(h0); - hash.h8[1] = SWAP8(h1); - hash.h8[2] = SWAP8(h2); - hash.h8[3] = SWAP8(h3); - hash.h8[4] = SWAP8(h4); - hash.h8[5] = SWAP8(h5); - hash.h8[6] = SWAP8(h6); - hash.h8[7] = SWAP8(h7); + { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + } bool result = (SWAP8(hash.h8[3]) <= target); if (result) diff --git a/kernel/twecoin.cl b/kernel/twecoin.cl index ef784849..d9c87107 100644 --- a/kernel/twecoin.cl +++ b/kernel/twecoin.cl @@ -283,53 +283,53 @@ __kernel void search(__global unsigned char* block, volatile __global uint* outp // shavite { - sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) }; - sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7; - sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF; - sph_u32 count0, count1; - - rk0 = sph_dec32le_aligned((const unsigned char *)&hash + 0); - rk1 = sph_dec32le_aligned((const unsigned char *)&hash + 4); - rk2 = sph_dec32le_aligned((const unsigned char *)&hash + 8); - rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12); - rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16); - rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20); - rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24); - rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28); - rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32); - rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36); - rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40); - rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44); - rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48); - rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52); - rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56); - rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60); - count0 = 0x200; - count1 = 0; - c256(buf); - - rk0 = 0x80; - rk1 = 0; - rk2 = 0; - rk3 = 0; - rk4 = 0; - rk5 = 0; - rk6 = 0; - rk7 = 0; - rk8 = 0; - rk9 = 0; - rkA = 0; - rkB = 0; - rkC = 0; - rkD = 0x2000000; - rkE = 0; - rkF = 0x1000000; - count0 = 0; - count1 = 0; - c256(buf); - - for (unsigned u = 0; u < 8; u ++) - sph_enc32le((unsigned char *)&hash + (u << 2), h[u]); + sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) }; + sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7; + sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF; + sph_u32 count0, count1; + + rk0 = sph_dec32le_aligned((const unsigned char *)&hash + 0); + rk1 = sph_dec32le_aligned((const unsigned char *)&hash + 4); + rk2 = sph_dec32le_aligned((const unsigned char *)&hash + 8); + rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12); + rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16); + rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20); + rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24); + rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28); + rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32); + rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36); + rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40); + rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44); + rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48); + rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52); + rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56); + rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60); + count0 = 0x200; + count1 = 0; + c256(buf); + + rk0 = 0x80; + rk1 = 0; + rk2 = 0; + rk3 = 0; + rk4 = 0; + rk5 = 0; + rk6 = 0; + rk7 = 0; + rk8 = 0; + rk9 = 0; + rkA = 0; + rkB = 0; + rkC = 0; + rkD = 0x2000000; + rkE = 0; + rkF = 0x1000000; + count0 = 0; + count1 = 0; + c256(buf); + + for (unsigned u = 0; u < 8; u ++) + sph_enc32le((unsigned char *)&hash + (u << 2), h[u]); } // hamsi