|
|
|
@ -1,5 +1,5 @@
@@ -1,5 +1,5 @@
|
|
|
|
|
/* |
|
|
|
|
* X13 kernel implementation. |
|
|
|
|
* X15 kernel implementation. |
|
|
|
|
* |
|
|
|
|
* ==========================(LICENSE BEGIN)============================ |
|
|
|
|
* |
|
|
|
@ -75,7 +75,9 @@ typedef long sph_s64;
@@ -75,7 +75,9 @@ typedef long sph_s64;
|
|
|
|
|
#define SPH_GROESTL_BIG_ENDIAN 0 |
|
|
|
|
#define SPH_CUBEHASH_UNROLL 0 |
|
|
|
|
#define SPH_KECCAK_UNROLL 1 |
|
|
|
|
#ifndef SPH_HAMSI_EXPAND_BIG |
|
|
|
|
#define SPH_HAMSI_EXPAND_BIG 4 |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#include "blake.cl" |
|
|
|
|
#include "bmw.cl" |
|
|
|
@ -115,8 +117,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
@@ -115,8 +117,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
|
|
|
|
|
{ |
|
|
|
|
uint gid = get_global_id(0); |
|
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
// blake |
|
|
|
|
|
|
|
|
|
sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); |
|
|
|
|
sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); |
|
|
|
|
sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); |
|
|
|
@ -125,13 +127,13 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
@@ -125,13 +127,13 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
|
|
|
|
|
sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; |
|
|
|
|
|
|
|
|
|
if ((T0 = SPH_T64(T0 + 1024)) < 1024) |
|
|
|
|
{ |
|
|
|
|
T1 = SPH_T64(T1 + 1); |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; |
|
|
|
|
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
|
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; |
|
|
|
|
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; |
|
|
|
|
|
|
|
|
|
M0 = DEC64BE(block + 0); |
|
|
|
|
M1 = DEC64BE(block + 8); |
|
|
|
|
M2 = DEC64BE(block + 16); |
|
|
|
@ -170,16 +172,13 @@ __kernel void search1(__global hash_t* hashes)
@@ -170,16 +172,13 @@ __kernel void search1(__global hash_t* hashes)
|
|
|
|
|
{ |
|
|
|
|
uint gid = get_global_id(0); |
|
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
// bmw |
|
|
|
|
sph_u64 BMW_H[16]; |
|
|
|
|
|
|
|
|
|
#pragma unroll 16 |
|
|
|
|
for(unsigned u = 0; u < 16; u++) |
|
|
|
|
BMW_H[u] = BMW_IV512[u]; |
|
|
|
|
|
|
|
|
|
sph_u64 mv[16],q[32]; |
|
|
|
|
sph_u64 tmp; |
|
|
|
|
sph_u64 BMW_h1[16], BMW_h2[16]; |
|
|
|
|
sph_u64 mv[16]; |
|
|
|
|
|
|
|
|
|
mv[ 0] = SWAP8(hash->h8[0]); |
|
|
|
|
mv[ 1] = SWAP8(hash->h8[1]); |
|
|
|
@ -196,243 +195,35 @@ __kernel void search1(__global hash_t* hashes)
@@ -196,243 +195,35 @@ __kernel void search1(__global hash_t* hashes)
|
|
|
|
|
mv[12] = 0; |
|
|
|
|
mv[13] = 0; |
|
|
|
|
mv[14] = 0; |
|
|
|
|
mv[15] = SPH_C64(512); |
|
|
|
|
|
|
|
|
|
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; |
|
|
|
|
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; |
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; |
|
|
|
|
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; |
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; |
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; |
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); |
|
|
|
|
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); |
|
|
|
|
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; |
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); |
|
|
|
|
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; |
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); |
|
|
|
|
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; |
|
|
|
|
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; |
|
|
|
|
|
|
|
|
|
#pragma unroll 2 |
|
|
|
|
for(int i=0;i<2;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = |
|
|
|
|
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + |
|
|
|
|
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + |
|
|
|
|
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + |
|
|
|
|
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + |
|
|
|
|
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + |
|
|
|
|
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + |
|
|
|
|
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + |
|
|
|
|
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + |
|
|
|
|
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + |
|
|
|
|
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + |
|
|
|
|
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + |
|
|
|
|
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + |
|
|
|
|
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + |
|
|
|
|
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + |
|
|
|
|
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + |
|
|
|
|
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(int i=2;i<6;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
for(int i=6;i<9;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
mv[15] = 0x200; |
|
|
|
|
#define M(x) (mv[x]) |
|
|
|
|
#define H(x) (BMW_H[x]) |
|
|
|
|
#define dH(x) (BMW_h2[x]) |
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(int i=9;i<13;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
} |
|
|
|
|
FOLDb; |
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
for(int i=13;i<16;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; |
|
|
|
|
sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; |
|
|
|
|
|
|
|
|
|
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); |
|
|
|
|
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); |
|
|
|
|
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); |
|
|
|
|
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); |
|
|
|
|
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); |
|
|
|
|
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); |
|
|
|
|
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); |
|
|
|
|
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); |
|
|
|
|
|
|
|
|
|
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); |
|
|
|
|
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); |
|
|
|
|
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); |
|
|
|
|
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); |
|
|
|
|
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); |
|
|
|
|
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); |
|
|
|
|
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); |
|
|
|
|
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); |
|
|
|
|
|
|
|
|
|
#pragma unroll 16 |
|
|
|
|
for(int i=0;i<16;i++) |
|
|
|
|
{ |
|
|
|
|
mv[i] = BMW_H[i]; |
|
|
|
|
BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i; |
|
|
|
|
} |
|
|
|
|
#undef M |
|
|
|
|
#undef H |
|
|
|
|
#undef dH |
|
|
|
|
|
|
|
|
|
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; |
|
|
|
|
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; |
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; |
|
|
|
|
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; |
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; |
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; |
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; |
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); |
|
|
|
|
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; |
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); |
|
|
|
|
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; |
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); |
|
|
|
|
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; |
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); |
|
|
|
|
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; |
|
|
|
|
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; |
|
|
|
|
|
|
|
|
|
#pragma unroll 2 |
|
|
|
|
for(int i=0;i<2;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = |
|
|
|
|
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + |
|
|
|
|
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + |
|
|
|
|
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + |
|
|
|
|
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + |
|
|
|
|
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + |
|
|
|
|
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + |
|
|
|
|
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + |
|
|
|
|
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + |
|
|
|
|
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + |
|
|
|
|
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + |
|
|
|
|
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + |
|
|
|
|
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + |
|
|
|
|
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + |
|
|
|
|
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + |
|
|
|
|
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + |
|
|
|
|
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(int i=2;i<6;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
for(int i=6;i<9;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); |
|
|
|
|
} |
|
|
|
|
#define M(x) (BMW_h2[x]) |
|
|
|
|
#define H(x) (final_b[x]) |
|
|
|
|
#define dH(x) (BMW_h1[x]) |
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(int i=9;i<13;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
} |
|
|
|
|
FOLDb; |
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
for(int i=13;i<16;i++) |
|
|
|
|
{ |
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
} |
|
|
|
|
#undef M |
|
|
|
|
#undef H |
|
|
|
|
#undef dH |
|
|
|
|
|
|
|
|
|
XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; |
|
|
|
|
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; |
|
|
|
|
|
|
|
|
|
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); |
|
|
|
|
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); |
|
|
|
|
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); |
|
|
|
|
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); |
|
|
|
|
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); |
|
|
|
|
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); |
|
|
|
|
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); |
|
|
|
|
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); |
|
|
|
|
|
|
|
|
|
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); |
|
|
|
|
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); |
|
|
|
|
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); |
|
|
|
|
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); |
|
|
|
|
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); |
|
|
|
|
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); |
|
|
|
|
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); |
|
|
|
|
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); |
|
|
|
|
|
|
|
|
|
hash->h8[0] = SWAP8(BMW_H[8]); |
|
|
|
|
hash->h8[1] = SWAP8(BMW_H[9]); |
|
|
|
|
hash->h8[2] = SWAP8(BMW_H[10]); |
|
|
|
|
hash->h8[3] = SWAP8(BMW_H[11]); |
|
|
|
|
hash->h8[4] = SWAP8(BMW_H[12]); |
|
|
|
|
hash->h8[5] = SWAP8(BMW_H[13]); |
|
|
|
|
hash->h8[6] = SWAP8(BMW_H[14]); |
|
|
|
|
hash->h8[7] = SWAP8(BMW_H[15]); |
|
|
|
|
hash->h8[0] = SWAP8(BMW_h1[8]); |
|
|
|
|
hash->h8[1] = SWAP8(BMW_h1[9]); |
|
|
|
|
hash->h8[2] = SWAP8(BMW_h1[10]); |
|
|
|
|
hash->h8[3] = SWAP8(BMW_h1[11]); |
|
|
|
|
hash->h8[4] = SWAP8(BMW_h1[12]); |
|
|
|
|
hash->h8[5] = SWAP8(BMW_h1[13]); |
|
|
|
|
hash->h8[6] = SWAP8(BMW_h1[14]); |
|
|
|
|
hash->h8[7] = SWAP8(BMW_h1[15]); |
|
|
|
|
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
@ -451,15 +242,14 @@ __kernel void search2(__global hash_t* hashes)
@@ -451,15 +242,14 @@ __kernel void search2(__global hash_t* hashes)
|
|
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
|
{ |
|
|
|
|
T0_L[i] = T0[i]; |
|
|
|
|
T4_L[i] = T4[i]; |
|
|
|
|
T1_L[i] = T1[i]; |
|
|
|
|
T2_L[i] = T2[i]; |
|
|
|
|
T3_L[i] = T3[i]; |
|
|
|
|
T4_L[i] = T4[i]; |
|
|
|
|
T5_L[i] = T5[i]; |
|
|
|
|
T6_L[i] = T6[i]; |
|
|
|
|
T7_L[i] = T7[i]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
#define T0 T0_L |
|
|
|
@ -472,38 +262,47 @@ __kernel void search2(__global hash_t* hashes)
@@ -472,38 +262,47 @@ __kernel void search2(__global hash_t* hashes)
|
|
|
|
|
#define T7 T7_L |
|
|
|
|
|
|
|
|
|
// groestl |
|
|
|
|
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; |
|
|
|
|
|
|
|
|
|
sph_u64 g[16], m[16]; |
|
|
|
|
g[0] = m[0] = DEC64E(hash->h8[0]); |
|
|
|
|
g[1] = m[1] = DEC64E(hash->h8[1]); |
|
|
|
|
g[2] = m[2] = DEC64E(hash->h8[2]); |
|
|
|
|
g[3] = m[3] = DEC64E(hash->h8[3]); |
|
|
|
|
g[4] = m[4] = DEC64E(hash->h8[4]); |
|
|
|
|
g[5] = m[5] = DEC64E(hash->h8[5]); |
|
|
|
|
g[6] = m[6] = DEC64E(hash->h8[6]); |
|
|
|
|
g[7] = m[7] = DEC64E(hash->h8[7]); |
|
|
|
|
g[8] = m[8] = 0x80; |
|
|
|
|
g[9] = m[9] = 0; |
|
|
|
|
g[10] = m[10] = 0; |
|
|
|
|
g[11] = m[11] = 0; |
|
|
|
|
g[12] = m[12] = 0; |
|
|
|
|
g[13] = m[13] = 0; |
|
|
|
|
g[14] = m[14] = 0; |
|
|
|
|
g[15] = 0x102000000000000; |
|
|
|
|
m[15] = 0x100000000000000; |
|
|
|
|
sph_u64 H[16]; |
|
|
|
|
for (unsigned int u = 0; u < 15; u ++) |
|
|
|
|
H[u] = 0; |
|
|
|
|
#if USE_LE |
|
|
|
|
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); |
|
|
|
|
#else |
|
|
|
|
H[15] = (sph_u64)512; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
sph_u64 g[16], m[16]; |
|
|
|
|
m[0] = DEC64E(hash->h8[0]); |
|
|
|
|
m[1] = DEC64E(hash->h8[1]); |
|
|
|
|
m[2] = DEC64E(hash->h8[2]); |
|
|
|
|
m[3] = DEC64E(hash->h8[3]); |
|
|
|
|
m[4] = DEC64E(hash->h8[4]); |
|
|
|
|
m[5] = DEC64E(hash->h8[5]); |
|
|
|
|
m[6] = DEC64E(hash->h8[6]); |
|
|
|
|
m[7] = DEC64E(hash->h8[7]); |
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
g[u] = m[u] ^ H[u]; |
|
|
|
|
m[8] = 0x80; g[8] = m[8] ^ H[8]; |
|
|
|
|
m[9] = 0; g[9] = m[9] ^ H[9]; |
|
|
|
|
m[10] = 0; g[10] = m[10] ^ H[10]; |
|
|
|
|
m[11] = 0; g[11] = m[11] ^ H[11]; |
|
|
|
|
m[12] = 0; g[12] = m[12] ^ H[12]; |
|
|
|
|
m[13] = 0; g[13] = m[13] ^ H[13]; |
|
|
|
|
m[14] = 0; g[14] = m[14] ^ H[14]; |
|
|
|
|
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; |
|
|
|
|
PERM_BIG_P(g); |
|
|
|
|
PERM_BIG_Q(m); |
|
|
|
|
|
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
H[u] ^= g[u] ^ m[u]; |
|
|
|
|
sph_u64 xH[16]; |
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
xH[u] = H[u] ^= g[u] ^ m[u]; |
|
|
|
|
|
|
|
|
|
xH[u] = H[u]; |
|
|
|
|
PERM_BIG_P(xH); |
|
|
|
|
|
|
|
|
|
for (unsigned int u = 8; u < 16; u ++) |
|
|
|
|
hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); |
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
H[u] ^= xH[u]; |
|
|
|
|
for (unsigned int u = 0; u < 8; u ++) |
|
|
|
|
hash->h8[u] = DEC64E(H[u + 8]); |
|
|
|
|
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
@ -528,14 +327,10 @@ __kernel void search3(__global hash_t* hashes)
@@ -528,14 +327,10 @@ __kernel void search3(__global hash_t* hashes)
|
|
|
|
|
m5 = SWAP8(hash->h8[5]); |
|
|
|
|
m6 = SWAP8(hash->h8[6]); |
|
|
|
|
m7 = SWAP8(hash->h8[7]); |
|
|
|
|
|
|
|
|
|
UBI_BIG(480, 64); |
|
|
|
|
|
|
|
|
|
bcount = 0; |
|
|
|
|
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; |
|
|
|
|
|
|
|
|
|
UBI_BIG(510, 8); |
|
|
|
|
|
|
|
|
|
hash->h8[0] = SWAP8(h0); |
|
|
|
|
hash->h8[1] = SWAP8(h1); |
|
|
|
|
hash->h8[2] = SWAP8(h2); |
|
|
|
@ -562,8 +357,7 @@ __kernel void search4(__global hash_t* hashes)
@@ -562,8 +357,7 @@ __kernel void search4(__global hash_t* hashes)
|
|
|
|
|
|
|
|
|
|
for(int i = 0; i < 2; i++) |
|
|
|
|
{ |
|
|
|
|
if (i == 0) |
|
|
|
|
{ |
|
|
|
|
if (i == 0) { |
|
|
|
|
h0h ^= DEC64E(hash->h8[0]); |
|
|
|
|
h0l ^= DEC64E(hash->h8[1]); |
|
|
|
|
h1h ^= DEC64E(hash->h8[2]); |
|
|
|
@ -572,9 +366,7 @@ __kernel void search4(__global hash_t* hashes)
@@ -572,9 +366,7 @@ __kernel void search4(__global hash_t* hashes)
|
|
|
|
|
h2l ^= DEC64E(hash->h8[5]); |
|
|
|
|
h3h ^= DEC64E(hash->h8[6]); |
|
|
|
|
h3l ^= DEC64E(hash->h8[7]); |
|
|
|
|
} |
|
|
|
|
else if(i == 1) |
|
|
|
|
{ |
|
|
|
|
} else if(i == 1) { |
|
|
|
|
h4h ^= DEC64E(hash->h8[0]); |
|
|
|
|
h4l ^= DEC64E(hash->h8[1]); |
|
|
|
|
h5h ^= DEC64E(hash->h8[2]); |
|
|
|
@ -635,7 +427,6 @@ __kernel void search5(__global hash_t* hashes)
@@ -635,7 +427,6 @@ __kernel void search5(__global hash_t* hashes)
|
|
|
|
|
a21 ^= SWAP8(hash->h8[7]); |
|
|
|
|
a31 ^= 0x8000000000000001; |
|
|
|
|
KECCAK_F_1600; |
|
|
|
|
|
|
|
|
|
// Finalize the "lane complement" |
|
|
|
|
a10 = ~a10; |
|
|
|
|
a20 = ~a20; |
|
|
|
@ -682,8 +473,7 @@ __kernel void search6(__global hash_t* hashes)
@@ -682,8 +473,7 @@ __kernel void search6(__global hash_t* hashes)
|
|
|
|
|
MI5; |
|
|
|
|
LUFFA_P5; |
|
|
|
|
|
|
|
|
|
if(i == 0) |
|
|
|
|
{ |
|
|
|
|
if(i == 0) { |
|
|
|
|
M0 = hash->h4[9]; |
|
|
|
|
M1 = hash->h4[8]; |
|
|
|
|
M2 = hash->h4[11]; |
|
|
|
@ -692,16 +482,12 @@ __kernel void search6(__global hash_t* hashes)
@@ -692,16 +482,12 @@ __kernel void search6(__global hash_t* hashes)
|
|
|
|
|
M5 = hash->h4[12]; |
|
|
|
|
M6 = hash->h4[15]; |
|
|
|
|
M7 = hash->h4[14]; |
|
|
|
|
} |
|
|
|
|
else if(i == 1) |
|
|
|
|
{ |
|
|
|
|
} else if(i == 1) { |
|
|
|
|
M0 = 0x80000000; |
|
|
|
|
M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
|
} |
|
|
|
|
else if(i == 2) |
|
|
|
|
} else if(i == 2) { |
|
|
|
|
M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
|
else if(i == 3) |
|
|
|
|
{ |
|
|
|
|
} else if(i == 3) { |
|
|
|
|
hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; |
|
|
|
|
hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; |
|
|
|
|
hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; |
|
|
|
@ -751,12 +537,10 @@ __kernel void search7(__global hash_t* hashes)
@@ -751,12 +537,10 @@ __kernel void search7(__global hash_t* hashes)
|
|
|
|
|
x6 ^= SWAP4(hash->h4[7]); |
|
|
|
|
x7 ^= SWAP4(hash->h4[6]); |
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 13; i ++) |
|
|
|
|
{ |
|
|
|
|
for (int i = 0; i < 13; i ++) { |
|
|
|
|
SIXTEEN_ROUNDS; |
|
|
|
|
|
|
|
|
|
if (i == 0) |
|
|
|
|
{ |
|
|
|
|
if (i == 0) { |
|
|
|
|
x0 ^= SWAP4(hash->h4[9]); |
|
|
|
|
x1 ^= SWAP4(hash->h4[8]); |
|
|
|
|
x2 ^= SWAP4(hash->h4[11]); |
|
|
|
@ -765,12 +549,12 @@ __kernel void search7(__global hash_t* hashes)
@@ -765,12 +549,12 @@ __kernel void search7(__global hash_t* hashes)
|
|
|
|
|
x5 ^= SWAP4(hash->h4[12]); |
|
|
|
|
x6 ^= SWAP4(hash->h4[15]); |
|
|
|
|
x7 ^= SWAP4(hash->h4[14]); |
|
|
|
|
} |
|
|
|
|
else if(i == 1) |
|
|
|
|
} else if(i == 1) { |
|
|
|
|
x0 ^= 0x80; |
|
|
|
|
else if (i == 2) |
|
|
|
|
} else if (i == 2) { |
|
|
|
|
xv ^= SPH_C32(1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
hash->h4[0] = x0; |
|
|
|
|
hash->h4[1] = x1; |
|
|
|
@ -797,7 +581,6 @@ __kernel void search8(__global hash_t* hashes)
@@ -797,7 +581,6 @@ __kernel void search8(__global hash_t* hashes)
|
|
|
|
|
{ |
|
|
|
|
uint gid = get_global_id(0); |
|
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
|
|
|
|
|
|
int init = get_local_id(0); |
|
|
|
@ -826,7 +609,7 @@ __kernel void search8(__global hash_t* hashes)
@@ -826,7 +609,7 @@ __kernel void search8(__global hash_t* hashes)
|
|
|
|
|
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; |
|
|
|
|
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; |
|
|
|
|
|
|
|
|
|
sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; |
|
|
|
|
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; |
|
|
|
|
|
|
|
|
|
rk00 = hash->h4[0]; |
|
|
|
|
rk01 = hash->h4[1]; |
|
|
|
@ -892,8 +675,7 @@ __kernel void search9(__global hash_t* hashes)
@@ -892,8 +675,7 @@ __kernel void search9(__global hash_t* hashes)
|
|
|
|
|
u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); |
|
|
|
|
|
|
|
|
|
FFT256(0, 1, 0, ll1); |
|
|
|
|
for (int i = 0; i < 256; i ++) |
|
|
|
|
{ |
|
|
|
|
for (int i = 0; i < 256; i ++) { |
|
|
|
|
s32 tq; |
|
|
|
|
|
|
|
|
|
tq = q[i] + yoff_b_n[i]; |
|
|
|
@ -929,17 +711,14 @@ __kernel void search9(__global hash_t* hashes)
@@ -929,17 +711,14 @@ __kernel void search9(__global hash_t* hashes)
|
|
|
|
|
C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), |
|
|
|
|
C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), |
|
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), |
|
|
|
|
C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), |
|
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), |
|
|
|
|
C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), |
|
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), |
|
|
|
|
C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), |
|
|
|
@ -958,27 +737,22 @@ __kernel void search9(__global hash_t* hashes)
@@ -958,27 +737,22 @@ __kernel void search9(__global hash_t* hashes)
|
|
|
|
|
ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); |
|
|
|
|
ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); |
|
|
|
|
ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
COPY_A0, COPY_A1, COPY_A2, COPY_A3, |
|
|
|
|
COPY_A4, COPY_A5, COPY_A6, COPY_A7, |
|
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
COPY_B0, COPY_B1, COPY_B2, COPY_B3, |
|
|
|
|
COPY_B4, COPY_B5, COPY_B6, COPY_B7, |
|
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
COPY_C0, COPY_C1, COPY_C2, COPY_C3, |
|
|
|
|
COPY_C4, COPY_C5, COPY_C6, COPY_C7, |
|
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
|
COPY_D0, COPY_D1, COPY_D2, COPY_D3, |
|
|
|
|
COPY_D4, COPY_D5, COPY_D6, COPY_D7, |
|
|
|
|
IF, 25, 4, PP8_0_); |
|
|
|
|
|
|
|
|
|
#undef q |
|
|
|
|
|
|
|
|
|
hash->h4[0] = A0; |
|
|
|
@ -1006,7 +780,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1006,7 +780,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
{ |
|
|
|
|
uint gid = get_global_id(0); |
|
|
|
|
uint offset = get_global_offset(0); |
|
|
|
|
__global hash_t *hash = &(hashes[gid-offset]); |
|
|
|
|
hash_t hash; |
|
|
|
|
|
|
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
|
|
|
|
|
@ -1023,9 +797,20 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1023,9 +797,20 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
#ifdef INPUT_BIG_LOCAL |
|
|
|
|
__local sph_u32 T512_L[1024]; |
|
|
|
|
__constant const sph_u32 *T512_C = &T512[0][0]; |
|
|
|
|
|
|
|
|
|
for (int i = init; i < 1024; i += step) |
|
|
|
|
T512_L[i] = T512_C[i]; |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
#else |
|
|
|
|
#define INPUT_BIG_LOCAL INPUT_BIG |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// mixtab |
|
|
|
|
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; |
|
|
|
|
|
|
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
|
{ |
|
|
|
|
mixtab0[i] = mixtab0_c[i]; |
|
|
|
@ -1033,37 +818,17 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1033,37 +818,17 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
mixtab2[i] = mixtab2_c[i]; |
|
|
|
|
mixtab3[i] = mixtab3_c[i]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
__local sph_u32 T512_L[1024]; |
|
|
|
|
__constant const sph_u32 *T512_C = &T512[0][0]; |
|
|
|
|
|
|
|
|
|
for (int i = init; i < 1024; i += step) |
|
|
|
|
T512_L[i] = T512_C[i]; |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
__local sph_u64 LT0[256], LT1[256], LT2[256], LT3[256], LT4[256], LT5[256], LT6[256], LT7[256]; |
|
|
|
|
|
|
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
|
{ |
|
|
|
|
LT0[i] = plain_T0[i]; |
|
|
|
|
LT1[i] = plain_T1[i]; |
|
|
|
|
LT2[i] = plain_T2[i]; |
|
|
|
|
LT3[i] = plain_T3[i]; |
|
|
|
|
LT4[i] = plain_T4[i]; |
|
|
|
|
LT5[i] = plain_T5[i]; |
|
|
|
|
LT6[i] = plain_T6[i]; |
|
|
|
|
LT7[i] = plain_T7[i]; |
|
|
|
|
for (int i = 0; i < 8; i++) { |
|
|
|
|
hash.h8[i] = hashes[gid-offset].h8[i]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
// echo |
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 8; i++) |
|
|
|
|
hash->h8[i] = hashes[gid-offset].h8[i]; |
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
// echo |
|
|
|
|
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; |
|
|
|
|
sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; |
|
|
|
|
Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; |
|
|
|
@ -1090,14 +855,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1090,14 +855,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
W61 = Vb61; |
|
|
|
|
W70 = Vb70; |
|
|
|
|
W71 = Vb71; |
|
|
|
|
W80 = hash->h8[0]; |
|
|
|
|
W81 = hash->h8[1]; |
|
|
|
|
W90 = hash->h8[2]; |
|
|
|
|
W91 = hash->h8[3]; |
|
|
|
|
WA0 = hash->h8[4]; |
|
|
|
|
WA1 = hash->h8[5]; |
|
|
|
|
WB0 = hash->h8[6]; |
|
|
|
|
WB1 = hash->h8[7]; |
|
|
|
|
W80 = hash.h8[0]; |
|
|
|
|
W81 = hash.h8[1]; |
|
|
|
|
W90 = hash.h8[2]; |
|
|
|
|
W91 = hash.h8[3]; |
|
|
|
|
WA0 = hash.h8[4]; |
|
|
|
|
WA1 = hash.h8[5]; |
|
|
|
|
WB0 = hash.h8[6]; |
|
|
|
|
WB1 = hash.h8[7]; |
|
|
|
|
WC0 = 0x80; |
|
|
|
|
WC1 = 0; |
|
|
|
|
WD0 = 0; |
|
|
|
@ -1107,19 +872,25 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1107,19 +872,25 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
WF0 = 0x200; |
|
|
|
|
WF1 = 0; |
|
|
|
|
|
|
|
|
|
for (unsigned u = 0; u < 10; u ++) |
|
|
|
|
for (unsigned u = 0; u < 10; u ++) { |
|
|
|
|
BIG_ROUND; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
hash->h8[0] ^= Vb00 ^ W00 ^ W80; |
|
|
|
|
hash->h8[1] ^= Vb01 ^ W01 ^ W81; |
|
|
|
|
hash->h8[2] ^= Vb10 ^ W10 ^ W90; |
|
|
|
|
hash->h8[3] ^= Vb11 ^ W11 ^ W91; |
|
|
|
|
hash->h8[4] ^= Vb20 ^ W20 ^ WA0; |
|
|
|
|
hash->h8[5] ^= Vb21 ^ W21 ^ WA1; |
|
|
|
|
hash->h8[6] ^= Vb30 ^ W30 ^ WB0; |
|
|
|
|
hash->h8[7] ^= Vb31 ^ W31 ^ WB1; |
|
|
|
|
hash.h8[0] ^= Vb00 ^ W00 ^ W80; |
|
|
|
|
hash.h8[1] ^= Vb01 ^ W01 ^ W81; |
|
|
|
|
hash.h8[2] ^= Vb10 ^ W10 ^ W90; |
|
|
|
|
hash.h8[3] ^= Vb11 ^ W11 ^ W91; |
|
|
|
|
hash.h8[4] ^= Vb20 ^ W20 ^ WA0; |
|
|
|
|
hash.h8[5] ^= Vb21 ^ W21 ^ WA1; |
|
|
|
|
hash.h8[6] ^= Vb30 ^ W30 ^ WB0; |
|
|
|
|
hash.h8[7] ^= Vb31 ^ W31 ^ WB1; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// hamsi |
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; |
|
|
|
|
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; |
|
|
|
|
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; |
|
|
|
@ -1128,39 +899,38 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1128,39 +899,38 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; |
|
|
|
|
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; |
|
|
|
|
|
|
|
|
|
#define buf(u) hash->h1[i + u] |
|
|
|
|
|
|
|
|
|
for(int i = 0; i < 64; i += 8) |
|
|
|
|
{ |
|
|
|
|
#define buf(u) hash.h1[i + u] |
|
|
|
|
for(int i = 0; i < 64; i += 8) { |
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
|
P_BIG; |
|
|
|
|
T_BIG; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#undef buf |
|
|
|
|
#define buf(u) (u == 0 ? 0x80 : 0) |
|
|
|
|
|
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
|
P_BIG; |
|
|
|
|
T_BIG; |
|
|
|
|
|
|
|
|
|
#undef buf |
|
|
|
|
#define buf(u) (u == 6 ? 2 : 0) |
|
|
|
|
|
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
|
PF_BIG; |
|
|
|
|
T_BIG; |
|
|
|
|
|
|
|
|
|
for (unsigned u = 0; u < 16; u ++) |
|
|
|
|
hash->h4[u] = h[u]; |
|
|
|
|
hash.h4[u] = h[u]; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// fugue |
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; |
|
|
|
|
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; |
|
|
|
|
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; |
|
|
|
|
sph_u32 S30, S31, S32, S33, S34, S35; |
|
|
|
|
|
|
|
|
|
ulong fc_bit_count = (sph_u64) 0x200; |
|
|
|
|
ulong fc_bit_count = (sph_u64) 64 << 3; |
|
|
|
|
|
|
|
|
|
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; |
|
|
|
|
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); |
|
|
|
@ -1168,25 +938,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1168,25 +938,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); |
|
|
|
|
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); |
|
|
|
|
|
|
|
|
|
FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2])); |
|
|
|
|
FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5])); |
|
|
|
|
FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8])); |
|
|
|
|
FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB])); |
|
|
|
|
FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE])); |
|
|
|
|
FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); |
|
|
|
|
FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2])); |
|
|
|
|
FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5])); |
|
|
|
|
FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8])); |
|
|
|
|
FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB])); |
|
|
|
|
FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE])); |
|
|
|
|
FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); |
|
|
|
|
|
|
|
|
|
// apply round shift if necessary |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
for (i = 0; i < 32; i ++) |
|
|
|
|
{ |
|
|
|
|
for (i = 0; i < 32; i ++) { |
|
|
|
|
ROR3; |
|
|
|
|
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); |
|
|
|
|
SMIX(S00, S01, S02, S03); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (i = 0; i < 13; i ++) |
|
|
|
|
{ |
|
|
|
|
for (i = 0; i < 13; i ++) { |
|
|
|
|
S04 ^= S00; |
|
|
|
|
S09 ^= S00; |
|
|
|
|
S18 ^= S00; |
|
|
|
@ -1217,24 +984,27 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1217,24 +984,27 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
S18 ^= S00; |
|
|
|
|
S27 ^= S00; |
|
|
|
|
|
|
|
|
|
hash->h4[0] = SWAP4(S01); |
|
|
|
|
hash->h4[1] = SWAP4(S02); |
|
|
|
|
hash->h4[2] = SWAP4(S03); |
|
|
|
|
hash->h4[3] = SWAP4(S04); |
|
|
|
|
hash->h4[4] = SWAP4(S09); |
|
|
|
|
hash->h4[5] = SWAP4(S10); |
|
|
|
|
hash->h4[6] = SWAP4(S11); |
|
|
|
|
hash->h4[7] = SWAP4(S12); |
|
|
|
|
hash->h4[8] = SWAP4(S18); |
|
|
|
|
hash->h4[9] = SWAP4(S19); |
|
|
|
|
hash->h4[10] = SWAP4(S20); |
|
|
|
|
hash->h4[11] = SWAP4(S21); |
|
|
|
|
hash->h4[12] = SWAP4(S27); |
|
|
|
|
hash->h4[13] = SWAP4(S28); |
|
|
|
|
hash->h4[14] = SWAP4(S29); |
|
|
|
|
hash->h4[15] = SWAP4(S30); |
|
|
|
|
hash.h4[0] = SWAP4(S01); |
|
|
|
|
hash.h4[1] = SWAP4(S02); |
|
|
|
|
hash.h4[2] = SWAP4(S03); |
|
|
|
|
hash.h4[3] = SWAP4(S04); |
|
|
|
|
hash.h4[4] = SWAP4(S09); |
|
|
|
|
hash.h4[5] = SWAP4(S10); |
|
|
|
|
hash.h4[6] = SWAP4(S11); |
|
|
|
|
hash.h4[7] = SWAP4(S12); |
|
|
|
|
hash.h4[8] = SWAP4(S18); |
|
|
|
|
hash.h4[9] = SWAP4(S19); |
|
|
|
|
hash.h4[10] = SWAP4(S20); |
|
|
|
|
hash.h4[11] = SWAP4(S21); |
|
|
|
|
hash.h4[12] = SWAP4(S27); |
|
|
|
|
hash.h4[13] = SWAP4(S28); |
|
|
|
|
hash.h4[14] = SWAP4(S29); |
|
|
|
|
hash.h4[15] = SWAP4(S30); |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//shabal |
|
|
|
|
{ |
|
|
|
|
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], |
|
|
|
|
A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; |
|
|
|
|
sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], |
|
|
|
@ -1244,22 +1014,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1244,22 +1014,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
|
sph_u32 Wlow = 1, Whigh = 0; |
|
|
|
|
|
|
|
|
|
M0 = hash->h4[0]; |
|
|
|
|
M1 = hash->h4[1]; |
|
|
|
|
M2 = hash->h4[2]; |
|
|
|
|
M3 = hash->h4[3]; |
|
|
|
|
M4 = hash->h4[4]; |
|
|
|
|
M5 = hash->h4[5]; |
|
|
|
|
M6 = hash->h4[6]; |
|
|
|
|
M7 = hash->h4[7]; |
|
|
|
|
M8 = hash->h4[8]; |
|
|
|
|
M9 = hash->h4[9]; |
|
|
|
|
MA = hash->h4[10]; |
|
|
|
|
MB = hash->h4[11]; |
|
|
|
|
MC = hash->h4[12]; |
|
|
|
|
MD = hash->h4[13]; |
|
|
|
|
ME = hash->h4[14]; |
|
|
|
|
MF = hash->h4[15]; |
|
|
|
|
M0 = hash.h4[0]; |
|
|
|
|
M1 = hash.h4[1]; |
|
|
|
|
M2 = hash.h4[2]; |
|
|
|
|
M3 = hash.h4[3]; |
|
|
|
|
M4 = hash.h4[4]; |
|
|
|
|
M5 = hash.h4[5]; |
|
|
|
|
M6 = hash.h4[6]; |
|
|
|
|
M7 = hash.h4[7]; |
|
|
|
|
M8 = hash.h4[8]; |
|
|
|
|
M9 = hash.h4[9]; |
|
|
|
|
MA = hash.h4[10]; |
|
|
|
|
MB = hash.h4[11]; |
|
|
|
|
MC = hash.h4[12]; |
|
|
|
|
MD = hash.h4[13]; |
|
|
|
|
ME = hash.h4[14]; |
|
|
|
|
MF = hash.h4[15]; |
|
|
|
|
|
|
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
|
XOR_W; |
|
|
|
@ -1274,44 +1044,44 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1274,44 +1044,44 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
|
XOR_W; |
|
|
|
|
APPLY_P; |
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 3; i ++) |
|
|
|
|
{ |
|
|
|
|
for (unsigned i = 0; i < 3; i ++) { |
|
|
|
|
SWAP_BC; |
|
|
|
|
XOR_W; |
|
|
|
|
APPLY_P; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
hash->h4[0] = B0; |
|
|
|
|
hash->h4[1] = B1; |
|
|
|
|
hash->h4[2] = B2; |
|
|
|
|
hash->h4[3] = B3; |
|
|
|
|
hash->h4[4] = B4; |
|
|
|
|
hash->h4[5] = B5; |
|
|
|
|
hash->h4[6] = B6; |
|
|
|
|
hash->h4[7] = B7; |
|
|
|
|
hash->h4[8] = B8; |
|
|
|
|
hash->h4[9] = B9; |
|
|
|
|
hash->h4[10] = BA; |
|
|
|
|
hash->h4[11] = BB; |
|
|
|
|
hash->h4[12] = BC; |
|
|
|
|
hash->h4[13] = BD; |
|
|
|
|
hash->h4[14] = BE; |
|
|
|
|
hash->h4[15] = BF; |
|
|
|
|
hash.h4[0] = B0; |
|
|
|
|
hash.h4[1] = B1; |
|
|
|
|
hash.h4[2] = B2; |
|
|
|
|
hash.h4[3] = B3; |
|
|
|
|
hash.h4[4] = B4; |
|
|
|
|
hash.h4[5] = B5; |
|
|
|
|
hash.h4[6] = B6; |
|
|
|
|
hash.h4[7] = B7; |
|
|
|
|
hash.h4[8] = B8; |
|
|
|
|
hash.h4[9] = B9; |
|
|
|
|
hash.h4[10] = BA; |
|
|
|
|
hash.h4[11] = BB; |
|
|
|
|
hash.h4[12] = BC; |
|
|
|
|
hash.h4[13] = BD; |
|
|
|
|
hash.h4[14] = BE; |
|
|
|
|
hash.h4[15] = BF; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//whirlpool |
|
|
|
|
{ |
|
|
|
|
sph_u64 n0, n1, n2, n3, n4, n5, n6, n7; |
|
|
|
|
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; |
|
|
|
|
sph_u64 state[8]; |
|
|
|
|
|
|
|
|
|
n0 = (hash->h8[0]); |
|
|
|
|
n1 = (hash->h8[1]); |
|
|
|
|
n2 = (hash->h8[2]); |
|
|
|
|
n3 = (hash->h8[3]); |
|
|
|
|
n4 = (hash->h8[4]); |
|
|
|
|
n5 = (hash->h8[5]); |
|
|
|
|
n6 = (hash->h8[6]); |
|
|
|
|
n7 = (hash->h8[7]); |
|
|
|
|
n0 = (hash.h8[0]); |
|
|
|
|
n1 = (hash.h8[1]); |
|
|
|
|
n2 = (hash.h8[2]); |
|
|
|
|
n3 = (hash.h8[3]); |
|
|
|
|
n4 = (hash.h8[4]); |
|
|
|
|
n5 = (hash.h8[5]); |
|
|
|
|
n6 = (hash.h8[6]); |
|
|
|
|
n7 = (hash.h8[7]); |
|
|
|
|
|
|
|
|
|
h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; |
|
|
|
|
|
|
|
|
@ -1324,9 +1094,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1324,9 +1094,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
n6 ^= h6; |
|
|
|
|
n7 ^= h7; |
|
|
|
|
|
|
|
|
|
#pragma unroll 10 |
|
|
|
|
for (unsigned r = 0; r < 10; r ++) |
|
|
|
|
{ |
|
|
|
|
for (unsigned r = 0; r < 10; r ++) { |
|
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
|
|
|
|
|
|
ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); |
|
|
|
@ -1335,14 +1103,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1335,14 +1103,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
TRANSFER(n, tmp); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
state[0] = n0 ^ (hash->h8[0]); |
|
|
|
|
state[1] = n1 ^ (hash->h8[1]); |
|
|
|
|
state[2] = n2 ^ (hash->h8[2]); |
|
|
|
|
state[3] = n3 ^ (hash->h8[3]); |
|
|
|
|
state[4] = n4 ^ (hash->h8[4]); |
|
|
|
|
state[5] = n5 ^ (hash->h8[5]); |
|
|
|
|
state[6] = n6 ^ (hash->h8[6]); |
|
|
|
|
state[7] = n7 ^ (hash->h8[7]); |
|
|
|
|
state[0] = n0 ^ (hash.h8[0]); |
|
|
|
|
state[1] = n1 ^ (hash.h8[1]); |
|
|
|
|
state[2] = n2 ^ (hash.h8[2]); |
|
|
|
|
state[3] = n3 ^ (hash.h8[3]); |
|
|
|
|
state[4] = n4 ^ (hash.h8[4]); |
|
|
|
|
state[5] = n5 ^ (hash.h8[5]); |
|
|
|
|
state[6] = n6 ^ (hash.h8[6]); |
|
|
|
|
state[7] = n7 ^ (hash.h8[7]); |
|
|
|
|
|
|
|
|
|
n0 = 0x80; |
|
|
|
|
n1 = n2 = n3 = n4 = n5 = n6 = 0; |
|
|
|
@ -1366,12 +1134,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1366,12 +1134,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
n6 ^= h6; |
|
|
|
|
n7 ^= h7; |
|
|
|
|
|
|
|
|
|
#pragma unroll 10 |
|
|
|
|
for (unsigned r = 0; r < 10; r ++) |
|
|
|
|
{ |
|
|
|
|
for (unsigned r = 0; r < 10; r ++) { |
|
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
|
|
|
|
|
|
ROUND_KSCHED(LT, h, tmp, plain_RC[r]); |
|
|
|
|
ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); |
|
|
|
|
TRANSFER(h, tmp); |
|
|
|
|
ROUND_WENC(plain_T, n, h, tmp); |
|
|
|
|
TRANSFER(n, tmp); |
|
|
|
@ -1387,9 +1153,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1387,9 +1153,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
|
|
|
|
|
state[7] ^= n7 ^ 0x2000000000000; |
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 8; i ++) |
|
|
|
|
hash->h8[i] = state[i]; |
|
|
|
|
hash.h8[i] = state[i]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool result = (hash->h8[3] <= target); |
|
|
|
|
bool result = (hash.h8[3] <= target); |
|
|
|
|
if (result) |
|
|
|
|
output[atomic_inc(output+0xFF)] = SWAP4(gid); |
|
|
|
|
|
|
|
|
|