|
|
@ -1,5 +1,5 @@ |
|
|
|
/* |
|
|
|
/* |
|
|
|
* X13 kernel implementation. |
|
|
|
* X15 kernel implementation. |
|
|
|
* |
|
|
|
* |
|
|
|
* ==========================(LICENSE BEGIN)============================ |
|
|
|
* ==========================(LICENSE BEGIN)============================ |
|
|
|
* |
|
|
|
* |
|
|
@ -75,7 +75,9 @@ typedef long sph_s64; |
|
|
|
#define SPH_GROESTL_BIG_ENDIAN 0 |
|
|
|
#define SPH_GROESTL_BIG_ENDIAN 0 |
|
|
|
#define SPH_CUBEHASH_UNROLL 0 |
|
|
|
#define SPH_CUBEHASH_UNROLL 0 |
|
|
|
#define SPH_KECCAK_UNROLL 1 |
|
|
|
#define SPH_KECCAK_UNROLL 1 |
|
|
|
|
|
|
|
#ifndef SPH_HAMSI_EXPAND_BIG |
|
|
|
#define SPH_HAMSI_EXPAND_BIG 4 |
|
|
|
#define SPH_HAMSI_EXPAND_BIG 4 |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
#include "blake.cl" |
|
|
|
#include "blake.cl" |
|
|
|
#include "bmw.cl" |
|
|
|
#include "bmw.cl" |
|
|
@ -115,8 +117,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint gid = get_global_id(0); |
|
|
|
uint gid = get_global_id(0); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
|
|
// blake |
|
|
|
// blake |
|
|
|
|
|
|
|
|
|
|
|
sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); |
|
|
|
sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); |
|
|
|
sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); |
|
|
|
sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); |
|
|
|
sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); |
|
|
|
sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); |
|
|
@ -125,13 +127,13 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes) |
|
|
|
sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; |
|
|
|
sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; |
|
|
|
|
|
|
|
|
|
|
|
if ((T0 = SPH_T64(T0 + 1024)) < 1024) |
|
|
|
if ((T0 = SPH_T64(T0 + 1024)) < 1024) |
|
|
|
|
|
|
|
{ |
|
|
|
T1 = SPH_T64(T1 + 1); |
|
|
|
T1 = SPH_T64(T1 + 1); |
|
|
|
|
|
|
|
} |
|
|
|
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; |
|
|
|
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; |
|
|
|
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; |
|
|
|
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; |
|
|
|
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; |
|
|
|
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; |
|
|
|
|
|
|
|
|
|
|
|
M0 = DEC64BE(block + 0); |
|
|
|
M0 = DEC64BE(block + 0); |
|
|
|
M1 = DEC64BE(block + 8); |
|
|
|
M1 = DEC64BE(block + 8); |
|
|
|
M2 = DEC64BE(block + 16); |
|
|
|
M2 = DEC64BE(block + 16); |
|
|
@ -170,16 +172,13 @@ __kernel void search1(__global hash_t* hashes) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint gid = get_global_id(0); |
|
|
|
uint gid = get_global_id(0); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
|
|
// bmw |
|
|
|
// bmw |
|
|
|
sph_u64 BMW_H[16]; |
|
|
|
sph_u64 BMW_H[16]; |
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 16 |
|
|
|
|
|
|
|
for(unsigned u = 0; u < 16; u++) |
|
|
|
for(unsigned u = 0; u < 16; u++) |
|
|
|
BMW_H[u] = BMW_IV512[u]; |
|
|
|
BMW_H[u] = BMW_IV512[u]; |
|
|
|
|
|
|
|
|
|
|
|
sph_u64 mv[16],q[32]; |
|
|
|
sph_u64 BMW_h1[16], BMW_h2[16]; |
|
|
|
sph_u64 tmp; |
|
|
|
sph_u64 mv[16]; |
|
|
|
|
|
|
|
|
|
|
|
mv[ 0] = SWAP8(hash->h8[0]); |
|
|
|
mv[ 0] = SWAP8(hash->h8[0]); |
|
|
|
mv[ 1] = SWAP8(hash->h8[1]); |
|
|
|
mv[ 1] = SWAP8(hash->h8[1]); |
|
|
@ -196,243 +195,35 @@ __kernel void search1(__global hash_t* hashes) |
|
|
|
mv[12] = 0; |
|
|
|
mv[12] = 0; |
|
|
|
mv[13] = 0; |
|
|
|
mv[13] = 0; |
|
|
|
mv[14] = 0; |
|
|
|
mv[14] = 0; |
|
|
|
mv[15] = SPH_C64(512); |
|
|
|
mv[15] = 0x200; |
|
|
|
|
|
|
|
#define M(x) (mv[x]) |
|
|
|
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); |
|
|
|
#define H(x) (BMW_H[x]) |
|
|
|
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; |
|
|
|
#define dH(x) (BMW_h2[x]) |
|
|
|
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; |
|
|
|
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; |
|
|
|
|
|
|
|
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; |
|
|
|
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; |
|
|
|
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; |
|
|
|
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); |
|
|
|
|
|
|
|
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); |
|
|
|
|
|
|
|
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; |
|
|
|
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); |
|
|
|
|
|
|
|
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; |
|
|
|
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); |
|
|
|
|
|
|
|
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; |
|
|
|
|
|
|
|
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 2 |
|
|
|
|
|
|
|
for(int i=0;i<2;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = |
|
|
|
|
|
|
|
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
|
|
|
for(int i=2;i<6;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
|
|
|
for(int i=6;i<9;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
FOLDb; |
|
|
|
for(int i=9;i<13;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
#undef M |
|
|
|
for(int i=13;i<16;i++) |
|
|
|
#undef H |
|
|
|
{ |
|
|
|
#undef dH |
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; |
|
|
|
|
|
|
|
sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); |
|
|
|
|
|
|
|
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); |
|
|
|
|
|
|
|
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); |
|
|
|
|
|
|
|
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); |
|
|
|
|
|
|
|
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); |
|
|
|
|
|
|
|
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); |
|
|
|
|
|
|
|
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); |
|
|
|
|
|
|
|
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); |
|
|
|
|
|
|
|
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); |
|
|
|
|
|
|
|
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); |
|
|
|
|
|
|
|
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); |
|
|
|
|
|
|
|
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); |
|
|
|
|
|
|
|
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); |
|
|
|
|
|
|
|
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); |
|
|
|
|
|
|
|
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 16 |
|
|
|
|
|
|
|
for(int i=0;i<16;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
mv[i] = BMW_H[i]; |
|
|
|
|
|
|
|
BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]); |
|
|
|
#define M(x) (BMW_h2[x]) |
|
|
|
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1]; |
|
|
|
#define H(x) (final_b[x]) |
|
|
|
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]); |
|
|
|
#define dH(x) (BMW_h1[x]) |
|
|
|
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5]; |
|
|
|
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6]; |
|
|
|
|
|
|
|
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8]; |
|
|
|
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9]; |
|
|
|
|
|
|
|
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]); |
|
|
|
|
|
|
|
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10]; |
|
|
|
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]); |
|
|
|
|
|
|
|
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11]; |
|
|
|
|
|
|
|
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]); |
|
|
|
|
|
|
|
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12]; |
|
|
|
|
|
|
|
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]); |
|
|
|
|
|
|
|
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13]; |
|
|
|
|
|
|
|
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]); |
|
|
|
|
|
|
|
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14]; |
|
|
|
|
|
|
|
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]); |
|
|
|
|
|
|
|
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15]; |
|
|
|
|
|
|
|
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]); |
|
|
|
|
|
|
|
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 2 |
|
|
|
|
|
|
|
for(int i=0;i<2;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = |
|
|
|
|
|
|
|
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) + |
|
|
|
|
|
|
|
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) + |
|
|
|
|
|
|
|
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) + |
|
|
|
|
|
|
|
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) + |
|
|
|
|
|
|
|
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
|
|
|
for(int i=2;i<6;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
|
|
|
|
for(int i=6;i<9;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
FOLDb; |
|
|
|
for(int i=9;i<13;i++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 3 |
|
|
|
#undef M |
|
|
|
for(int i=13;i<16;i++) |
|
|
|
#undef H |
|
|
|
{ |
|
|
|
#undef dH |
|
|
|
q[i+16] = CONST_EXP2 + |
|
|
|
|
|
|
|
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) + |
|
|
|
|
|
|
|
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23]; |
|
|
|
hash->h8[0] = SWAP8(BMW_h1[8]); |
|
|
|
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31]; |
|
|
|
hash->h8[1] = SWAP8(BMW_h1[9]); |
|
|
|
|
|
|
|
hash->h8[2] = SWAP8(BMW_h1[10]); |
|
|
|
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]); |
|
|
|
hash->h8[3] = SWAP8(BMW_h1[11]); |
|
|
|
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]); |
|
|
|
hash->h8[4] = SWAP8(BMW_h1[12]); |
|
|
|
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]); |
|
|
|
hash->h8[5] = SWAP8(BMW_h1[13]); |
|
|
|
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]); |
|
|
|
hash->h8[6] = SWAP8(BMW_h1[14]); |
|
|
|
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]); |
|
|
|
hash->h8[7] = SWAP8(BMW_h1[15]); |
|
|
|
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]); |
|
|
|
|
|
|
|
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]); |
|
|
|
|
|
|
|
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]); |
|
|
|
|
|
|
|
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]); |
|
|
|
|
|
|
|
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]); |
|
|
|
|
|
|
|
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]); |
|
|
|
|
|
|
|
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]); |
|
|
|
|
|
|
|
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]); |
|
|
|
|
|
|
|
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]); |
|
|
|
|
|
|
|
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hash->h8[0] = SWAP8(BMW_H[8]); |
|
|
|
|
|
|
|
hash->h8[1] = SWAP8(BMW_H[9]); |
|
|
|
|
|
|
|
hash->h8[2] = SWAP8(BMW_H[10]); |
|
|
|
|
|
|
|
hash->h8[3] = SWAP8(BMW_H[11]); |
|
|
|
|
|
|
|
hash->h8[4] = SWAP8(BMW_H[12]); |
|
|
|
|
|
|
|
hash->h8[5] = SWAP8(BMW_H[13]); |
|
|
|
|
|
|
|
hash->h8[6] = SWAP8(BMW_H[14]); |
|
|
|
|
|
|
|
hash->h8[7] = SWAP8(BMW_H[15]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
} |
|
|
|
} |
|
|
@ -451,15 +242,14 @@ __kernel void search2(__global hash_t* hashes) |
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
{ |
|
|
|
{ |
|
|
|
T0_L[i] = T0[i]; |
|
|
|
T0_L[i] = T0[i]; |
|
|
|
T4_L[i] = T4[i]; |
|
|
|
|
|
|
|
T1_L[i] = T1[i]; |
|
|
|
T1_L[i] = T1[i]; |
|
|
|
T2_L[i] = T2[i]; |
|
|
|
T2_L[i] = T2[i]; |
|
|
|
T3_L[i] = T3[i]; |
|
|
|
T3_L[i] = T3[i]; |
|
|
|
|
|
|
|
T4_L[i] = T4[i]; |
|
|
|
T5_L[i] = T5[i]; |
|
|
|
T5_L[i] = T5[i]; |
|
|
|
T6_L[i] = T6[i]; |
|
|
|
T6_L[i] = T6[i]; |
|
|
|
T7_L[i] = T7[i]; |
|
|
|
T7_L[i] = T7[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
#define T0 T0_L |
|
|
|
#define T0 T0_L |
|
|
@ -472,38 +262,47 @@ __kernel void search2(__global hash_t* hashes) |
|
|
|
#define T7 T7_L |
|
|
|
#define T7 T7_L |
|
|
|
|
|
|
|
|
|
|
|
// groestl |
|
|
|
// groestl |
|
|
|
sph_u64 H[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0002000000000000}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sph_u64 g[16], m[16]; |
|
|
|
sph_u64 H[16]; |
|
|
|
g[0] = m[0] = DEC64E(hash->h8[0]); |
|
|
|
for (unsigned int u = 0; u < 15; u ++) |
|
|
|
g[1] = m[1] = DEC64E(hash->h8[1]); |
|
|
|
H[u] = 0; |
|
|
|
g[2] = m[2] = DEC64E(hash->h8[2]); |
|
|
|
#if USE_LE |
|
|
|
g[3] = m[3] = DEC64E(hash->h8[3]); |
|
|
|
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); |
|
|
|
g[4] = m[4] = DEC64E(hash->h8[4]); |
|
|
|
#else |
|
|
|
g[5] = m[5] = DEC64E(hash->h8[5]); |
|
|
|
H[15] = (sph_u64)512; |
|
|
|
g[6] = m[6] = DEC64E(hash->h8[6]); |
|
|
|
#endif |
|
|
|
g[7] = m[7] = DEC64E(hash->h8[7]); |
|
|
|
|
|
|
|
g[8] = m[8] = 0x80; |
|
|
|
|
|
|
|
g[9] = m[9] = 0; |
|
|
|
|
|
|
|
g[10] = m[10] = 0; |
|
|
|
|
|
|
|
g[11] = m[11] = 0; |
|
|
|
|
|
|
|
g[12] = m[12] = 0; |
|
|
|
|
|
|
|
g[13] = m[13] = 0; |
|
|
|
|
|
|
|
g[14] = m[14] = 0; |
|
|
|
|
|
|
|
g[15] = 0x102000000000000; |
|
|
|
|
|
|
|
m[15] = 0x100000000000000; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sph_u64 g[16], m[16]; |
|
|
|
|
|
|
|
m[0] = DEC64E(hash->h8[0]); |
|
|
|
|
|
|
|
m[1] = DEC64E(hash->h8[1]); |
|
|
|
|
|
|
|
m[2] = DEC64E(hash->h8[2]); |
|
|
|
|
|
|
|
m[3] = DEC64E(hash->h8[3]); |
|
|
|
|
|
|
|
m[4] = DEC64E(hash->h8[4]); |
|
|
|
|
|
|
|
m[5] = DEC64E(hash->h8[5]); |
|
|
|
|
|
|
|
m[6] = DEC64E(hash->h8[6]); |
|
|
|
|
|
|
|
m[7] = DEC64E(hash->h8[7]); |
|
|
|
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
|
|
|
g[u] = m[u] ^ H[u]; |
|
|
|
|
|
|
|
m[8] = 0x80; g[8] = m[8] ^ H[8]; |
|
|
|
|
|
|
|
m[9] = 0; g[9] = m[9] ^ H[9]; |
|
|
|
|
|
|
|
m[10] = 0; g[10] = m[10] ^ H[10]; |
|
|
|
|
|
|
|
m[11] = 0; g[11] = m[11] ^ H[11]; |
|
|
|
|
|
|
|
m[12] = 0; g[12] = m[12] ^ H[12]; |
|
|
|
|
|
|
|
m[13] = 0; g[13] = m[13] ^ H[13]; |
|
|
|
|
|
|
|
m[14] = 0; g[14] = m[14] ^ H[14]; |
|
|
|
|
|
|
|
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; |
|
|
|
PERM_BIG_P(g); |
|
|
|
PERM_BIG_P(g); |
|
|
|
PERM_BIG_Q(m); |
|
|
|
PERM_BIG_Q(m); |
|
|
|
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
|
|
|
|
H[u] ^= g[u] ^ m[u]; |
|
|
|
sph_u64 xH[16]; |
|
|
|
sph_u64 xH[16]; |
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
xH[u] = H[u] ^= g[u] ^ m[u]; |
|
|
|
xH[u] = H[u]; |
|
|
|
|
|
|
|
|
|
|
|
PERM_BIG_P(xH); |
|
|
|
PERM_BIG_P(xH); |
|
|
|
|
|
|
|
for (unsigned int u = 0; u < 16; u ++) |
|
|
|
for (unsigned int u = 8; u < 16; u ++) |
|
|
|
H[u] ^= xH[u]; |
|
|
|
hash->h8[u-8] = DEC64E(H[u] ^ xH[u]); |
|
|
|
for (unsigned int u = 0; u < 8; u ++) |
|
|
|
|
|
|
|
hash->h8[u] = DEC64E(H[u + 8]); |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
} |
|
|
|
} |
|
|
@ -528,14 +327,10 @@ __kernel void search3(__global hash_t* hashes) |
|
|
|
m5 = SWAP8(hash->h8[5]); |
|
|
|
m5 = SWAP8(hash->h8[5]); |
|
|
|
m6 = SWAP8(hash->h8[6]); |
|
|
|
m6 = SWAP8(hash->h8[6]); |
|
|
|
m7 = SWAP8(hash->h8[7]); |
|
|
|
m7 = SWAP8(hash->h8[7]); |
|
|
|
|
|
|
|
|
|
|
|
UBI_BIG(480, 64); |
|
|
|
UBI_BIG(480, 64); |
|
|
|
|
|
|
|
|
|
|
|
bcount = 0; |
|
|
|
bcount = 0; |
|
|
|
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; |
|
|
|
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; |
|
|
|
|
|
|
|
|
|
|
|
UBI_BIG(510, 8); |
|
|
|
UBI_BIG(510, 8); |
|
|
|
|
|
|
|
|
|
|
|
hash->h8[0] = SWAP8(h0); |
|
|
|
hash->h8[0] = SWAP8(h0); |
|
|
|
hash->h8[1] = SWAP8(h1); |
|
|
|
hash->h8[1] = SWAP8(h1); |
|
|
|
hash->h8[2] = SWAP8(h2); |
|
|
|
hash->h8[2] = SWAP8(h2); |
|
|
@ -562,8 +357,7 @@ __kernel void search4(__global hash_t* hashes) |
|
|
|
|
|
|
|
|
|
|
|
for(int i = 0; i < 2; i++) |
|
|
|
for(int i = 0; i < 2; i++) |
|
|
|
{ |
|
|
|
{ |
|
|
|
if (i == 0) |
|
|
|
if (i == 0) { |
|
|
|
{ |
|
|
|
|
|
|
|
h0h ^= DEC64E(hash->h8[0]); |
|
|
|
h0h ^= DEC64E(hash->h8[0]); |
|
|
|
h0l ^= DEC64E(hash->h8[1]); |
|
|
|
h0l ^= DEC64E(hash->h8[1]); |
|
|
|
h1h ^= DEC64E(hash->h8[2]); |
|
|
|
h1h ^= DEC64E(hash->h8[2]); |
|
|
@ -572,9 +366,7 @@ __kernel void search4(__global hash_t* hashes) |
|
|
|
h2l ^= DEC64E(hash->h8[5]); |
|
|
|
h2l ^= DEC64E(hash->h8[5]); |
|
|
|
h3h ^= DEC64E(hash->h8[6]); |
|
|
|
h3h ^= DEC64E(hash->h8[6]); |
|
|
|
h3l ^= DEC64E(hash->h8[7]); |
|
|
|
h3l ^= DEC64E(hash->h8[7]); |
|
|
|
} |
|
|
|
} else if(i == 1) { |
|
|
|
else if(i == 1) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
h4h ^= DEC64E(hash->h8[0]); |
|
|
|
h4h ^= DEC64E(hash->h8[0]); |
|
|
|
h4l ^= DEC64E(hash->h8[1]); |
|
|
|
h4l ^= DEC64E(hash->h8[1]); |
|
|
|
h5h ^= DEC64E(hash->h8[2]); |
|
|
|
h5h ^= DEC64E(hash->h8[2]); |
|
|
@ -635,7 +427,6 @@ __kernel void search5(__global hash_t* hashes) |
|
|
|
a21 ^= SWAP8(hash->h8[7]); |
|
|
|
a21 ^= SWAP8(hash->h8[7]); |
|
|
|
a31 ^= 0x8000000000000001; |
|
|
|
a31 ^= 0x8000000000000001; |
|
|
|
KECCAK_F_1600; |
|
|
|
KECCAK_F_1600; |
|
|
|
|
|
|
|
|
|
|
|
// Finalize the "lane complement" |
|
|
|
// Finalize the "lane complement" |
|
|
|
a10 = ~a10; |
|
|
|
a10 = ~a10; |
|
|
|
a20 = ~a20; |
|
|
|
a20 = ~a20; |
|
|
@ -682,8 +473,7 @@ __kernel void search6(__global hash_t* hashes) |
|
|
|
MI5; |
|
|
|
MI5; |
|
|
|
LUFFA_P5; |
|
|
|
LUFFA_P5; |
|
|
|
|
|
|
|
|
|
|
|
if(i == 0) |
|
|
|
if(i == 0) { |
|
|
|
{ |
|
|
|
|
|
|
|
M0 = hash->h4[9]; |
|
|
|
M0 = hash->h4[9]; |
|
|
|
M1 = hash->h4[8]; |
|
|
|
M1 = hash->h4[8]; |
|
|
|
M2 = hash->h4[11]; |
|
|
|
M2 = hash->h4[11]; |
|
|
@ -692,16 +482,12 @@ __kernel void search6(__global hash_t* hashes) |
|
|
|
M5 = hash->h4[12]; |
|
|
|
M5 = hash->h4[12]; |
|
|
|
M6 = hash->h4[15]; |
|
|
|
M6 = hash->h4[15]; |
|
|
|
M7 = hash->h4[14]; |
|
|
|
M7 = hash->h4[14]; |
|
|
|
} |
|
|
|
} else if(i == 1) { |
|
|
|
else if(i == 1) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
M0 = 0x80000000; |
|
|
|
M0 = 0x80000000; |
|
|
|
M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
} |
|
|
|
} else if(i == 2) { |
|
|
|
else if(i == 2) |
|
|
|
|
|
|
|
M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; |
|
|
|
else if(i == 3) |
|
|
|
} else if(i == 3) { |
|
|
|
{ |
|
|
|
|
|
|
|
hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; |
|
|
|
hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; |
|
|
|
hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; |
|
|
|
hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; |
|
|
|
hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; |
|
|
|
hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; |
|
|
@ -751,12 +537,10 @@ __kernel void search7(__global hash_t* hashes) |
|
|
|
x6 ^= SWAP4(hash->h4[7]); |
|
|
|
x6 ^= SWAP4(hash->h4[7]); |
|
|
|
x7 ^= SWAP4(hash->h4[6]); |
|
|
|
x7 ^= SWAP4(hash->h4[6]); |
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 13; i ++) |
|
|
|
for (int i = 0; i < 13; i ++) { |
|
|
|
{ |
|
|
|
|
|
|
|
SIXTEEN_ROUNDS; |
|
|
|
SIXTEEN_ROUNDS; |
|
|
|
|
|
|
|
|
|
|
|
if (i == 0) |
|
|
|
if (i == 0) { |
|
|
|
{ |
|
|
|
|
|
|
|
x0 ^= SWAP4(hash->h4[9]); |
|
|
|
x0 ^= SWAP4(hash->h4[9]); |
|
|
|
x1 ^= SWAP4(hash->h4[8]); |
|
|
|
x1 ^= SWAP4(hash->h4[8]); |
|
|
|
x2 ^= SWAP4(hash->h4[11]); |
|
|
|
x2 ^= SWAP4(hash->h4[11]); |
|
|
@ -765,12 +549,12 @@ __kernel void search7(__global hash_t* hashes) |
|
|
|
x5 ^= SWAP4(hash->h4[12]); |
|
|
|
x5 ^= SWAP4(hash->h4[12]); |
|
|
|
x6 ^= SWAP4(hash->h4[15]); |
|
|
|
x6 ^= SWAP4(hash->h4[15]); |
|
|
|
x7 ^= SWAP4(hash->h4[14]); |
|
|
|
x7 ^= SWAP4(hash->h4[14]); |
|
|
|
} |
|
|
|
} else if(i == 1) { |
|
|
|
else if(i == 1) |
|
|
|
|
|
|
|
x0 ^= 0x80; |
|
|
|
x0 ^= 0x80; |
|
|
|
else if (i == 2) |
|
|
|
} else if (i == 2) { |
|
|
|
xv ^= SPH_C32(1); |
|
|
|
xv ^= SPH_C32(1); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
hash->h4[0] = x0; |
|
|
|
hash->h4[0] = x0; |
|
|
|
hash->h4[1] = x1; |
|
|
|
hash->h4[1] = x1; |
|
|
@ -797,7 +581,6 @@ __kernel void search8(__global hash_t* hashes) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint gid = get_global_id(0); |
|
|
|
uint gid = get_global_id(0); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
|
|
|
|
|
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
|
|
|
|
|
|
|
|
int init = get_local_id(0); |
|
|
|
int init = get_local_id(0); |
|
|
@ -826,7 +609,7 @@ __kernel void search8(__global hash_t* hashes) |
|
|
|
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; |
|
|
|
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; |
|
|
|
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; |
|
|
|
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; |
|
|
|
|
|
|
|
|
|
|
|
sph_u32 sc_count0 = 0x200, sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; |
|
|
|
sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; |
|
|
|
|
|
|
|
|
|
|
|
rk00 = hash->h4[0]; |
|
|
|
rk00 = hash->h4[0]; |
|
|
|
rk01 = hash->h4[1]; |
|
|
|
rk01 = hash->h4[1]; |
|
|
@ -892,8 +675,7 @@ __kernel void search9(__global hash_t* hashes) |
|
|
|
u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); |
|
|
|
u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); |
|
|
|
|
|
|
|
|
|
|
|
FFT256(0, 1, 0, ll1); |
|
|
|
FFT256(0, 1, 0, ll1); |
|
|
|
for (int i = 0; i < 256; i ++) |
|
|
|
for (int i = 0; i < 256; i ++) { |
|
|
|
{ |
|
|
|
|
|
|
|
s32 tq; |
|
|
|
s32 tq; |
|
|
|
|
|
|
|
|
|
|
|
tq = q[i] + yoff_b_n[i]; |
|
|
|
tq = q[i] + yoff_b_n[i]; |
|
|
@ -929,17 +711,14 @@ __kernel void search9(__global hash_t* hashes) |
|
|
|
C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), |
|
|
|
C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), |
|
|
|
C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), |
|
|
|
C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), |
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), |
|
|
|
C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), |
|
|
|
C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), |
|
|
|
C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), |
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), |
|
|
|
C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), |
|
|
|
C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), |
|
|
|
C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), |
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), |
|
|
|
C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), |
|
|
|
C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), |
|
|
|
C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), |
|
|
@ -958,27 +737,22 @@ __kernel void search9(__global hash_t* hashes) |
|
|
|
ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); |
|
|
|
ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); |
|
|
|
ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); |
|
|
|
ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); |
|
|
|
ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); |
|
|
|
ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
COPY_A0, COPY_A1, COPY_A2, COPY_A3, |
|
|
|
COPY_A0, COPY_A1, COPY_A2, COPY_A3, |
|
|
|
COPY_A4, COPY_A5, COPY_A6, COPY_A7, |
|
|
|
COPY_A4, COPY_A5, COPY_A6, COPY_A7, |
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
IF, 4, 13, PP8_4_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
COPY_B0, COPY_B1, COPY_B2, COPY_B3, |
|
|
|
COPY_B0, COPY_B1, COPY_B2, COPY_B3, |
|
|
|
COPY_B4, COPY_B5, COPY_B6, COPY_B7, |
|
|
|
COPY_B4, COPY_B5, COPY_B6, COPY_B7, |
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
IF, 13, 10, PP8_5_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
COPY_C0, COPY_C1, COPY_C2, COPY_C3, |
|
|
|
COPY_C0, COPY_C1, COPY_C2, COPY_C3, |
|
|
|
COPY_C4, COPY_C5, COPY_C6, COPY_C7, |
|
|
|
COPY_C4, COPY_C5, COPY_C6, COPY_C7, |
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
IF, 10, 25, PP8_6_); |
|
|
|
|
|
|
|
|
|
|
|
STEP_BIG( |
|
|
|
STEP_BIG( |
|
|
|
COPY_D0, COPY_D1, COPY_D2, COPY_D3, |
|
|
|
COPY_D0, COPY_D1, COPY_D2, COPY_D3, |
|
|
|
COPY_D4, COPY_D5, COPY_D6, COPY_D7, |
|
|
|
COPY_D4, COPY_D5, COPY_D6, COPY_D7, |
|
|
|
IF, 25, 4, PP8_0_); |
|
|
|
IF, 25, 4, PP8_0_); |
|
|
|
|
|
|
|
|
|
|
|
#undef q |
|
|
|
#undef q |
|
|
|
|
|
|
|
|
|
|
|
hash->h4[0] = A0; |
|
|
|
hash->h4[0] = A0; |
|
|
@ -1006,7 +780,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint gid = get_global_id(0); |
|
|
|
uint gid = get_global_id(0); |
|
|
|
uint offset = get_global_offset(0); |
|
|
|
uint offset = get_global_offset(0); |
|
|
|
__global hash_t *hash = &(hashes[gid-offset]); |
|
|
|
hash_t hash; |
|
|
|
|
|
|
|
|
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; |
|
|
|
|
|
|
|
|
|
|
@ -1023,9 +797,20 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef INPUT_BIG_LOCAL |
|
|
|
|
|
|
|
__local sph_u32 T512_L[1024]; |
|
|
|
|
|
|
|
__constant const sph_u32 *T512_C = &T512[0][0]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = init; i < 1024; i += step) |
|
|
|
|
|
|
|
T512_L[i] = T512_C[i]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
#define INPUT_BIG_LOCAL INPUT_BIG |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
// mixtab |
|
|
|
// mixtab |
|
|
|
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; |
|
|
|
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256]; |
|
|
|
|
|
|
|
|
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
{ |
|
|
|
{ |
|
|
|
mixtab0[i] = mixtab0_c[i]; |
|
|
|
mixtab0[i] = mixtab0_c[i]; |
|
|
@ -1033,37 +818,17 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
mixtab2[i] = mixtab2_c[i]; |
|
|
|
mixtab2[i] = mixtab2_c[i]; |
|
|
|
mixtab3[i] = mixtab3_c[i]; |
|
|
|
mixtab3[i] = mixtab3_c[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
__local sph_u32 T512_L[1024]; |
|
|
|
|
|
|
|
__constant const sph_u32 *T512_C = &T512[0][0]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = init; i < 1024; i += step) |
|
|
|
for (int i = 0; i < 8; i++) { |
|
|
|
T512_L[i] = T512_C[i]; |
|
|
|
hash.h8[i] = hashes[gid-offset].h8[i]; |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__local sph_u64 LT0[256], LT1[256], LT2[256], LT3[256], LT4[256], LT5[256], LT6[256], LT7[256]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = init; i < 256; i += step) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
LT0[i] = plain_T0[i]; |
|
|
|
|
|
|
|
LT1[i] = plain_T1[i]; |
|
|
|
|
|
|
|
LT2[i] = plain_T2[i]; |
|
|
|
|
|
|
|
LT3[i] = plain_T3[i]; |
|
|
|
|
|
|
|
LT4[i] = plain_T4[i]; |
|
|
|
|
|
|
|
LT5[i] = plain_T5[i]; |
|
|
|
|
|
|
|
LT6[i] = plain_T6[i]; |
|
|
|
|
|
|
|
LT7[i] = plain_T7[i]; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
// echo |
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 8; i++) |
|
|
|
{ |
|
|
|
hash->h8[i] = hashes[gid-offset].h8[i]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// echo |
|
|
|
|
|
|
|
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; |
|
|
|
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; |
|
|
|
sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; |
|
|
|
sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; |
|
|
|
Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; |
|
|
|
Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; |
|
|
@ -1090,14 +855,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
W61 = Vb61; |
|
|
|
W61 = Vb61; |
|
|
|
W70 = Vb70; |
|
|
|
W70 = Vb70; |
|
|
|
W71 = Vb71; |
|
|
|
W71 = Vb71; |
|
|
|
W80 = hash->h8[0]; |
|
|
|
W80 = hash.h8[0]; |
|
|
|
W81 = hash->h8[1]; |
|
|
|
W81 = hash.h8[1]; |
|
|
|
W90 = hash->h8[2]; |
|
|
|
W90 = hash.h8[2]; |
|
|
|
W91 = hash->h8[3]; |
|
|
|
W91 = hash.h8[3]; |
|
|
|
WA0 = hash->h8[4]; |
|
|
|
WA0 = hash.h8[4]; |
|
|
|
WA1 = hash->h8[5]; |
|
|
|
WA1 = hash.h8[5]; |
|
|
|
WB0 = hash->h8[6]; |
|
|
|
WB0 = hash.h8[6]; |
|
|
|
WB1 = hash->h8[7]; |
|
|
|
WB1 = hash.h8[7]; |
|
|
|
WC0 = 0x80; |
|
|
|
WC0 = 0x80; |
|
|
|
WC1 = 0; |
|
|
|
WC1 = 0; |
|
|
|
WD0 = 0; |
|
|
|
WD0 = 0; |
|
|
@ -1107,19 +872,25 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
WF0 = 0x200; |
|
|
|
WF0 = 0x200; |
|
|
|
WF1 = 0; |
|
|
|
WF1 = 0; |
|
|
|
|
|
|
|
|
|
|
|
for (unsigned u = 0; u < 10; u ++) |
|
|
|
for (unsigned u = 0; u < 10; u ++) { |
|
|
|
BIG_ROUND; |
|
|
|
BIG_ROUND; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
hash->h8[0] ^= Vb00 ^ W00 ^ W80; |
|
|
|
hash.h8[0] ^= Vb00 ^ W00 ^ W80; |
|
|
|
hash->h8[1] ^= Vb01 ^ W01 ^ W81; |
|
|
|
hash.h8[1] ^= Vb01 ^ W01 ^ W81; |
|
|
|
hash->h8[2] ^= Vb10 ^ W10 ^ W90; |
|
|
|
hash.h8[2] ^= Vb10 ^ W10 ^ W90; |
|
|
|
hash->h8[3] ^= Vb11 ^ W11 ^ W91; |
|
|
|
hash.h8[3] ^= Vb11 ^ W11 ^ W91; |
|
|
|
hash->h8[4] ^= Vb20 ^ W20 ^ WA0; |
|
|
|
hash.h8[4] ^= Vb20 ^ W20 ^ WA0; |
|
|
|
hash->h8[5] ^= Vb21 ^ W21 ^ WA1; |
|
|
|
hash.h8[5] ^= Vb21 ^ W21 ^ WA1; |
|
|
|
hash->h8[6] ^= Vb30 ^ W30 ^ WB0; |
|
|
|
hash.h8[6] ^= Vb30 ^ W30 ^ WB0; |
|
|
|
hash->h8[7] ^= Vb31 ^ W31 ^ WB1; |
|
|
|
hash.h8[7] ^= Vb31 ^ W31 ^ WB1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// hamsi |
|
|
|
// hamsi |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; |
|
|
|
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; |
|
|
|
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; |
|
|
|
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; |
|
|
|
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; |
|
|
|
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11]; |
|
|
@ -1128,39 +899,38 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; |
|
|
|
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; |
|
|
|
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; |
|
|
|
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; |
|
|
|
|
|
|
|
|
|
|
|
#define buf(u) hash->h1[i + u] |
|
|
|
#define buf(u) hash.h1[i + u] |
|
|
|
|
|
|
|
for(int i = 0; i < 64; i += 8) { |
|
|
|
for(int i = 0; i < 64; i += 8) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
P_BIG; |
|
|
|
P_BIG; |
|
|
|
T_BIG; |
|
|
|
T_BIG; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#undef buf |
|
|
|
#undef buf |
|
|
|
#define buf(u) (u == 0 ? 0x80 : 0) |
|
|
|
#define buf(u) (u == 0 ? 0x80 : 0) |
|
|
|
|
|
|
|
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
P_BIG; |
|
|
|
P_BIG; |
|
|
|
T_BIG; |
|
|
|
T_BIG; |
|
|
|
|
|
|
|
|
|
|
|
#undef buf |
|
|
|
#undef buf |
|
|
|
#define buf(u) (u == 6 ? 2 : 0) |
|
|
|
#define buf(u) (u == 6 ? 2 : 0) |
|
|
|
|
|
|
|
|
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
INPUT_BIG_LOCAL; |
|
|
|
PF_BIG; |
|
|
|
PF_BIG; |
|
|
|
T_BIG; |
|
|
|
T_BIG; |
|
|
|
|
|
|
|
|
|
|
|
for (unsigned u = 0; u < 16; u ++) |
|
|
|
for (unsigned u = 0; u < 16; u ++) |
|
|
|
hash->h4[u] = h[u]; |
|
|
|
hash.h4[u] = h[u]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// fugue |
|
|
|
// fugue |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; |
|
|
|
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; |
|
|
|
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; |
|
|
|
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; |
|
|
|
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; |
|
|
|
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29; |
|
|
|
sph_u32 S30, S31, S32, S33, S34, S35; |
|
|
|
sph_u32 S30, S31, S32, S33, S34, S35; |
|
|
|
|
|
|
|
|
|
|
|
ulong fc_bit_count = (sph_u64) 0x200; |
|
|
|
ulong fc_bit_count = (sph_u64) 64 << 3; |
|
|
|
|
|
|
|
|
|
|
|
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; |
|
|
|
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0; |
|
|
|
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); |
|
|
|
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027); |
|
|
@ -1168,25 +938,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); |
|
|
|
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); |
|
|
|
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); |
|
|
|
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); |
|
|
|
|
|
|
|
|
|
|
|
FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2])); |
|
|
|
FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2])); |
|
|
|
FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5])); |
|
|
|
FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5])); |
|
|
|
FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8])); |
|
|
|
FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8])); |
|
|
|
FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB])); |
|
|
|
FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB])); |
|
|
|
FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE])); |
|
|
|
FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE])); |
|
|
|
FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); |
|
|
|
FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); |
|
|
|
|
|
|
|
|
|
|
|
// apply round shift if necessary |
|
|
|
// apply round shift if necessary |
|
|
|
int i; |
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 32; i ++) |
|
|
|
for (i = 0; i < 32; i ++) { |
|
|
|
{ |
|
|
|
|
|
|
|
ROR3; |
|
|
|
ROR3; |
|
|
|
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); |
|
|
|
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); |
|
|
|
SMIX(S00, S01, S02, S03); |
|
|
|
SMIX(S00, S01, S02, S03); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
for (i = 0; i < 13; i ++) { |
|
|
|
for (i = 0; i < 13; i ++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
S04 ^= S00; |
|
|
|
S04 ^= S00; |
|
|
|
S09 ^= S00; |
|
|
|
S09 ^= S00; |
|
|
|
S18 ^= S00; |
|
|
|
S18 ^= S00; |
|
|
@ -1217,24 +984,27 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
S18 ^= S00; |
|
|
|
S18 ^= S00; |
|
|
|
S27 ^= S00; |
|
|
|
S27 ^= S00; |
|
|
|
|
|
|
|
|
|
|
|
hash->h4[0] = SWAP4(S01); |
|
|
|
hash.h4[0] = SWAP4(S01); |
|
|
|
hash->h4[1] = SWAP4(S02); |
|
|
|
hash.h4[1] = SWAP4(S02); |
|
|
|
hash->h4[2] = SWAP4(S03); |
|
|
|
hash.h4[2] = SWAP4(S03); |
|
|
|
hash->h4[3] = SWAP4(S04); |
|
|
|
hash.h4[3] = SWAP4(S04); |
|
|
|
hash->h4[4] = SWAP4(S09); |
|
|
|
hash.h4[4] = SWAP4(S09); |
|
|
|
hash->h4[5] = SWAP4(S10); |
|
|
|
hash.h4[5] = SWAP4(S10); |
|
|
|
hash->h4[6] = SWAP4(S11); |
|
|
|
hash.h4[6] = SWAP4(S11); |
|
|
|
hash->h4[7] = SWAP4(S12); |
|
|
|
hash.h4[7] = SWAP4(S12); |
|
|
|
hash->h4[8] = SWAP4(S18); |
|
|
|
hash.h4[8] = SWAP4(S18); |
|
|
|
hash->h4[9] = SWAP4(S19); |
|
|
|
hash.h4[9] = SWAP4(S19); |
|
|
|
hash->h4[10] = SWAP4(S20); |
|
|
|
hash.h4[10] = SWAP4(S20); |
|
|
|
hash->h4[11] = SWAP4(S21); |
|
|
|
hash.h4[11] = SWAP4(S21); |
|
|
|
hash->h4[12] = SWAP4(S27); |
|
|
|
hash.h4[12] = SWAP4(S27); |
|
|
|
hash->h4[13] = SWAP4(S28); |
|
|
|
hash.h4[13] = SWAP4(S28); |
|
|
|
hash->h4[14] = SWAP4(S29); |
|
|
|
hash.h4[14] = SWAP4(S29); |
|
|
|
hash->h4[15] = SWAP4(S30); |
|
|
|
hash.h4[15] = SWAP4(S30); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//shabal |
|
|
|
//shabal |
|
|
|
|
|
|
|
{ |
|
|
|
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], |
|
|
|
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7], |
|
|
|
A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; |
|
|
|
A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11]; |
|
|
|
sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], |
|
|
|
sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7], |
|
|
@ -1244,22 +1014,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; |
|
|
|
sph_u32 Wlow = 1, Whigh = 0; |
|
|
|
sph_u32 Wlow = 1, Whigh = 0; |
|
|
|
|
|
|
|
|
|
|
|
M0 = hash->h4[0]; |
|
|
|
M0 = hash.h4[0]; |
|
|
|
M1 = hash->h4[1]; |
|
|
|
M1 = hash.h4[1]; |
|
|
|
M2 = hash->h4[2]; |
|
|
|
M2 = hash.h4[2]; |
|
|
|
M3 = hash->h4[3]; |
|
|
|
M3 = hash.h4[3]; |
|
|
|
M4 = hash->h4[4]; |
|
|
|
M4 = hash.h4[4]; |
|
|
|
M5 = hash->h4[5]; |
|
|
|
M5 = hash.h4[5]; |
|
|
|
M6 = hash->h4[6]; |
|
|
|
M6 = hash.h4[6]; |
|
|
|
M7 = hash->h4[7]; |
|
|
|
M7 = hash.h4[7]; |
|
|
|
M8 = hash->h4[8]; |
|
|
|
M8 = hash.h4[8]; |
|
|
|
M9 = hash->h4[9]; |
|
|
|
M9 = hash.h4[9]; |
|
|
|
MA = hash->h4[10]; |
|
|
|
MA = hash.h4[10]; |
|
|
|
MB = hash->h4[11]; |
|
|
|
MB = hash.h4[11]; |
|
|
|
MC = hash->h4[12]; |
|
|
|
MC = hash.h4[12]; |
|
|
|
MD = hash->h4[13]; |
|
|
|
MD = hash.h4[13]; |
|
|
|
ME = hash->h4[14]; |
|
|
|
ME = hash.h4[14]; |
|
|
|
MF = hash->h4[15]; |
|
|
|
MF = hash.h4[15]; |
|
|
|
|
|
|
|
|
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
XOR_W; |
|
|
|
XOR_W; |
|
|
@ -1274,44 +1044,44 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
INPUT_BLOCK_ADD; |
|
|
|
XOR_W; |
|
|
|
XOR_W; |
|
|
|
APPLY_P; |
|
|
|
APPLY_P; |
|
|
|
|
|
|
|
for (unsigned i = 0; i < 3; i ++) { |
|
|
|
for (unsigned i = 0; i < 3; i ++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
SWAP_BC; |
|
|
|
SWAP_BC; |
|
|
|
XOR_W; |
|
|
|
XOR_W; |
|
|
|
APPLY_P; |
|
|
|
APPLY_P; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
hash->h4[0] = B0; |
|
|
|
hash.h4[0] = B0; |
|
|
|
hash->h4[1] = B1; |
|
|
|
hash.h4[1] = B1; |
|
|
|
hash->h4[2] = B2; |
|
|
|
hash.h4[2] = B2; |
|
|
|
hash->h4[3] = B3; |
|
|
|
hash.h4[3] = B3; |
|
|
|
hash->h4[4] = B4; |
|
|
|
hash.h4[4] = B4; |
|
|
|
hash->h4[5] = B5; |
|
|
|
hash.h4[5] = B5; |
|
|
|
hash->h4[6] = B6; |
|
|
|
hash.h4[6] = B6; |
|
|
|
hash->h4[7] = B7; |
|
|
|
hash.h4[7] = B7; |
|
|
|
hash->h4[8] = B8; |
|
|
|
hash.h4[8] = B8; |
|
|
|
hash->h4[9] = B9; |
|
|
|
hash.h4[9] = B9; |
|
|
|
hash->h4[10] = BA; |
|
|
|
hash.h4[10] = BA; |
|
|
|
hash->h4[11] = BB; |
|
|
|
hash.h4[11] = BB; |
|
|
|
hash->h4[12] = BC; |
|
|
|
hash.h4[12] = BC; |
|
|
|
hash->h4[13] = BD; |
|
|
|
hash.h4[13] = BD; |
|
|
|
hash->h4[14] = BE; |
|
|
|
hash.h4[14] = BE; |
|
|
|
hash->h4[15] = BF; |
|
|
|
hash.h4[15] = BF; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//whirlpool |
|
|
|
//whirlpool |
|
|
|
|
|
|
|
{ |
|
|
|
sph_u64 n0, n1, n2, n3, n4, n5, n6, n7; |
|
|
|
sph_u64 n0, n1, n2, n3, n4, n5, n6, n7; |
|
|
|
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; |
|
|
|
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; |
|
|
|
sph_u64 state[8]; |
|
|
|
sph_u64 state[8]; |
|
|
|
|
|
|
|
|
|
|
|
n0 = (hash->h8[0]); |
|
|
|
n0 = (hash.h8[0]); |
|
|
|
n1 = (hash->h8[1]); |
|
|
|
n1 = (hash.h8[1]); |
|
|
|
n2 = (hash->h8[2]); |
|
|
|
n2 = (hash.h8[2]); |
|
|
|
n3 = (hash->h8[3]); |
|
|
|
n3 = (hash.h8[3]); |
|
|
|
n4 = (hash->h8[4]); |
|
|
|
n4 = (hash.h8[4]); |
|
|
|
n5 = (hash->h8[5]); |
|
|
|
n5 = (hash.h8[5]); |
|
|
|
n6 = (hash->h8[6]); |
|
|
|
n6 = (hash.h8[6]); |
|
|
|
n7 = (hash->h8[7]); |
|
|
|
n7 = (hash.h8[7]); |
|
|
|
|
|
|
|
|
|
|
|
h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; |
|
|
|
h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0; |
|
|
|
|
|
|
|
|
|
|
@ -1324,9 +1094,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
n6 ^= h6; |
|
|
|
n6 ^= h6; |
|
|
|
n7 ^= h7; |
|
|
|
n7 ^= h7; |
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 10 |
|
|
|
for (unsigned r = 0; r < 10; r ++) { |
|
|
|
for (unsigned r = 0; r < 10; r ++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
|
|
|
|
|
|
|
|
ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); |
|
|
|
ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); |
|
|
@ -1335,14 +1103,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
TRANSFER(n, tmp); |
|
|
|
TRANSFER(n, tmp); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
state[0] = n0 ^ (hash->h8[0]); |
|
|
|
state[0] = n0 ^ (hash.h8[0]); |
|
|
|
state[1] = n1 ^ (hash->h8[1]); |
|
|
|
state[1] = n1 ^ (hash.h8[1]); |
|
|
|
state[2] = n2 ^ (hash->h8[2]); |
|
|
|
state[2] = n2 ^ (hash.h8[2]); |
|
|
|
state[3] = n3 ^ (hash->h8[3]); |
|
|
|
state[3] = n3 ^ (hash.h8[3]); |
|
|
|
state[4] = n4 ^ (hash->h8[4]); |
|
|
|
state[4] = n4 ^ (hash.h8[4]); |
|
|
|
state[5] = n5 ^ (hash->h8[5]); |
|
|
|
state[5] = n5 ^ (hash.h8[5]); |
|
|
|
state[6] = n6 ^ (hash->h8[6]); |
|
|
|
state[6] = n6 ^ (hash.h8[6]); |
|
|
|
state[7] = n7 ^ (hash->h8[7]); |
|
|
|
state[7] = n7 ^ (hash.h8[7]); |
|
|
|
|
|
|
|
|
|
|
|
n0 = 0x80; |
|
|
|
n0 = 0x80; |
|
|
|
n1 = n2 = n3 = n4 = n5 = n6 = 0; |
|
|
|
n1 = n2 = n3 = n4 = n5 = n6 = 0; |
|
|
@ -1366,12 +1134,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
n6 ^= h6; |
|
|
|
n6 ^= h6; |
|
|
|
n7 ^= h7; |
|
|
|
n7 ^= h7; |
|
|
|
|
|
|
|
|
|
|
|
#pragma unroll 10 |
|
|
|
for (unsigned r = 0; r < 10; r ++) { |
|
|
|
for (unsigned r = 0; r < 10; r ++) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
|
|
|
|
|
|
|
|
|
|
|
ROUND_KSCHED(LT, h, tmp, plain_RC[r]); |
|
|
|
ROUND_KSCHED(plain_T, h, tmp, plain_RC[r]); |
|
|
|
TRANSFER(h, tmp); |
|
|
|
TRANSFER(h, tmp); |
|
|
|
ROUND_WENC(plain_T, n, h, tmp); |
|
|
|
ROUND_WENC(plain_T, n, h, tmp); |
|
|
|
TRANSFER(n, tmp); |
|
|
|
TRANSFER(n, tmp); |
|
|
@ -1387,9 +1153,10 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo |
|
|
|
state[7] ^= n7 ^ 0x2000000000000; |
|
|
|
state[7] ^= n7 ^ 0x2000000000000; |
|
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 8; i ++) |
|
|
|
for (unsigned i = 0; i < 8; i ++) |
|
|
|
hash->h8[i] = state[i]; |
|
|
|
hash.h8[i] = state[i]; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
bool result = (hash->h8[3] <= target); |
|
|
|
bool result = (hash.h8[3] <= target); |
|
|
|
if (result) |
|
|
|
if (result) |
|
|
|
output[atomic_inc(output+0xFF)] = SWAP4(gid); |
|
|
|
output[atomic_inc(output+0xFF)] = SWAP4(gid); |
|
|
|
|
|
|
|
|
|
|
|