@ -70,7 +70,7 @@ typedef long sph_s64;
@@ -70,7 +70,7 @@ typedef long sph_s64;
// Compile-time 0/1 switches (SPH_*), set before the kernels that follow.
// NOTE(review): SPH_LUFFA_PARALLEL is defined twice back to back (1, then 0) with no
// #undef in between. This looks like the removed and the added line of a patch hunk
// both being kept - confirm which value is intended and drop the other.
# define SPH_SIMD_NOCOPY 0
# define SPH_KECCAK_NOCOPY 0
# define SPH_COMPACT_BLAKE_64 0
# define SPH_LUFFA_PARALLEL 1
# define SPH_LUFFA_PARALLEL 0
# define SPH_SMALL_FOOTPRINT_GROESTL 0
# define SPH_GROESTL_BIG_ENDIAN 0
# define SPH_CUBEHASH_UNROLL 0
@ -115,8 +115,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
@@ -115,8 +115,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
{
// Global index of this work-item in dimension 0.
uint gid = get_global_id ( 0 ) ;
// Slot of this work-item in the hashes buffer; the global offset is subtracted from
// gid to form the array index.
__global hash_t *hash = & ( hashes[gid-get_global_offset ( 0 ) ] ) ;
// blake
// Starting values H0..H5 for the blake stage (any further words are not visible here).
sph_u64 H0 = SPH_C64 ( 0x6A09E667F3BCC908 ) , H1 = SPH_C64 ( 0xBB67AE8584CAA73B ) ;
sph_u64 H2 = SPH_C64 ( 0x3C6EF372FE94F82B ) , H3 = SPH_C64 ( 0xA54FF53A5F1D36F1 ) ;
sph_u64 H4 = SPH_C64 ( 0x510E527FADE682D1 ) , H5 = SPH_C64 ( 0x9B05688C2B3E6C1F ) ;
@ -125,13 +125,13 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
@@ -125,13 +125,13 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
// Counter pair for the blake stage: T0 is the low word and T1 receives the carry
// (see the wrap test below). ( 80 << 3 ) is 640.
// NOTE(review): the doubled ';;' ending the next line is a stray empty statement.
sph_u64 T0 = SPH_C64 ( 0xFFFFFFFFFFFFFC00 ) + ( 80 << 3 ) , T1 = 0xFFFFFFFFFFFFFFFF ;;
// Advance T0 by 1024; a result below 1024 means the addition wrapped, so carry into T1.
// Assuming SPH_T64 only truncates to 64 bits (its definition is not visible here),
// T0 ends up as 640 and T1 wraps from all-ones to 0 - TODO confirm.
if ( ( T0 = SPH_T64 ( T0 + 1024 ) ) < 1024 )
{
T1 = SPH_T64 ( T1 + 1 ) ;
}
// M0..MF hold the message words, V0..VF the working state of the stage.
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7 ;
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF ;
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7 ;
sph_u64 V8, V9, VA, VB, VC, VD, VE, VF ;
// Decode the input block 8 bytes at a time with DEC64BE (defined elsewhere;
// presumably a big-endian 64-bit load - verify).
M0 = DEC64BE ( block + 0 ) ;
M1 = DEC64BE ( block + 8 ) ;
M2 = DEC64BE ( block + 16 ) ;
@ -170,13 +170,16 @@ __kernel void search1(__global hash_t* hashes)
@@ -170,13 +170,16 @@ __kernel void search1(__global hash_t* hashes)
{
// Global index of this work-item in dimension 0.
uint gid = get_global_id ( 0 ) ;
// Slot of this work-item in the hashes buffer (index is gid minus the global offset).
__global hash_t *hash = & ( hashes[gid-get_global_offset ( 0 ) ] ) ;
// bmw
// Working chaining value, seeded from the BMW_IV512 table.
sph_u64 BMW_H[16] ;
# pragma unroll 16
for ( unsigned u = 0 ; u < 16; u++)
BMW_H[u] = BMW_IV512[u] ;
sph_u64 BMW_h1[16], BMW_h2[16 ];
// NOTE(review): mv is declared twice in the same scope (the second line also adds
// q[32]). This looks like the old and the new line of a patch both being kept;
// only one of the two declarations can stay.
sph_u64 mv[16] ;
sph_u64 mv[16],q[32 ];
sph_u64 tmp ;
// Message words: the incoming hash words passed through SWAP8 (defined elsewhere).
mv[0] = SWAP8 ( hash->h8[0] ) ;
mv[1] = SWAP8 ( hash->h8[1] ) ;
@ -193,35 +196,243 @@ __kernel void search1(__global hash_t* hashes)
@@ -193,35 +196,243 @@ __kernel void search1(__global hash_t* hashes)
// Tail of the message block: words 12..14 are zero.
mv[12] = 0 ;
mv[13] = 0 ;
mv[14] = 0 ;
// NOTE(review): mv[15] is assigned twice (here and again after the macros). 0x200
// equals 512, so both lines store the same number provided SPH_C64 only adds a
// type suffix - TODO confirm, then keep one.
mv[15] = 0x200 ;
// NOTE(review): as written there is whitespace between each macro name and '(' so
// the preprocessor treats M, H and dH as object-like macros, not macros taking x.
// The spacing may be an artifact of reformatting - verify against the real source.
# define M ( x ) ( mv[x] )
# define H ( x ) ( BMW_H[x] )
# define dH ( x ) ( BMW_h2[x] )
mv[15] = SPH_C64 ( 512 ) ;
tmp = ( mv[5] ^ BMW_H[5] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[10] ^ BMW_H[10] ) + ( mv[13] ^ BMW_H[13] ) + ( mv[14] ^ BMW_H[14] ) ;
q[0] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[1] ;
tmp = ( mv[6] ^ BMW_H[6] ) - ( mv[8] ^ BMW_H[8] ) + ( mv[11] ^ BMW_H[11] ) + ( mv[14] ^ BMW_H[14] ) - ( mv[15] ^ BMW_H[15] ) ;
q[1] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[2] ;
tmp = ( mv[0] ^ BMW_H[0] ) + ( mv[7] ^ BMW_H[7] ) + ( mv[9] ^ BMW_H[9] ) - ( mv[12] ^ BMW_H[12] ) + ( mv[15] ^ BMW_H[15] ) ;
q[2] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[3] ;
tmp = ( mv[0] ^ BMW_H[0] ) - ( mv[1] ^ BMW_H[1] ) + ( mv[8] ^ BMW_H[8] ) - ( mv[10] ^ BMW_H[10] ) + ( mv[13] ^ BMW_H[13] ) ;
q[3] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[4] ;
tmp = ( mv[1] ^ BMW_H[1] ) + ( mv[2] ^ BMW_H[2] ) + ( mv[9] ^ BMW_H[9] ) - ( mv[11] ^ BMW_H[11] ) - ( mv[14] ^ BMW_H[14] ) ;
q[4] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[5] ;
tmp = ( mv[3] ^ BMW_H[3] ) - ( mv[2] ^ BMW_H[2] ) + ( mv[10] ^ BMW_H[10] ) - ( mv[12] ^ BMW_H[12] ) + ( mv[15] ^ BMW_H[15] ) ;
q[5] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[6] ;
tmp = ( mv[4] ^ BMW_H[4] ) - ( mv[0] ^ BMW_H[0] ) - ( mv[3] ^ BMW_H[3] ) - ( mv[11] ^ BMW_H[11] ) + ( mv[13] ^ BMW_H[13] ) ;
q[6] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[7] ;
tmp = ( mv[1] ^ BMW_H[1] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[5] ^ BMW_H[5] ) - ( mv[12] ^ BMW_H[12] ) - ( mv[14] ^ BMW_H[14] ) ;
q[7] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[8] ;
tmp = ( mv[2] ^ BMW_H[2] ) - ( mv[5] ^ BMW_H[5] ) - ( mv[6] ^ BMW_H[6] ) + ( mv[13] ^ BMW_H[13] ) - ( mv[15] ^ BMW_H[15] ) ;
q[8] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[9] ;
tmp = ( mv[0] ^ BMW_H[0] ) - ( mv[3] ^ BMW_H[3] ) + ( mv[6] ^ BMW_H[6] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[14] ^ BMW_H[14] ) ;
q[9] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[10] ;
tmp = ( mv[8] ^ BMW_H[8] ) - ( mv[1] ^ BMW_H[1] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[15] ^ BMW_H[15] ) ;
q[10] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[11] ;
tmp = ( mv[8] ^ BMW_H[8] ) - ( mv[0] ^ BMW_H[0] ) - ( mv[2] ^ BMW_H[2] ) - ( mv[5] ^ BMW_H[5] ) + ( mv[9] ^ BMW_H[9] ) ;
q[11] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[12] ;
tmp = ( mv[1] ^ BMW_H[1] ) + ( mv[3] ^ BMW_H[3] ) - ( mv[6] ^ BMW_H[6] ) - ( mv[9] ^ BMW_H[9] ) + ( mv[10] ^ BMW_H[10] ) ;
q[12] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[13] ;
tmp = ( mv[2] ^ BMW_H[2] ) + ( mv[4] ^ BMW_H[4] ) + ( mv[7] ^ BMW_H[7] ) + ( mv[10] ^ BMW_H[10] ) + ( mv[11] ^ BMW_H[11] ) ;
q[13] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[14] ;
tmp = ( mv[3] ^ BMW_H[3] ) - ( mv[5] ^ BMW_H[5] ) + ( mv[8] ^ BMW_H[8] ) - ( mv[11] ^ BMW_H[11] ) - ( mv[12] ^ BMW_H[12] ) ;
q[14] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[15] ;
tmp = ( mv[12] ^ BMW_H[12] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[6] ^ BMW_H[6] ) - ( mv[9] ^ BMW_H[9] ) + ( mv[13] ^ BMW_H[13] ) ;
q[15] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[0] ;
// Expansion of q[16] and q[17]: each is the sum of a shift/rotate mix of the 16
// preceding q words, plus a message term of the form
// ( ( (i+16) * 0x0555555555555555 + rotl(mv) + rotl(mv) - rotl(mv) ) ^ BMW_H word ).
# pragma unroll 2
for ( int i=0 ;i<2;i++)
{
q[i+16] =
( SHR ( q[i], 1 ) ^ SHL ( q[i], 2 ) ^ SPH_ROTL64 ( q[i], 13 ) ^ SPH_ROTL64 ( q[i], 43 ) ) +
( SHR ( q[i+1], 2 ) ^ SHL ( q[i+1], 1 ) ^ SPH_ROTL64 ( q[i+1], 19 ) ^ SPH_ROTL64 ( q[i+1], 53 ) ) +
( SHR ( q[i+2], 2 ) ^ SHL ( q[i+2], 2 ) ^ SPH_ROTL64 ( q[i+2], 28 ) ^ SPH_ROTL64 ( q[i+2], 59 ) ) +
( SHR ( q[i+3], 1 ) ^ SHL ( q[i+3], 3 ) ^ SPH_ROTL64 ( q[i+3], 4 ) ^ SPH_ROTL64 ( q[i+3], 37 ) ) +
( SHR ( q[i+4], 1 ) ^ SHL ( q[i+4], 2 ) ^ SPH_ROTL64 ( q[i+4], 13 ) ^ SPH_ROTL64 ( q[i+4], 43 ) ) +
( SHR ( q[i+5], 2 ) ^ SHL ( q[i+5], 1 ) ^ SPH_ROTL64 ( q[i+5], 19 ) ^ SPH_ROTL64 ( q[i+5], 53 ) ) +
( SHR ( q[i+6], 2 ) ^ SHL ( q[i+6], 2 ) ^ SPH_ROTL64 ( q[i+6], 28 ) ^ SPH_ROTL64 ( q[i+6], 59 ) ) +
( SHR ( q[i+7], 1 ) ^ SHL ( q[i+7], 3 ) ^ SPH_ROTL64 ( q[i+7], 4 ) ^ SPH_ROTL64 ( q[i+7], 37 ) ) +
( SHR ( q[i+8], 1 ) ^ SHL ( q[i+8], 2 ) ^ SPH_ROTL64 ( q[i+8], 13 ) ^ SPH_ROTL64 ( q[i+8], 43 ) ) +
( SHR ( q[i+9], 2 ) ^ SHL ( q[i+9], 1 ) ^ SPH_ROTL64 ( q[i+9], 19 ) ^ SPH_ROTL64 ( q[i+9], 53 ) ) +
( SHR ( q[i+10], 2 ) ^ SHL ( q[i+10], 2 ) ^ SPH_ROTL64 ( q[i+10], 28 ) ^ SPH_ROTL64 ( q[i+10], 59 ) ) +
( SHR ( q[i+11], 1 ) ^ SHL ( q[i+11], 3 ) ^ SPH_ROTL64 ( q[i+11], 4 ) ^ SPH_ROTL64 ( q[i+11], 37 ) ) +
( SHR ( q[i+12], 1 ) ^ SHL ( q[i+12], 2 ) ^ SPH_ROTL64 ( q[i+12], 13 ) ^ SPH_ROTL64 ( q[i+12], 43 ) ) +
( SHR ( q[i+13], 2 ) ^ SHL ( q[i+13], 1 ) ^ SPH_ROTL64 ( q[i+13], 19 ) ^ SPH_ROTL64 ( q[i+13], 53 ) ) +
( SHR ( q[i+14], 2 ) ^ SHL ( q[i+14], 2 ) ^ SPH_ROTL64 ( q[i+14], 28 ) ^ SPH_ROTL64 ( q[i+14], 59 ) ) +
( SHR ( q[i+15], 1 ) ^ SHL ( q[i+15], 3 ) ^ SPH_ROTL64 ( q[i+15], 4 ) ^ SPH_ROTL64 ( q[i+15], 37 ) ) +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i+10], i+11 ) ) ^ BMW_H[i+7] ) ;
}
// NOTE(review): FOLDb is a macro defined outside this excerpt; what it does at this
// point cannot be told from here. It may be a leftover of the version this patch replaces.
FOLDb ;
// q[18]..q[21]: CONST_EXP2 (defined elsewhere) plus the same kind of message term.
# pragma unroll 4
for ( int i=2 ;i<6;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i+10], i+11 ) ) ^ BMW_H[i+7] ) ;
}
# undef M
# undef H
# undef dH
// q[22]..q[24]: the subtracted word switches from mv[i+10] to mv[i-6], which keeps
// the index inside the 16-element mv array.
# pragma unroll 3
for ( int i=6 ;i<9;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i+7] ) ;
}
// NOTE(review): H ( x ) is redefined in terms of final_b, which is not declared in
// any visible line - confirm these three macros are still needed.
# define M ( x ) ( BMW_h2[x] )
# define H ( x ) ( final_b[x] )
# define dH ( x ) ( BMW_h1[x] )
// q[25]..q[28]: the XOR-ed chaining word switches from BMW_H[i+7] to BMW_H[i-9].
# pragma unroll 4
for ( int i=9 ;i<13;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i-9] ) ;
}
// NOTE(review): second FOLDb invocation; same caveat as the first one.
FOLDb ;
// q[29]..q[31]: the second added word switches from mv[i+3] to mv[i-13].
# pragma unroll 3
for ( int i=13 ;i<16;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i-13], ( i-13 ) +1 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i-9] ) ;
}
# undef M
# undef H
# undef dH
sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23] ;
sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31] ;
BMW_H[0] = ( SHL ( XH64, 5 ) ^ SHR ( q[16],5 ) ^ mv[0] ) + ( XL64 ^ q[24] ^ q[0] ) ;
BMW_H[1] = ( SHR ( XH64, 7 ) ^ SHL ( q[17],8 ) ^ mv[1] ) + ( XL64 ^ q[25] ^ q[1] ) ;
BMW_H[2] = ( SHR ( XH64, 5 ) ^ SHL ( q[18],5 ) ^ mv[2] ) + ( XL64 ^ q[26] ^ q[2] ) ;
BMW_H[3] = ( SHR ( XH64, 1 ) ^ SHL ( q[19],5 ) ^ mv[3] ) + ( XL64 ^ q[27] ^ q[3] ) ;
BMW_H[4] = ( SHR ( XH64, 3 ) ^ q[20] ^ mv[4] ) + ( XL64 ^ q[28] ^ q[4] ) ;
BMW_H[5] = ( SHL ( XH64, 6 ) ^ SHR ( q[21],6 ) ^ mv[5] ) + ( XL64 ^ q[29] ^ q[5] ) ;
BMW_H[6] = ( SHR ( XH64, 4 ) ^ SHL ( q[22],6 ) ^ mv[6] ) + ( XL64 ^ q[30] ^ q[6] ) ;
BMW_H[7] = ( SHR ( XH64,11 ) ^ SHL ( q[23],2 ) ^ mv[7] ) + ( XL64 ^ q[31] ^ q[7] ) ;
BMW_H[8] = SPH_ROTL64 ( BMW_H[4], 9 ) + ( XH64 ^ q[24] ^ mv[8] ) + ( SHL ( XL64,8 ) ^ q[23] ^ q[8] ) ;
BMW_H[9] = SPH_ROTL64 ( BMW_H[5],10 ) + ( XH64 ^ q[25] ^ mv[9] ) + ( SHR ( XL64,6 ) ^ q[16] ^ q[9] ) ;
BMW_H[10] = SPH_ROTL64 ( BMW_H[6],11 ) + ( XH64 ^ q[26] ^ mv[10] ) + ( SHL ( XL64,6 ) ^ q[17] ^ q[10] ) ;
BMW_H[11] = SPH_ROTL64 ( BMW_H[7],12 ) + ( XH64 ^ q[27] ^ mv[11] ) + ( SHL ( XL64,4 ) ^ q[18] ^ q[11] ) ;
BMW_H[12] = SPH_ROTL64 ( BMW_H[0],13 ) + ( XH64 ^ q[28] ^ mv[12] ) + ( SHR ( XL64,3 ) ^ q[19] ^ q[12] ) ;
BMW_H[13] = SPH_ROTL64 ( BMW_H[1],14 ) + ( XH64 ^ q[29] ^ mv[13] ) + ( SHR ( XL64,4 ) ^ q[20] ^ q[13] ) ;
BMW_H[14] = SPH_ROTL64 ( BMW_H[2],15 ) + ( XH64 ^ q[30] ^ mv[14] ) + ( SHR ( XL64,7 ) ^ q[21] ^ q[14] ) ;
BMW_H[15] = SPH_ROTL64 ( BMW_H[3],16 ) + ( XH64 ^ q[31] ^ mv[15] ) + ( SHR ( XL64,2 ) ^ q[22] ^ q[15] ) ;
// Move the freshly computed chaining value into mv and refill BMW_H with the
// constants 0xaaaaaaaaaaaaaaa0 + i; the lines that follow run the same q[] / BMW_H
// computation a second time on these values.
# pragma unroll 16
for ( int i=0 ;i<16;i++)
{
mv[i] = BMW_H[i] ;
BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + ( sph_u64 ) i ;
}
// NOTE(review): BMW_h1 is never assigned in any visible line, and hash->h8[0..7] is
// written again from BMW_H[8..15] just before the kernel's closing brace. These
// eight stores look like leftovers of the replaced version - confirm and remove.
hash->h8[0] = SWAP8 ( BMW_h1[8] ) ;
hash->h8[1] = SWAP8 ( BMW_h1[9] ) ;
hash->h8[2] = SWAP8 ( BMW_h1[10] ) ;
hash->h8[3] = SWAP8 ( BMW_h1[11] ) ;
hash->h8[4] = SWAP8 ( BMW_h1[12] ) ;
hash->h8[5] = SWAP8 ( BMW_h1[13] ) ;
hash->h8[6] = SWAP8 ( BMW_h1[14] ) ;
hash->h8[7] = SWAP8 ( BMW_h1[15] ) ;
tmp = ( mv[5] ^ BMW_H[5] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[10] ^ BMW_H[10] ) + ( mv[13] ^ BMW_H[13] ) + ( mv[14] ^ BMW_H[14] ) ;
q[0] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[1] ;
tmp = ( mv[6] ^ BMW_H[6] ) - ( mv[8] ^ BMW_H[8] ) + ( mv[11] ^ BMW_H[11] ) + ( mv[14] ^ BMW_H[14] ) - ( mv[15] ^ BMW_H[15] ) ;
q[1] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[2] ;
tmp = ( mv[0] ^ BMW_H[0] ) + ( mv[7] ^ BMW_H[7] ) + ( mv[9] ^ BMW_H[9] ) - ( mv[12] ^ BMW_H[12] ) + ( mv[15] ^ BMW_H[15] ) ;
q[2] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[3] ;
tmp = ( mv[0] ^ BMW_H[0] ) - ( mv[1] ^ BMW_H[1] ) + ( mv[8] ^ BMW_H[8] ) - ( mv[10] ^ BMW_H[10] ) + ( mv[13] ^ BMW_H[13] ) ;
q[3] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[4] ;
tmp = ( mv[1] ^ BMW_H[1] ) + ( mv[2] ^ BMW_H[2] ) + ( mv[9] ^ BMW_H[9] ) - ( mv[11] ^ BMW_H[11] ) - ( mv[14] ^ BMW_H[14] ) ;
q[4] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[5] ;
tmp = ( mv[3] ^ BMW_H[3] ) - ( mv[2] ^ BMW_H[2] ) + ( mv[10] ^ BMW_H[10] ) - ( mv[12] ^ BMW_H[12] ) + ( mv[15] ^ BMW_H[15] ) ;
q[5] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[6] ;
tmp = ( mv[4] ^ BMW_H[4] ) - ( mv[0] ^ BMW_H[0] ) - ( mv[3] ^ BMW_H[3] ) - ( mv[11] ^ BMW_H[11] ) + ( mv[13] ^ BMW_H[13] ) ;
q[6] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[7] ;
tmp = ( mv[1] ^ BMW_H[1] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[5] ^ BMW_H[5] ) - ( mv[12] ^ BMW_H[12] ) - ( mv[14] ^ BMW_H[14] ) ;
q[7] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[8] ;
tmp = ( mv[2] ^ BMW_H[2] ) - ( mv[5] ^ BMW_H[5] ) - ( mv[6] ^ BMW_H[6] ) + ( mv[13] ^ BMW_H[13] ) - ( mv[15] ^ BMW_H[15] ) ;
q[8] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[9] ;
tmp = ( mv[0] ^ BMW_H[0] ) - ( mv[3] ^ BMW_H[3] ) + ( mv[6] ^ BMW_H[6] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[14] ^ BMW_H[14] ) ;
q[9] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[10] ;
tmp = ( mv[8] ^ BMW_H[8] ) - ( mv[1] ^ BMW_H[1] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[7] ^ BMW_H[7] ) + ( mv[15] ^ BMW_H[15] ) ;
q[10] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[11] ;
tmp = ( mv[8] ^ BMW_H[8] ) - ( mv[0] ^ BMW_H[0] ) - ( mv[2] ^ BMW_H[2] ) - ( mv[5] ^ BMW_H[5] ) + ( mv[9] ^ BMW_H[9] ) ;
q[11] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 13 ) ^ SPH_ROTL64 ( tmp, 43 ) ) + BMW_H[12] ;
tmp = ( mv[1] ^ BMW_H[1] ) + ( mv[3] ^ BMW_H[3] ) - ( mv[6] ^ BMW_H[6] ) - ( mv[9] ^ BMW_H[9] ) + ( mv[10] ^ BMW_H[10] ) ;
q[12] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 1 ) ^ SPH_ROTL64 ( tmp, 19 ) ^ SPH_ROTL64 ( tmp, 53 ) ) + BMW_H[13] ;
tmp = ( mv[2] ^ BMW_H[2] ) + ( mv[4] ^ BMW_H[4] ) + ( mv[7] ^ BMW_H[7] ) + ( mv[10] ^ BMW_H[10] ) + ( mv[11] ^ BMW_H[11] ) ;
q[13] = ( SHR ( tmp, 2 ) ^ SHL ( tmp, 2 ) ^ SPH_ROTL64 ( tmp, 28 ) ^ SPH_ROTL64 ( tmp, 59 ) ) + BMW_H[14] ;
tmp = ( mv[3] ^ BMW_H[3] ) - ( mv[5] ^ BMW_H[5] ) + ( mv[8] ^ BMW_H[8] ) - ( mv[11] ^ BMW_H[11] ) - ( mv[12] ^ BMW_H[12] ) ;
q[14] = ( SHR ( tmp, 1 ) ^ tmp ) + BMW_H[15] ;
tmp = ( mv[12] ^ BMW_H[12] ) - ( mv[4] ^ BMW_H[4] ) - ( mv[6] ^ BMW_H[6] ) - ( mv[9] ^ BMW_H[9] ) + ( mv[13] ^ BMW_H[13] ) ;
q[15] = ( SHR ( tmp, 1 ) ^ SHL ( tmp, 3 ) ^ SPH_ROTL64 ( tmp, 4 ) ^ SPH_ROTL64 ( tmp, 37 ) ) + BMW_H[0] ;
# pragma unroll 2
for ( int i=0 ;i<2;i++)
{
q[i+16] =
( SHR ( q[i], 1 ) ^ SHL ( q[i], 2 ) ^ SPH_ROTL64 ( q[i], 13 ) ^ SPH_ROTL64 ( q[i], 43 ) ) +
( SHR ( q[i+1], 2 ) ^ SHL ( q[i+1], 1 ) ^ SPH_ROTL64 ( q[i+1], 19 ) ^ SPH_ROTL64 ( q[i+1], 53 ) ) +
( SHR ( q[i+2], 2 ) ^ SHL ( q[i+2], 2 ) ^ SPH_ROTL64 ( q[i+2], 28 ) ^ SPH_ROTL64 ( q[i+2], 59 ) ) +
( SHR ( q[i+3], 1 ) ^ SHL ( q[i+3], 3 ) ^ SPH_ROTL64 ( q[i+3], 4 ) ^ SPH_ROTL64 ( q[i+3], 37 ) ) +
( SHR ( q[i+4], 1 ) ^ SHL ( q[i+4], 2 ) ^ SPH_ROTL64 ( q[i+4], 13 ) ^ SPH_ROTL64 ( q[i+4], 43 ) ) +
( SHR ( q[i+5], 2 ) ^ SHL ( q[i+5], 1 ) ^ SPH_ROTL64 ( q[i+5], 19 ) ^ SPH_ROTL64 ( q[i+5], 53 ) ) +
( SHR ( q[i+6], 2 ) ^ SHL ( q[i+6], 2 ) ^ SPH_ROTL64 ( q[i+6], 28 ) ^ SPH_ROTL64 ( q[i+6], 59 ) ) +
( SHR ( q[i+7], 1 ) ^ SHL ( q[i+7], 3 ) ^ SPH_ROTL64 ( q[i+7], 4 ) ^ SPH_ROTL64 ( q[i+7], 37 ) ) +
( SHR ( q[i+8], 1 ) ^ SHL ( q[i+8], 2 ) ^ SPH_ROTL64 ( q[i+8], 13 ) ^ SPH_ROTL64 ( q[i+8], 43 ) ) +
( SHR ( q[i+9], 2 ) ^ SHL ( q[i+9], 1 ) ^ SPH_ROTL64 ( q[i+9], 19 ) ^ SPH_ROTL64 ( q[i+9], 53 ) ) +
( SHR ( q[i+10], 2 ) ^ SHL ( q[i+10], 2 ) ^ SPH_ROTL64 ( q[i+10], 28 ) ^ SPH_ROTL64 ( q[i+10], 59 ) ) +
( SHR ( q[i+11], 1 ) ^ SHL ( q[i+11], 3 ) ^ SPH_ROTL64 ( q[i+11], 4 ) ^ SPH_ROTL64 ( q[i+11], 37 ) ) +
( SHR ( q[i+12], 1 ) ^ SHL ( q[i+12], 2 ) ^ SPH_ROTL64 ( q[i+12], 13 ) ^ SPH_ROTL64 ( q[i+12], 43 ) ) +
( SHR ( q[i+13], 2 ) ^ SHL ( q[i+13], 1 ) ^ SPH_ROTL64 ( q[i+13], 19 ) ^ SPH_ROTL64 ( q[i+13], 53 ) ) +
( SHR ( q[i+14], 2 ) ^ SHL ( q[i+14], 2 ) ^ SPH_ROTL64 ( q[i+14], 28 ) ^ SPH_ROTL64 ( q[i+14], 59 ) ) +
( SHR ( q[i+15], 1 ) ^ SHL ( q[i+15], 3 ) ^ SPH_ROTL64 ( q[i+15], 4 ) ^ SPH_ROTL64 ( q[i+15], 37 ) ) +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i+10], i+11 ) ) ^ BMW_H[i+7] ) ;
}
# pragma unroll 4
for ( int i=2 ;i<6;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i+10], i+11 ) ) ^ BMW_H[i+7] ) ;
}
# pragma unroll 3
for ( int i=6 ;i<9;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i+7] ) ;
}
# pragma unroll 4
for ( int i=9 ;i<13;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i+3], i+4 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i-9] ) ;
}
# pragma unroll 3
for ( int i=13 ;i<16;i++)
{
q[i+16] = CONST_EXP2 +
( ( ( ( i+16 ) * ( 0x0555555555555555ull ) ) + SPH_ROTL64 ( mv[i], i+1 ) +
SPH_ROTL64 ( mv[i-13], ( i-13 ) +1 ) - SPH_ROTL64 ( mv[i-6], ( i-6 ) +1 ) ) ^ BMW_H[i-9] ) ;
}
XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23] ;
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31] ;
BMW_H[0] = ( SHL ( XH64, 5 ) ^ SHR ( q[16],5 ) ^ mv[0] ) + ( XL64 ^ q[24] ^ q[0] ) ;
BMW_H[1] = ( SHR ( XH64, 7 ) ^ SHL ( q[17],8 ) ^ mv[1] ) + ( XL64 ^ q[25] ^ q[1] ) ;
BMW_H[2] = ( SHR ( XH64, 5 ) ^ SHL ( q[18],5 ) ^ mv[2] ) + ( XL64 ^ q[26] ^ q[2] ) ;
BMW_H[3] = ( SHR ( XH64, 1 ) ^ SHL ( q[19],5 ) ^ mv[3] ) + ( XL64 ^ q[27] ^ q[3] ) ;
BMW_H[4] = ( SHR ( XH64, 3 ) ^ q[20] ^ mv[4] ) + ( XL64 ^ q[28] ^ q[4] ) ;
BMW_H[5] = ( SHL ( XH64, 6 ) ^ SHR ( q[21],6 ) ^ mv[5] ) + ( XL64 ^ q[29] ^ q[5] ) ;
BMW_H[6] = ( SHR ( XH64, 4 ) ^ SHL ( q[22],6 ) ^ mv[6] ) + ( XL64 ^ q[30] ^ q[6] ) ;
BMW_H[7] = ( SHR ( XH64,11 ) ^ SHL ( q[23],2 ) ^ mv[7] ) + ( XL64 ^ q[31] ^ q[7] ) ;
BMW_H[8] = SPH_ROTL64 ( BMW_H[4], 9 ) + ( XH64 ^ q[24] ^ mv[8] ) + ( SHL ( XL64,8 ) ^ q[23] ^ q[8] ) ;
BMW_H[9] = SPH_ROTL64 ( BMW_H[5],10 ) + ( XH64 ^ q[25] ^ mv[9] ) + ( SHR ( XL64,6 ) ^ q[16] ^ q[9] ) ;
BMW_H[10] = SPH_ROTL64 ( BMW_H[6],11 ) + ( XH64 ^ q[26] ^ mv[10] ) + ( SHL ( XL64,6 ) ^ q[17] ^ q[10] ) ;
BMW_H[11] = SPH_ROTL64 ( BMW_H[7],12 ) + ( XH64 ^ q[27] ^ mv[11] ) + ( SHL ( XL64,4 ) ^ q[18] ^ q[11] ) ;
BMW_H[12] = SPH_ROTL64 ( BMW_H[0],13 ) + ( XH64 ^ q[28] ^ mv[12] ) + ( SHR ( XL64,3 ) ^ q[19] ^ q[12] ) ;
BMW_H[13] = SPH_ROTL64 ( BMW_H[1],14 ) + ( XH64 ^ q[29] ^ mv[13] ) + ( SHR ( XL64,4 ) ^ q[20] ^ q[13] ) ;
BMW_H[14] = SPH_ROTL64 ( BMW_H[2],15 ) + ( XH64 ^ q[30] ^ mv[14] ) + ( SHR ( XL64,7 ) ^ q[21] ^ q[14] ) ;
BMW_H[15] = SPH_ROTL64 ( BMW_H[3],16 ) + ( XH64 ^ q[31] ^ mv[15] ) + ( SHR ( XL64,2 ) ^ q[22] ^ q[15] ) ;
// Store the upper eight words of BMW_H (indices 8..15), each passed through SWAP8,
// into this work-item's hash slot.
hash->h8[0] = SWAP8 ( BMW_H[8] ) ;
hash->h8[1] = SWAP8 ( BMW_H[9] ) ;
hash->h8[2] = SWAP8 ( BMW_H[10] ) ;
hash->h8[3] = SWAP8 ( BMW_H[11] ) ;
hash->h8[4] = SWAP8 ( BMW_H[12] ) ;
hash->h8[5] = SWAP8 ( BMW_H[13] ) ;
hash->h8[6] = SWAP8 ( BMW_H[14] ) ;
hash->h8[7] = SWAP8 ( BMW_H[15] ) ;
// Work-group barrier with a global-memory fence before the kernel ends.
barrier ( CLK_GLOBAL_MEM_FENCE ) ;
}
@ -240,14 +451,15 @@ __kernel void search2(__global hash_t* hashes)
@@ -240,14 +451,15 @@ __kernel void search2(__global hash_t* hashes)
// Copy the 256-entry tables T0..T7 into T0_L..T7_L; each work-item handles the
// indices init, init + step, ... (init and step are set outside this excerpt,
// presumably from the local id and local size - verify).
// NOTE(review): T4_L[i] = T4[i] appears twice in the body; one of the two stores
// is redundant.
for ( int i = init ; i < 256; i += step)
{
T0_L[i] = T0[i] ;
T4_L[i] = T4[i] ;
T1_L[i] = T1[i] ;
T2_L[i] = T2[i] ;
T3_L[i] = T3[i] ;
T4_L[i] = T4[i] ;
T5_L[i] = T5[i] ;
T6_L[i] = T6[i] ;
T7_L[i] = T7[i] ;
}
// Work-group barrier with a local-memory fence: the copies above are complete for
// the whole group before any table is read.
barrier ( CLK_LOCAL_MEM_FENCE ) ;
// From here on the name T0 refers to the copy T0_L.
# define T0 T0_L
@ -260,47 +472,38 @@ __kernel void search2(__global hash_t* hashes)
@@ -260,47 +472,38 @@ __kernel void search2(__global hash_t* hashes)
# define T7 T7_L
// groestl
// NOTE(review): this stretch holds two interleaved versions of the same steps (H is
// declared twice, m[] / g[] are filled twice, the output is stored by two different
// loops). It looks like the removed and the added lines of a patch were both kept;
// only one variant can remain.
// Variant A: zero H[0..14] and put the constant 512 into H[15], byte order per USE_LE.
sph_u64 H[16] ;
for ( unsigned int u = 0 ; u < 15; u ++)
H[u] = 0 ;
# if USE_LE
H[15] = ( ( sph_u64 ) ( 512 & 0xFF ) << 56 ) | ( ( sph_u64 ) ( 512 & 0xFF00 ) << 40 ) ;
# else
H[15] = ( sph_u64 ) 512 ;
# endif
// Variant B: the same state as an initializer; 0x0002000000000000 is exactly what
// the USE_LE expression above evaluates to.
sph_u64 H[16] = {0, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0x0002000000000000} ;
sph_u64 g[16], m[16] ;
// Variant A of the message setup: m[0..7] come from the incoming hash via DEC64E.
m[0] = DEC64E ( hash->h8[0] ) ;
m[1] = DEC64E ( hash->h8[1] ) ;
m[2] = DEC64E ( hash->h8[2] ) ;
m[3] = DEC64E ( hash->h8[3] ) ;
m[4] = DEC64E ( hash->h8[4] ) ;
m[5] = DEC64E ( hash->h8[5] ) ;
m[6] = DEC64E ( hash->h8[6] ) ;
m[7] = DEC64E ( hash->h8[7] ) ;
// NOTE(review): in this line order the loop reads m[8..15] before the lines below
// assign them.
for ( unsigned int u = 0 ; u < 16; u ++)
g[u] = m[u] ^ H[u] ;
// Fixed second half of the message block, with g = m ^ H computed per word.
m[8] = 0x80 ; g[8] = m[8] ^ H[8] ;
m[9 ] = 0 ; g[9] = m[9] ^ H[9] ;
m[10 ] = 0 ; g[10] = m[10] ^ H[10] ;
m[11 ] = 0 ; g[11] = m[11] ^ H[11] ;
m[12 ] = 0 ; g[12] = m[12] ^ H[12] ;
m[13] = 0 ; g[13] = m[13] ^ H[13] ;
m[14] = 0 ; g[14] = m[14] ^ H[14] ;
m[15] = 0x100000000000000 ; g[15] = m[15] ^ H[15];
// Variant B of the message setup: g and m are assigned together. With H[0..14] zero,
// g equals m for those words; g[15] is 0x0100000000000000 ^ 0x0002000000000000.
g[0] = m[0] = DEC64E ( hash->h8[0] ) ;
g[1] = m[1] = DEC64E ( hash->h8[1] ) ;
g[2] = m[2] = DEC64E ( hash->h8[2] ) ;
g[3] = m[3] = DEC64E ( hash->h8[3] ) ;
g[4] = m[4] = DEC64E ( hash->h8[4] ) ;
g[5] = m[5] = DEC64E ( hash->h8[5] ) ;
g[6] = m[6] = DEC64E ( hash->h8[6] ) ;
g[7] = m[7] = DEC64E ( hash->h8[7] ) ;
g[8] = m[8] = 0x80 ;
g[9] = m[9] = 0 ;
g[10] = m[10] = 0 ;
g[11] = m[11 ] = 0 ;
g[12] = m[12 ] = 0 ;
g[13] = m[13 ] = 0 ;
g[14] = m[14 ] = 0 ;
g[15] = 0x102000000000000 ;
m[15] = 0x100000000000000 ;
// Apply PERM_BIG_P to g and PERM_BIG_Q to m (both macros are defined elsewhere).
PERM_BIG_P ( g ) ;
PERM_BIG_Q ( m ) ;
// Fold both permuted arrays into H.
for ( unsigned int u = 0 ; u < 16; u ++)
H[u] ^= g[u] ^ m[u] ;
sph_u64 xH[16] ;
for ( unsigned int u = 0 ; u < 16; u ++)
xH[u] = H[u] ;
// NOTE(review): the next statement follows the one-statement loop body above, so as
// written it sits outside the loop where u is not in scope. It reads like the
// replacement for the separate fold and copy loops.
xH[u] = H[u] ^= g[u] ^ m[u] ;
// Final permutation on the copy xH.
PERM_BIG_P ( xH ) ;
// Variant A of the output: fold xH into H, then store H[8..15] through DEC64E.
for ( unsigned int u = 0 ; u < 16; u ++)
H[u] ^= xH[u] ;
for ( unsigned int u = 0 ; u < 8; u ++)
hash->h8[u] = DEC64E ( H[u + 8] ) ;
// Variant B of the output: XOR and store in one pass over words 8..15.
for ( unsigned int u = 8 ; u < 16; u ++)
hash->h8[u-8] = DEC64E ( H[u] ^ xH[u] ) ;
// Work-group barrier with a global-memory fence before the kernel ends.
barrier ( CLK_GLOBAL_MEM_FENCE ) ;
}
@ -325,10 +528,14 @@ __kernel void search3(__global hash_t* hashes)
@@ -325,10 +528,14 @@ __kernel void search3(__global hash_t* hashes)
// Last three message words, read from the incoming hash through SWAP8.
m5 = SWAP8 ( hash->h8[5] ) ;
m6 = SWAP8 ( hash->h8[6] ) ;
m7 = SWAP8 ( hash->h8[7] ) ;
// First UBI_BIG invocation; the macro and the meaning of its two arguments are
// defined outside this excerpt.
UBI_BIG ( 480 , 64 ) ;
// Second UBI_BIG invocation, with bcount reset and an all-zero message.
bcount = 0 ;
m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0 ;
UBI_BIG ( 510 , 8 ) ;
// Write the state words h0.. back to the hash slot, again through SWAP8.
hash->h8[0] = SWAP8 ( h0 ) ;
hash->h8[1] = SWAP8 ( h1 ) ;
hash->h8[2] = SWAP8 ( h2 ) ;
@ -355,7 +562,8 @@ __kernel void search4(__global hash_t* hashes)
@@ -355,7 +562,8 @@ __kernel void search4(__global hash_t* hashes)
// Two passes over the state; what is XOR-ed in depends on the pass number.
for ( int i = 0 ; i < 2; i++)
{
// NOTE(review): the i == 0 test appears twice in a row (brace on the same line, then
// brace on its own line). Both spellings of one line seem to have been kept; as
// written they nest and leave one brace unmatched.
if ( i == 0 ) {
if ( i == 0 )
{
// Pass 0: XOR the incoming hash words (via DEC64E) into h0h, h0l, h1h, ...
h0h ^= DEC64E ( hash->h8[0] ) ;
h0l ^= DEC64E ( hash->h8[1] ) ;
h1h ^= DEC64E ( hash->h8[2] ) ;
@ -364,7 +572,9 @@ __kernel void search4(__global hash_t* hashes)
@@ -364,7 +572,9 @@ __kernel void search4(__global hash_t* hashes)
h2l ^= DEC64E ( hash->h8[5] ) ;
h3h ^= DEC64E ( hash->h8[6] ) ;
h3l ^= DEC64E ( hash->h8[7] ) ;
// NOTE(review): the i == 1 branch header appears in two spellings ("} else if ... {"
// and the same split over three lines); only one of them can stay.
} else if ( i == 1 ) {
}
else if ( i == 1 )
{
// Pass 1: XOR the same hash words into h4h, h4l, h5h, ...
h4h ^= DEC64E ( hash->h8[0] ) ;
h4l ^= DEC64E ( hash->h8[1] ) ;
h5h ^= DEC64E ( hash->h8[2] ) ;
@ -425,6 +635,7 @@ __kernel void search5(__global hash_t* hashes)
@@ -425,6 +635,7 @@ __kernel void search5(__global hash_t* hashes)
// XOR the last incoming hash word (through SWAP8) into lane a21.
a21 ^= SWAP8 ( hash->h8[7] ) ;
// XOR a fixed constant into lane a31 (presumably the padding bits - verify).
a31 ^= 0x8000000000000001 ;
// Run the permutation macro (defined outside this excerpt).
KECCAK_F_1600 ;
// Finalize the "lane complement": invert the lanes listed below.
a10 = ~a10 ;
a20 = ~a20 ;
@ -471,7 +682,8 @@ __kernel void search6(__global hash_t* hashes)
@@ -471,7 +682,8 @@ __kernel void search6(__global hash_t* hashes)
// MI5 and LUFFA_P5 are macros defined outside this excerpt.
MI5 ;
LUFFA_P5 ;
// NOTE(review): the i == 0 test appears twice in a row (two brace styles of the same
// line); as written they nest. Keep one.
if ( i == 0 ) {
if ( i == 0 )
{
// Load the next message words from the hash; the h4 indices are taken in swapped
// pairs (9, 8, 11, ...).
M0 = hash->h4[9] ;
M1 = hash->h4[8] ;
M2 = hash->h4[11] ;
@ -480,12 +692,16 @@ __kernel void search6(__global hash_t* hashes)
@@ -480,12 +692,16 @@ __kernel void search6(__global hash_t* hashes)
M5 = hash->h4[12] ;
M6 = hash->h4[15] ;
M7 = hash->h4[14] ;
// NOTE(review): every else-if header below appears in two spellings (compact, and
// split across lines; the i == 2 branch also loses its braces in the second
// spelling). Both versions of each line seem to have been kept - keep one set.
} else if ( i == 1 ) {
}
else if ( i == 1 )
{
// i == 1: message is 0x80000000 followed by zeros.
M0 = 0x80000000 ;
M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0 ;
} else if ( i == 2 ) {
}
else if ( i == 2 )
// i == 2: all-zero message.
M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0 ;
} else if ( i == 3 ) {
else if ( i == 3 )
{
// i == 3: XOR the five V words of each column together and store the result;
// the h4 indices are again written in swapped pairs (1, 0, 3, ...).
hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40 ;
hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41 ;
hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42 ;
@ -535,10 +751,12 @@ __kernel void search7(__global hash_t* hashes)
@@ -535,10 +751,12 @@ __kernel void search7(__global hash_t* hashes)
// XOR incoming hash words (through SWAP4) into the state; the h4 indices are taken
// in swapped pairs.
x6 ^= SWAP4 ( hash->h4[7] ) ;
x7 ^= SWAP4 ( hash->h4[6] ) ;
// NOTE(review): the loop header and the i == 0 test each appear twice (two brace
// styles of the same line); as written they nest. Keep one of each.
for ( int i = 0 ; i < 13; i ++) {
for ( int i = 0 ; i < 13; i ++)
{
// SIXTEEN_ROUNDS is a macro defined outside this excerpt.
SIXTEEN_ROUNDS ;
if ( i == 0 ) {
if ( i == 0 )
{
// After the first pass: XOR in the second half of the hash words.
x0 ^= SWAP4 ( hash->h4[9] ) ;
x1 ^= SWAP4 ( hash->h4[8] ) ;
x2 ^= SWAP4 ( hash->h4[11] ) ;
@ -547,12 +765,12 @@ __kernel void search7(__global hash_t* hashes)
@@ -547,12 +765,12 @@ __kernel void search7(__global hash_t* hashes)
x5 ^= SWAP4 ( hash->h4[12] ) ;
x6 ^= SWAP4 ( hash->h4[15] ) ;
x7 ^= SWAP4 ( hash->h4[14] ) ;
// NOTE(review): the else-if headers below appear in two spellings each (with and
// without braces); both versions of the lines seem to have been kept - keep one set.
} else if ( i == 1 ) {
}
else if ( i == 1 )
// i == 1: XOR 0x80 into x0.
x0 ^= 0x80 ;
} else if ( i == 2 ) {
else if ( i == 2 )
// i == 2: XOR 1 into xv.
xv ^= SPH_C32 ( 1 ) ;
}
}
// Store the state words back to the hash slot.
hash->h4[0] = x0 ;
hash->h4[1] = x1 ;
@ -579,6 +797,7 @@ __kernel void search8(__global hash_t* hashes)
@@ -579,6 +797,7 @@ __kernel void search8(__global hash_t* hashes)
{
uint gid = get_global_id ( 0 ) ;
__global hash_t *hash = & ( hashes[gid-get_global_offset ( 0 ) ] ) ;
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256] ;
int init = get_local_id ( 0 ) ;
@ -607,7 +826,7 @@ __kernel void search8(__global hash_t* hashes)
@@ -607,7 +826,7 @@ __kernel void search8(__global hash_t* hashes)
sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17 ;
sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F ;
// NOTE(review): sc_count0..3 are declared twice. ( 64 << 3 ) and 0x200 are both 512,
// so the two lines differ only in spelling - keep one.
sph_u32 sc_count0 = ( 64 << 3 ) , sc_count1 = 0 , sc_count2 = 0 , sc_count3 = 0 ;
sph_u32 sc_count0 = 0x200 , sc_count1 = 0 , sc_count2 = 0 , sc_count3 = 0 ;
// Load rk00 / rk01 from the first two h4 words of the incoming hash.
rk00 = hash->h4[0] ;
rk01 = hash->h4[1] ;
@ -673,7 +892,8 @@ __kernel void search9(__global hash_t* hashes)
@@ -673,7 +892,8 @@ __kernel void search9(__global hash_t* hashes)
// Initial values of the D0..D7 state words.
u32 D0 = C32 ( 0x09254899 ) , D1 = C32 ( 0xD699C7BC ) , D2 = C32 ( 0x9019B6DC ) , D3 = C32 ( 0x2B9022E4 ) , D4 = C32 ( 0x8FA14956 ) , D5 = C32 ( 0x21BF9BD3 ) , D6 = C32 ( 0xB94D0943 ) , D7 = C32 ( 0x6FFDDC22 ) ;
// FFT256 is a macro defined outside this excerpt.
FFT256 ( 0 , 1 , 0 , ll1 ) ;
// NOTE(review): the loop header appears twice (two brace styles of the same line);
// as written the loops nest. Keep one.
for ( int i = 0 ; i < 256; i ++) {
for ( int i = 0 ; i < 256; i ++)
{
s32 tq ;
// Add the per-index offset from yoff_b_n to q[i].
tq = q[i] + yoff_b_n[i] ;
@ -709,14 +929,17 @@ __kernel void search9(__global hash_t* hashes)
@@ -709,14 +929,17 @@ __kernel void search9(__global hash_t* hashes)
C32 ( 0x0BA16B95 ) , C32 ( 0x72F999AD ) , C32 ( 0x9FECC2AE ) , C32 ( 0xBA3264FC ) ,
C32 ( 0x5E894929 ) , C32 ( 0x8E9F30E5 ) , C32 ( 0x2F1DAA37 ) , C32 ( 0xF0F2C558 ) ,
IF, 4 , 13 , PP8_4_ ) ;
STEP_BIG (
C32 ( 0xAC506643 ) , C32 ( 0xA90635A5 ) , C32 ( 0xE25B878B ) , C32 ( 0xAAB7878F ) ,
C32 ( 0x88817F7A ) , C32 ( 0x0A02892B ) , C32 ( 0x559A7550 ) , C32 ( 0x598F657E ) ,
IF, 13 , 10 , PP8_5_ ) ;
STEP_BIG (
C32 ( 0x7EEF60A1 ) , C32 ( 0x6B70E3E8 ) , C32 ( 0x9C1714D1 ) , C32 ( 0xB958E2A8 ) ,
C32 ( 0xAB02675E ) , C32 ( 0xED1C014F ) , C32 ( 0xCD8D65BB ) , C32 ( 0xFDB7A257 ) ,
IF, 10 , 25 , PP8_6_ ) ;
STEP_BIG (
C32 ( 0x09254899 ) , C32 ( 0xD699C7BC ) , C32 ( 0x9019B6DC ) , C32 ( 0x2B9022E4 ) ,
C32 ( 0x8FA14956 ) , C32 ( 0x21BF9BD3 ) , C32 ( 0xB94D0943 ) , C32 ( 0x6FFDDC22 ) ,
@ -735,22 +958,27 @@ __kernel void search9(__global hash_t* hashes)
@@ -735,22 +958,27 @@ __kernel void search9(__global hash_t* hashes)
ONE_ROUND_BIG ( 1_, 1 , 28 , 19 , 22 , 7 ) ;
ONE_ROUND_BIG ( 2_, 2 , 29 , 9 , 15 , 5 ) ;
ONE_ROUND_BIG ( 3_, 3 , 4 , 13 , 10 , 25 ) ;
STEP_BIG (
COPY_A0, COPY_A1, COPY_A2, COPY_A3,
COPY_A4, COPY_A5, COPY_A6, COPY_A7,
IF, 4 , 13 , PP8_4_ ) ;
STEP_BIG (
COPY_B0, COPY_B1, COPY_B2, COPY_B3,
COPY_B4, COPY_B5, COPY_B6, COPY_B7,
IF, 13 , 10 , PP8_5_ ) ;
STEP_BIG (
COPY_C0, COPY_C1, COPY_C2, COPY_C3,
COPY_C4, COPY_C5, COPY_C6, COPY_C7,
IF, 10 , 25 , PP8_6_ ) ;
STEP_BIG (
COPY_D0, COPY_D1, COPY_D2, COPY_D3,
COPY_D4, COPY_D5, COPY_D6, COPY_D7,
IF, 25 , 4 , PP8_0_ ) ;
# undef q
hash->h4[0] = A0 ;
@ -778,7 +1006,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -778,7 +1006,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
{
// Global index of this work-item in dimension 0, and the global offset of the launch.
uint gid = get_global_id ( 0 ) ;
uint offset = get_global_offset ( 0 ) ;
// NOTE(review): hash is declared twice with different types - a private hash_t copy
// and a __global pointer to hashes[gid-offset]. Later lines use both hash.member and
// hash->member accordingly; this looks like the old and the new version of a patch
// both being kept. Only one declaration (and the matching accesses) can remain.
hash_t hash ;
__global hash_t * hash = & ( hashes[gid-offset] ) ;
// Per-work-group copies of the four AES tables.
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256] ;
@ -797,6 +1025,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -797,6 +1025,7 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
//mixtab
__local sph_u32 mixtab0[256], mixtab1[256], mixtab2[256], mixtab3[256] ;
for ( int i = init ; i < 256; i += step)
{
mixtab0[i] = mixtab0_c[i] ;
@ -804,17 +1033,37 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -804,17 +1033,37 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
mixtab2[i] = mixtab2_c[i] ;
mixtab3[i] = mixtab3_c[i] ;
}
// Work-group barrier with a local-memory fence after the mixtab copy loop above.
barrier ( CLK_LOCAL_MEM_FENCE ) ;
// Local copy of the T512 table, read through a flat pointer to its first element.
__local sph_u32 T512_L[1024] ;
__constant const sph_u32 *T512_C = &T512[0][0] ;
// Copy of the first eight h8 words into the private hash struct (hash.member form).
for ( int i = 0 ; i < 8; i++) {
hash.h8[i] = hashes[gid-offset].h8[i] ;
}
// Each work-item copies the indices init, init + step, ... of the 1024 entries.
for ( int i = init ; i < 1024; i += step)
T512_L[i] = T512_C[i] ;
// echo
barrier ( CLK_LOCAL_MEM_FENCE ) ;
// Local copies of the eight 256-entry plain_T tables, filled with the same
// init / step pattern.
__local sph_u64 LT0[256], LT1[256], LT2[256], LT3[256], LT4[256], LT5[256], LT6[256], LT7[256] ;
for ( int i = init ; i < 256; i += step)
{
LT0[i] = plain_T0[i] ;
LT1[i] = plain_T1[i] ;
LT2[i] = plain_T2[i] ;
LT3[i] = plain_T3[i] ;
LT4[i] = plain_T4[i] ;
LT5[i] = plain_T5[i] ;
LT6[i] = plain_T6[i] ;
LT7[i] = plain_T7[i] ;
}
// Work-group barrier: the local tables are complete before they are used.
barrier ( CLK_LOCAL_MEM_FENCE ) ;
// NOTE(review): if hash is the pointer to hashes[gid-offset], this loop copies each
// word onto itself and has no effect - confirm and remove.
for ( int i = 0 ; i < 8; i++)
hash->h8[i] = hashes[gid-offset].h8[i] ;
// echo
// W00..WF1 are the working words, Vb00..Vb71 the chaining words of the stage.
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1 ;
sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71 ;
// The eight Vb*0 words all start at 512.
Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL ;
@ -841,14 +1090,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -841,14 +1090,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
// The first W words are copies of the Vb chaining words.
W61 = Vb61 ;
W70 = Vb70 ;
W71 = Vb71 ;
// NOTE(review): W80..WB1 are assigned twice - once from hash.h8[] (struct access) and
// once from hash->h8[] (pointer access). These are the old and the new form of the
// same eight lines; keep the set that matches the surviving declaration of hash.
W80 = hash. h8[0] ;
W81 = hash. h8[1] ;
W90 = hash. h8[2] ;
W91 = hash. h8[3] ;
WA0 = hash. h8[4] ;
WA1 = hash. h8[5] ;
WB0 = hash. h8[6] ;
WB1 = hash. h8[7] ;
W80 = hash-> h8[0] ;
W81 = hash-> h8[1] ;
W90 = hash-> h8[2] ;
W91 = hash-> h8[3] ;
WA0 = hash-> h8[4] ;
WA1 = hash-> h8[5] ;
WB0 = hash-> h8[6] ;
WB1 = hash-> h8[7] ;
// Fixed words after the message: 0x80, then zeros.
WC0 = 0x80 ;
WC1 = 0 ;
WD0 = 0 ;
@ -858,25 +1107,19 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -858,25 +1107,19 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
// Last message word pair: 0x200 (512) in WF0, zero in WF1.
WF0 = 0x200 ;
WF1 = 0 ;
// NOTE(review): the 10-iteration loop header appears twice (with and without a
// brace); BIG_ROUND is a macro defined outside this excerpt. Keep one header.
for ( unsigned u = 0 ; u < 10; u ++) {
for ( unsigned u = 0 ; u < 10; u ++)
BIG_ROUND ;
}
// NOTE(review): the output fold below exists in both forms (hash.h8 and hash->h8).
// As written both sets run, which XORs the same value in twice if they refer to the
// same storage - keep one set.
hash.h8[0] ^= Vb00 ^ W00 ^ W80 ;
hash.h8[1] ^= Vb01 ^ W01 ^ W81 ;
hash.h8[2] ^= Vb10 ^ W10 ^ W90 ;
hash.h8[3] ^= Vb11 ^ W11 ^ W91 ;
hash.h8[4] ^= Vb20 ^ W20 ^ WA0 ;
hash.h8[5] ^= Vb21 ^ W21 ^ WA1 ;
hash.h8[6] ^= Vb30 ^ W30 ^ WB0 ;
hash.h8[7] ^= Vb31 ^ W31 ^ WB1 ;
}
hash->h8[0] ^= Vb00 ^ W00 ^ W80 ;
hash->h8[1] ^= Vb01 ^ W01 ^ W81 ;
hash->h8[2] ^= Vb10 ^ W10 ^ W90 ;
hash->h8[3] ^= Vb11 ^ W11 ^ W91 ;
hash->h8[4] ^= Vb20 ^ W20 ^ WA0 ;
hash->h8[5] ^= Vb21 ^ W21 ^ WA1 ;
hash->h8[6] ^= Vb30 ^ W30 ^ WB0 ;
hash->h8[7] ^= Vb31 ^ W31 ^ WB1 ;
// hamsi
{
// Working chaining words c0..cB, initialised from the HAMSI_IV512 table.
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3] ;
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7] ;
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11] ;
@ -885,38 +1128,39 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -885,38 +1128,39 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF ;
// h starts as a copy of the sixteen c words.
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF } ;
// NOTE(review): from here to the end of the hamsi stage the old lines (hash.h1,
// INPUT_BIG, "for ... {") and the new lines (hash->h1, INPUT_BIG_LOCAL, brace on its
// own line) are interleaved; buf is #defined twice without an #undef in between.
// Keep one consistent set. INPUT_BIG*, P_BIG, PF_BIG and T_BIG are macros defined
// outside this excerpt.
# define buf ( u ) hash.h1[i + u]
for ( int i = 0 ; i < 64; i += 8) {
INPUT_BIG ;
# define buf ( u ) hash->h1[i + u]
// Walk the 64 h1 bytes of the hash, 8 per iteration.
for ( int i = 0 ; i < 64; i += 8)
{
INPUT_BIG_LOCAL ;
P_BIG ;
T_BIG ;
}
# undef buf
// Next block: buf yields 0x80 for u == 0 and 0 otherwise.
# define buf ( u ) ( u == 0 ? 0x80 : 0 )
INPUT_BIG ;
INPUT_BIG_LOCAL ;
P_BIG ;
T_BIG ;
# undef buf
// Final block: buf yields 2 for u == 6 and 0 otherwise; PF_BIG is used instead of P_BIG.
# define buf ( u ) ( u == 6 ? 2 : 0 )
INPUT_BIG ;
INPUT_BIG_LOCAL ;
PF_BIG ;
T_BIG ;
// Store the sixteen h words into the hash (both the struct and the pointer form of
// the store are present - see the note above).
for ( unsigned u = 0 ; u < 16; u ++)
hash.h4[u] = h[u] ;
}
hash->h4[u] = h[u] ;
// fugue
{
// The 36 state words of the stage.
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09 ;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19 ;
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29 ;
sph_u32 S30, S31, S32, S33, S34, S35 ;
// NOTE(review): fc_bit_count is declared twice; ( sph_u64 ) 64 << 3 and 0x200 are
// both 512, so the lines differ only in spelling - keep one.
ulong fc_bit_count = ( sph_u64 ) 64 << 3 ;
ulong fc_bit_count = ( sph_u64 ) 0x200 ;
// S00..S19 start at zero; S20 onward are set to fixed constants.
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0 ;
S20 = SPH_C32 ( 0x8807a57e ) ; S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
@ -924,22 +1168,25 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -924,22 +1168,25 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
S28 = SPH_C32 ( 0xaac6e2c9 ) ; S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32 ( 0x25ea78e7 ) ; S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3 ( ( hash. h4[0x0] ) , ( hash. h4[0x1] ) , ( hash. h4[0x2] ) ) ;
FUGUE512_3 ( ( hash. h4[0x3] ) , ( hash. h4[0x4] ) , ( hash. h4[0x5] ) ) ;
FUGUE512_3 ( ( hash. h4[0x6] ) , ( hash. h4[0x7] ) , ( hash. h4[0x8] ) ) ;
FUGUE512_3 ( ( hash. h4[0x9] ) , ( hash. h4[0xA] ) , ( hash. h4[0xB] ) ) ;
FUGUE512_3 ( ( hash. h4[0xC] ) , ( hash. h4[0xD] ) , ( hash. h4[0xE] ) ) ;
FUGUE512_3 ( ( hash. h4[0xF] ) , as_uint2 ( fc_bit_count ) . y, as_uint2 ( fc_bit_count ) . x ) ;
FUGUE512_3 ( ( hash-> h4[0x0] ) , ( hash-> h4[0x1] ) , ( hash-> h4[0x2] ) ) ;
FUGUE512_3 ( ( hash-> h4[0x3] ) , ( hash-> h4[0x4] ) , ( hash-> h4[0x5] ) ) ;
FUGUE512_3 ( ( hash-> h4[0x6] ) , ( hash-> h4[0x7] ) , ( hash-> h4[0x8] ) ) ;
FUGUE512_3 ( ( hash-> h4[0x9] ) , ( hash-> h4[0xA] ) , ( hash-> h4[0xB] ) ) ;
FUGUE512_3 ( ( hash-> h4[0xC] ) , ( hash-> h4[0xD] ) , ( hash-> h4[0xE] ) ) ;
FUGUE512_3 ( ( hash-> h4[0xF] ) , as_uint2 ( fc_bit_count ) . y, as_uint2 ( fc_bit_count ) . x ) ;
// apply round shift if necessary
int i ;
for ( i = 0 ; i < 32; i ++) {
for ( i = 0 ; i < 32; i ++)
{
ROR3 ;
CMIX36 ( S00, S01, S02, S04, S05, S06, S18, S19, S20 ) ;
SMIX ( S00, S01, S02, S03 ) ;
}
for ( i = 0 ; i < 13; i ++) {
for ( i = 0 ; i < 13; i ++)
{
S04 ^= S00 ;
S09 ^= S00 ;
S18 ^= S00 ;
@ -970,27 +1217,24 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -970,27 +1217,24 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
S18 ^= S00 ;
S27 ^= S00 ;
hash.h4[0] = SWAP4 ( S01 ) ;
hash.h4[1] = SWAP4 ( S02 ) ;
hash.h4[2] = SWAP4 ( S03 ) ;
hash.h4[3] = SWAP4 ( S04 ) ;
hash.h4[4] = SWAP4 ( S09 ) ;
hash.h4[5] = SWAP4 ( S10 ) ;
hash.h4[6] = SWAP4 ( S11 ) ;
hash.h4[7] = SWAP4 ( S12 ) ;
hash.h4[8] = SWAP4 ( S18 ) ;
hash.h4[9] = SWAP4 ( S19 ) ;
hash.h4[10] = SWAP4 ( S20 ) ;
hash.h4[11] = SWAP4 ( S21 ) ;
hash.h4[12] = SWAP4 ( S27 ) ;
hash.h4[13] = SWAP4 ( S28 ) ;
hash.h4[14] = SWAP4 ( S29 ) ;
hash.h4[15] = SWAP4 ( S30 ) ;
}
hash->h4[0] = SWAP4 ( S01 ) ;
hash->h4[1] = SWAP4 ( S02 ) ;
hash->h4[2] = SWAP4 ( S03 ) ;
hash->h4[3] = SWAP4 ( S04 ) ;
hash->h4[4] = SWAP4 ( S09 ) ;
hash->h4[5] = SWAP4 ( S10 ) ;
hash->h4[6] = SWAP4 ( S11 ) ;
hash->h4[7] = SWAP4 ( S12 ) ;
hash->h4[8] = SWAP4 ( S18 ) ;
hash->h4[9] = SWAP4 ( S19 ) ;
hash->h4[10] = SWAP4 ( S20 ) ;
hash->h4[11] = SWAP4 ( S21 ) ;
hash->h4[12] = SWAP4 ( S27 ) ;
hash->h4[13] = SWAP4 ( S28 ) ;
hash->h4[14] = SWAP4 ( S29 ) ;
hash->h4[15] = SWAP4 ( S30 ) ;
// shabal
{
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7],
A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11] ;
sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7],
@ -1000,22 +1244,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1000,22 +1244,22 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF ;
sph_u32 Wlow = 1 , Whigh = 0 ;
M0 = hash. h4[0] ;
M1 = hash. h4[1] ;
M2 = hash. h4[2] ;
M3 = hash. h4[3] ;
M4 = hash. h4[4] ;
M5 = hash. h4[5] ;
M6 = hash. h4[6] ;
M7 = hash. h4[7] ;
M8 = hash. h4[8] ;
M9 = hash. h4[9] ;
MA = hash. h4[10] ;
MB = hash. h4[11] ;
MC = hash. h4[12] ;
MD = hash. h4[13] ;
ME = hash. h4[14] ;
MF = hash. h4[15] ;
M0 = hash-> h4[0] ;
M1 = hash-> h4[1] ;
M2 = hash-> h4[2] ;
M3 = hash-> h4[3] ;
M4 = hash-> h4[4] ;
M5 = hash-> h4[5] ;
M6 = hash-> h4[6] ;
M7 = hash-> h4[7] ;
M8 = hash-> h4[8] ;
M9 = hash-> h4[9] ;
MA = hash-> h4[10] ;
MB = hash-> h4[11] ;
MC = hash-> h4[12] ;
MD = hash-> h4[13] ;
ME = hash-> h4[14] ;
MF = hash-> h4[15] ;
INPUT_BLOCK_ADD ;
XOR_W ;
@ -1030,44 +1274,44 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1030,44 +1274,44 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
INPUT_BLOCK_ADD ;
XOR_W ;
APPLY_P ;
for ( unsigned i = 0 ; i < 3; i ++) {
for ( unsigned i = 0 ; i < 3; i ++)
{
SWAP_BC ;
XOR_W ;
APPLY_P ;
}
hash.h4[0] = B0 ;
hash.h4[1] = B1 ;
hash.h4[2] = B2 ;
hash.h4[3] = B3 ;
hash.h4[4] = B4 ;
hash.h4[5] = B5 ;
hash.h4[6] = B6 ;
hash.h4[7] = B7 ;
hash.h4[8] = B8 ;
hash.h4[9] = B9 ;
hash.h4[10] = BA ;
hash.h4[11] = BB ;
hash.h4[12] = BC ;
hash.h4[13] = BD ;
hash.h4[14] = BE ;
hash.h4[15] = BF ;
}
hash->h4[0] = B0 ;
hash->h4[1] = B1 ;
hash->h4[2] = B2 ;
hash->h4[3] = B3 ;
hash->h4[4] = B4 ;
hash->h4[5] = B5 ;
hash->h4[6] = B6 ;
hash->h4[7] = B7 ;
hash->h4[8] = B8 ;
hash->h4[9] = B9 ;
hash->h4[10] = BA ;
hash->h4[11] = BB ;
hash->h4[12] = BC ;
hash->h4[13] = BD ;
hash->h4[14] = BE ;
hash->h4[15] = BF ;
// whirlpool
{
sph_u64 n0, n1, n2, n3, n4, n5, n6, n7 ;
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7 ;
sph_u64 state[8] ;
n0 = ( hash. h8[0] ) ;
n1 = ( hash. h8[1] ) ;
n2 = ( hash. h8[2] ) ;
n3 = ( hash. h8[3] ) ;
n4 = ( hash. h8[4] ) ;
n5 = ( hash. h8[5] ) ;
n6 = ( hash. h8[6] ) ;
n7 = ( hash. h8[7] ) ;
n0 = ( hash-> h8[0] ) ;
n1 = ( hash-> h8[1] ) ;
n2 = ( hash-> h8[2] ) ;
n3 = ( hash-> h8[3] ) ;
n4 = ( hash-> h8[4] ) ;
n5 = ( hash-> h8[5] ) ;
n6 = ( hash-> h8[6] ) ;
n7 = ( hash-> h8[7] ) ;
h0 = h1 = h2 = h3 = h4 = h5 = h6 = h7 = 0 ;
@ -1080,7 +1324,9 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1080,7 +1324,9 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
n6 ^= h6 ;
n7 ^= h7 ;
for ( unsigned r = 0 ; r < 10; r ++) {
# pragma unroll 10
for ( unsigned r = 0 ; r < 10; r ++)
{
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 ;
ROUND_KSCHED ( plain_T, h, tmp, plain_RC[r] ) ;
@ -1089,14 +1335,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1089,14 +1335,14 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
TRANSFER ( n, tmp ) ;
}
state[0] = n0 ^ ( hash. h8[0] ) ;
state[1] = n1 ^ ( hash. h8[1] ) ;
state[2] = n2 ^ ( hash. h8[2] ) ;
state[3] = n3 ^ ( hash. h8[3] ) ;
state[4] = n4 ^ ( hash. h8[4] ) ;
state[5] = n5 ^ ( hash. h8[5] ) ;
state[6] = n6 ^ ( hash. h8[6] ) ;
state[7] = n7 ^ ( hash. h8[7] ) ;
state[0] = n0 ^ ( hash-> h8[0] ) ;
state[1] = n1 ^ ( hash-> h8[1] ) ;
state[2] = n2 ^ ( hash-> h8[2] ) ;
state[3] = n3 ^ ( hash-> h8[3] ) ;
state[4] = n4 ^ ( hash-> h8[4] ) ;
state[5] = n5 ^ ( hash-> h8[5] ) ;
state[6] = n6 ^ ( hash-> h8[6] ) ;
state[7] = n7 ^ ( hash-> h8[7] ) ;
n0 = 0x80 ;
n1 = n2 = n3 = n4 = n5 = n6 = 0 ;
@ -1120,10 +1366,12 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1120,10 +1366,12 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
n6 ^= h6 ;
n7 ^= h7 ;
for ( unsigned r = 0 ; r < 10; r ++) {
# pragma unroll 10
for ( unsigned r = 0 ; r < 10; r ++)
{
sph_u64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 ;
ROUND_KSCHED ( plain_ T, h, tmp, plain_RC[r] ) ;
ROUND_KSCHED ( L T, h, tmp, plain_RC[r] ) ;
TRANSFER ( h, tmp ) ;
ROUND_WENC ( plain_T, n, h, tmp ) ;
TRANSFER ( n, tmp ) ;
@ -1139,10 +1387,9 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
@@ -1139,10 +1387,9 @@ __kernel void search10(__global hash_t* hashes, __global uint* output, const ulo
state[7] ^= n7 ^ 0x2000000000000 ;
for ( unsigned i = 0 ; i < 8; i ++)
hash.h8[i] = state[i] ;
}
hash->h8[i] = state[i] ;
bool result = ( hash. h8[3] <= target ) ;
bool result = ( hash-> h8[3] <= target ) ;
if ( result )
output[atomic_inc ( output+0xFF ) ] = SWAP4 ( gid ) ;