mirror of https://github.com/GOSTSec/sgminer
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
525 lines
14 KiB
525 lines
14 KiB
/* |
|
* Lyra2RE kernel implementation. |
|
* |
|
* ==========================(LICENSE BEGIN)============================ |
|
* Copyright (c) 2014 djm34 |
|
* Copyright (c) 2014 James Lovejoy |
|
* |
|
* Permission is hereby granted, free of charge, to any person obtaining |
|
* a copy of this software and associated documentation files (the |
|
* "Software"), to deal in the Software without restriction, including |
|
* without limitation the rights to use, copy, modify, merge, publish, |
|
* distribute, sublicense, and/or sell copies of the Software, and to |
|
* permit persons to whom the Software is furnished to do so, subject to |
|
* the following conditions: |
|
* |
|
* The above copyright notice and this permission notice shall be |
|
* included in all copies or substantial portions of the Software. |
|
* |
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
* |
|
* ===========================(LICENSE END)============================= |
|
* |
|
* @author djm34 |
|
*/ |
|
// typedef unsigned int uint; |
|
#pragma OPENCL EXTENSION cl_amd_printf : enable |
|
|
|
#ifndef LYRA2REV2_CL |
|
#define LYRA2REV2_CL |
|
|
|
/*
 * Endianness flag: exactly one of SPH_LITTLE_ENDIAN / SPH_BIG_ENDIAN is
 * defined, chosen from the compiler-provided __ENDIAN_LITTLE__ macro.
 */
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
#define SPH_BIG_ENDIAN 1
#endif

#define SPH_UPTR sph_u64

/* Fixed-width integer aliases used by the hash code. */
typedef unsigned int sph_u32;
typedef int sph_s32;
#ifndef __OPENCL_VERSION__
/* Host C only guarantees 32 bits for long; long long is at least 64. */
typedef unsigned long long sph_u64;
typedef long long sph_s64;
#else
/* OpenCL C defines long / unsigned long as 64-bit types. */
typedef unsigned long sph_u64;
typedef long sph_s64;
#endif

#define SPH_64 1
#define SPH_64_TRUE 1

/* Typed constants and truncation to 32 / 64 bits. */
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))

#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))

/*
 * Rotations, expressed through the rotate() built-in (left rotation); a right
 * rotation by n is a left rotation by (width - n).
 * Portable shift/or equivalent, for reference:
 *   ROTL(x, n) = ((x) << (n)) | ((x) >> (W - (n)))
 *   ROTR(x, n) = ((x) >> (n)) | ((x) << (W - (n)))
 * Both arguments are fully parenthesised so that compound expressions such
 * as SPH_ROTR32(x, a + b) expand to (32 - (a + b)) and not (32 - a + b).
 */
#define SPH_ROTL32(x, n) rotate((x), (uint)(n)) // faster with driver 14.6
#define SPH_ROTR32(x, n) rotate((x), (uint)(32 - (n)))
#define SPH_ROTL64(x, n) rotate((x), (ulong)(n))
#define SPH_ROTR64(x, n) rotate((x), (ulong)(64 - (n)))
|
/*
 * Rotate the 64-bit value vw right by a bits, working on its two 32-bit
 * halves.
 *
 * The rotation count is reduced modulo 64. A count of 0 (or any multiple of
 * 64) returns vw unchanged; without this guard the code below would shift a
 * 32-bit half by 32, and since OpenCL C masks shift counts to the operand
 * width the two halves would be OR-ed together instead of being preserved.
 *
 * NOTE(review): the half selection assumes as_uint2() places the low 32 bits
 * in .x and the high 32 bits in .y (little-endian layout) - confirm before
 * using on a big-endian device.
 */
static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
	const unsigned r = a & 63u;
	if (r == 0u) {
		return vw;
	}

	const uint2 v = as_uint2(vw);
	/* Right-rotate by r == left-rotate by n, with n in 1..63. */
	const unsigned n = 64u - r;

	if (n == 32u) {
		/* Exactly half a word: just swap the halves. */
		return as_ulong((uint2)(v.y, v.x));
	}

	uint2 result;
	if (n < 32u) {
		/* n in 1..31: each half takes its top bits from the other half. */
		result.y = ((v.y << (n)) | (v.x >> (32u - n)));
		result.x = ((v.x << (n)) | (v.y >> (32u - n)));
	} else {
		/* n in 33..63: swap the halves, then rotate left by n - 32. */
		result.y = ((v.x << (n - 32u)) | (v.y >> (64u - n)));
		result.x = ((v.y << (n - 32u)) | (v.x >> (64u - n)));
	}
	return as_ulong(result);
}
|
|
|
//#define SPH_ROTR64(l,n) ror64(l,n) |
|
#define memshift 3 |
|
#include "blake256.cl" |
|
#include "lyra2v2.cl" |
|
#include "keccak1600.cl" |
|
#include "skein256.cl" |
|
#include "cubehash.cl" |
|
#include "bmw256.cl" |
|
|
|
/* Byte-order reversal of a 32-bit / 64-bit value via component swizzles. */
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)

/*
 * Endian-dependent decode helpers.
 *   DEC64E(x)   - convert a 64-bit value (byte-swapped on little-endian).
 *   DEC64BE(p)  - load a 64-bit word from __global pointer p.
 *   DEC64LE(p)  - load a 64-bit word from __global pointer p.
 *   DEC32LE(p)  - load a 32-bit word from __global pointer p.
 * Each expansion is a plain expression with no trailing ';', so the macros
 * can be used inside larger expressions as well as in simple assignments.
 *
 * NOTE(review): DEC32LE byte-swaps on little-endian and not on big-endian,
 * the opposite of DEC64LE; the mapping is kept as found - confirm intent.
 */
#if SPH_BIG_ENDIAN
#define DEC64E(x) (x)
#define DEC64BE(x) (*(const __global sph_u64 *) (x))
#define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x))
#define DEC32LE(x) (*(const __global sph_u32 *) (x))
#else
#define DEC64E(x) SWAP8(x)
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x))
#define DEC64LE(x) (*(const __global sph_u64 *) (x))
#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x))
#endif
|
|
|
typedef union { |
|
unsigned char h1[32]; |
|
uint h4[8]; |
|
ulong h8[4]; |
|
} hash_t; |
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search( |
|
__global uchar* hashes, |
|
// precalc hash from fisrt part of message |
|
const uint h0, |
|
const uint h1, |
|
const uint h2, |
|
const uint h3, |
|
const uint h4, |
|
const uint h5, |
|
const uint h6, |
|
const uint h7, |
|
// last 12 bytes of original message |
|
const uint in16, |
|
const uint in17, |
|
const uint in18 |
|
) |
|
{ |
|
uint gid = get_global_id(0); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
|
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
unsigned int h[8]; |
|
unsigned int m[16]; |
|
unsigned int v[16]; |
|
|
|
|
|
h[0]=h0; |
|
h[1]=h1; |
|
h[2]=h2; |
|
h[3]=h3; |
|
h[4]=h4; |
|
h[5]=h5; |
|
h[6]=h6; |
|
h[7]=h7; |
|
// compress 2nd round |
|
m[0] = in16; |
|
m[1] = in17; |
|
m[2] = in18; |
|
m[3] = SWAP4(gid); |
|
|
|
for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];} |
|
|
|
for (int i = 0; i < 8; i++) {v[i] = h[i];} |
|
|
|
v[8] = c_u256[0]; |
|
v[9] = c_u256[1]; |
|
v[10] = c_u256[2]; |
|
v[11] = c_u256[3]; |
|
v[12] = c_u256[4] ^ 640; |
|
v[13] = c_u256[5] ^ 640; |
|
v[14] = c_u256[6]; |
|
v[15] = c_u256[7]; |
|
|
|
for (int r = 0; r < 14; r++) { |
|
GS(0, 4, 0x8, 0xC, 0x0); |
|
GS(1, 5, 0x9, 0xD, 0x2); |
|
GS(2, 6, 0xA, 0xE, 0x4); |
|
GS(3, 7, 0xB, 0xF, 0x6); |
|
GS(0, 5, 0xA, 0xF, 0x8); |
|
GS(1, 6, 0xB, 0xC, 0xA); |
|
GS(2, 7, 0x8, 0xD, 0xC); |
|
GS(3, 4, 0x9, 0xE, 0xE); |
|
} |
|
|
|
for (int i = 0; i < 16; i++) { |
|
int j = i & 7; |
|
h[j] ^= v[i];} |
|
|
|
for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
} |
|
|
|
// keccak256 |
|
|
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search1(__global uchar* hashes) |
|
{ |
|
uint gid = get_global_id(0); |
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
|
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
sph_u64 keccak_gpu_state[25]; |
|
|
|
for (int i = 0; i<25; i++) { |
|
if (i<4) { keccak_gpu_state[i] = hash->h8[i]; } |
|
else { keccak_gpu_state[i] = 0; } |
|
} |
|
keccak_gpu_state[4] = 0x0000000000000001; |
|
keccak_gpu_state[16] = 0x8000000000000000; |
|
|
|
keccak_block(keccak_gpu_state); |
|
for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; } |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
} |
|
|
|
// cubehash256 |
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search2(__global uchar* hashes) |
|
{ |
|
uint gid = get_global_id(0); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
|
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71; |
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63; |
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696; |
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C; |
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00; |
|
sph_u32 xf = 0x40E5FBAB; |
|
|
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5; |
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549; |
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285; |
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD; |
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6; |
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A; |
|
|
|
x0 ^= (hash->h4[0]); |
|
x1 ^= (hash->h4[1]); |
|
x2 ^= (hash->h4[2]); |
|
x3 ^= (hash->h4[3]); |
|
x4 ^= (hash->h4[4]); |
|
x5 ^= (hash->h4[5]); |
|
x6 ^= (hash->h4[6]); |
|
x7 ^= (hash->h4[7]); |
|
|
|
|
|
SIXTEEN_ROUNDS; |
|
x0 ^= 0x80; |
|
SIXTEEN_ROUNDS; |
|
xv ^= 0x01; |
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS; |
|
|
|
hash->h4[0] = x0; |
|
hash->h4[1] = x1; |
|
hash->h4[2] = x2; |
|
hash->h4[3] = x3; |
|
hash->h4[4] = x4; |
|
hash->h4[5] = x5; |
|
hash->h4[6] = x6; |
|
hash->h4[7] = x7; |
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
} |
|
|
|
|
|
/// lyra2 algo |
|
|
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search3(__global uchar* hashes,__global uchar* matrix ) |
|
{ |
|
uint gid = get_global_id(0); |
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
__global ulong4 *DMatrix = (__global ulong4 *)(matrix + (4 * memshift * 4 * 4 * 8 * (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
// uint offset = (4 * memshift * 4 * 4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))/32; |
|
ulong4 state[4]; |
|
|
|
state[0].x = hash->h8[0]; //password |
|
state[0].y = hash->h8[1]; //password |
|
state[0].z = hash->h8[2]; //password |
|
state[0].w = hash->h8[3]; //password |
|
state[1] = state[0]; |
|
state[2] = (ulong4)(0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL); |
|
state[3] = (ulong4)(0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL); |
|
for (int i = 0; i<12; i++) { round_lyra(state); } |
|
|
|
state[0] ^= (ulong4)(0x20,0x20,0x20,0x01); |
|
state[1] ^= (ulong4)(0x04,0x04,0x80,0x0100000000000000); |
|
|
|
for (int i = 0; i<12; i++) { round_lyra(state); } |
|
|
|
|
|
uint ps1 = (memshift * 3); |
|
//#pragma unroll 4 |
|
for (int i = 0; i < 4; i++) |
|
{ |
|
uint s1 = ps1 - memshift * i; |
|
for (int j = 0; j < 3; j++) |
|
(DMatrix)[j+s1] = state[j]; |
|
|
|
round_lyra(state); |
|
} |
|
|
|
reduceDuplexf(state,DMatrix); |
|
|
|
reduceDuplexRowSetupf(1, 0, 2,state, DMatrix); |
|
reduceDuplexRowSetupf(2, 1, 3, state,DMatrix); |
|
|
|
|
|
uint rowa; |
|
uint prev = 3; |
|
for (uint i = 0; i<4; i++) { |
|
rowa = state[0].x & 3; |
|
reduceDuplexRowf(prev, rowa, i, state, DMatrix); |
|
prev = i; |
|
} |
|
|
|
|
|
|
|
uint shift = (memshift * 4 * rowa); |
|
|
|
for (int j = 0; j < 3; j++) |
|
state[j] ^= (DMatrix)[j+shift]; |
|
|
|
for (int i = 0; i < 12; i++) |
|
round_lyra(state); |
|
////////////////////////////////////// |
|
|
|
|
|
for (int i = 0; i<4; i++) {hash->h8[i] = ((ulong*)state)[i];} |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
} |
|
|
|
//skein256 |
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search4(__global uchar* hashes) |
|
{ |
|
uint gid = get_global_id(0); |
|
// __global hash_t *hash = &(hashes[gid-get_global_offset(0)]); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
|
|
sph_u64 h[9]; |
|
sph_u64 t[3]; |
|
sph_u64 dt0,dt1,dt2,dt3; |
|
sph_u64 p0, p1, p2, p3, p4, p5, p6, p7; |
|
h[8] = skein_ks_parity; |
|
|
|
for (int i = 0; i<8; i++) { |
|
h[i] = SKEIN_IV512_256[i]; |
|
h[8] ^= h[i];} |
|
|
|
t[0]=t12[0]; |
|
t[1]=t12[1]; |
|
t[2]=t12[2]; |
|
|
|
dt0=hash->h8[0]; |
|
dt1=hash->h8[1]; |
|
dt2=hash->h8[2]; |
|
dt3=hash->h8[3]; |
|
|
|
p0 = h[0] + dt0; |
|
p1 = h[1] + dt1; |
|
p2 = h[2] + dt2; |
|
p3 = h[3] + dt3; |
|
p4 = h[4]; |
|
p5 = h[5] + t[0]; |
|
p6 = h[6] + t[1]; |
|
p7 = h[7]; |
|
|
|
#pragma unroll |
|
for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);} |
|
p0 ^= dt0; |
|
p1 ^= dt1; |
|
p2 ^= dt2; |
|
p3 ^= dt3; |
|
|
|
h[0] = p0; |
|
h[1] = p1; |
|
h[2] = p2; |
|
h[3] = p3; |
|
h[4] = p4; |
|
h[5] = p5; |
|
h[6] = p6; |
|
h[7] = p7; |
|
h[8] = skein_ks_parity; |
|
|
|
for (int i = 0; i<8; i++) { h[8] ^= h[i]; } |
|
|
|
t[0] = t12[3]; |
|
t[1] = t12[4]; |
|
t[2] = t12[5]; |
|
p5 += t[0]; //p5 already equal h[5] |
|
p6 += t[1]; |
|
|
|
#pragma unroll |
|
for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); } |
|
|
|
hash->h8[0] = p0; |
|
hash->h8[1] = p1; |
|
hash->h8[2] = p2; |
|
hash->h8[3] = p3; |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
} |
|
|
|
//cubehash |
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search5(__global uchar* hashes) |
|
{ |
|
uint gid = get_global_id(0); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
sph_u32 x0 = 0xEA2BD4B4; sph_u32 x1 = 0xCCD6F29F; sph_u32 x2 = 0x63117E71; |
|
sph_u32 x3 = 0x35481EAE; sph_u32 x4 = 0x22512D5B; sph_u32 x5 = 0xE5D94E63; |
|
sph_u32 x6 = 0x7E624131; sph_u32 x7 = 0xF4CC12BE; sph_u32 x8 = 0xC2D0B696; |
|
sph_u32 x9 = 0x42AF2070; sph_u32 xa = 0xD0720C35; sph_u32 xb = 0x3361DA8C; |
|
sph_u32 xc = 0x28CCECA4; sph_u32 xd = 0x8EF8AD83; sph_u32 xe = 0x4680AC00; |
|
sph_u32 xf = 0x40E5FBAB; |
|
|
|
sph_u32 xg = 0xD89041C3; sph_u32 xh = 0x6107FBD5; |
|
sph_u32 xi = 0x6C859D41; sph_u32 xj = 0xF0B26679; sph_u32 xk = 0x09392549; |
|
sph_u32 xl = 0x5FA25603; sph_u32 xm = 0x65C892FD; sph_u32 xn = 0x93CB6285; |
|
sph_u32 xo = 0x2AF2B5AE; sph_u32 xp = 0x9E4B4E60; sph_u32 xq = 0x774ABFDD; |
|
sph_u32 xr = 0x85254725; sph_u32 xs = 0x15815AEB; sph_u32 xt = 0x4AB6AAD6; |
|
sph_u32 xu = 0x9CDAF8AF; sph_u32 xv = 0xD6032C0A; |
|
|
|
x0 ^= (hash->h4[0]); |
|
x1 ^= (hash->h4[1]); |
|
x2 ^= (hash->h4[2]); |
|
x3 ^= (hash->h4[3]); |
|
x4 ^= (hash->h4[4]); |
|
x5 ^= (hash->h4[5]); |
|
x6 ^= (hash->h4[6]); |
|
x7 ^= (hash->h4[7]); |
|
|
|
|
|
SIXTEEN_ROUNDS; |
|
x0 ^= 0x80; |
|
SIXTEEN_ROUNDS; |
|
xv ^= 0x01; |
|
for (int i = 0; i < 10; ++i) SIXTEEN_ROUNDS; |
|
|
|
hash->h4[0] = x0; |
|
hash->h4[1] = x1; |
|
hash->h4[2] = x2; |
|
hash->h4[3] = x3; |
|
hash->h4[4] = x4; |
|
hash->h4[5] = x5; |
|
hash->h4[6] = x6; |
|
hash->h4[7] = x7; |
|
|
|
|
|
barrier(CLK_GLOBAL_MEM_FENCE); |
|
|
|
} |
|
|
|
|
|
|
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
|
__kernel void search6(__global uchar* hashes, __global uint* output, const ulong target) |
|
{ |
|
uint gid = get_global_id(0); |
|
__global hash_t *hash = (__global hash_t *)(hashes + (4 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
|
|
|
uint dh[16] = { |
|
0x40414243, 0x44454647, |
|
0x48494A4B, 0x4C4D4E4F, |
|
0x50515253, 0x54555657, |
|
0x58595A5B, 0x5C5D5E5F, |
|
0x60616263, 0x64656667, |
|
0x68696A6B, 0x6C6D6E6F, |
|
0x70717273, 0x74757677, |
|
0x78797A7B, 0x7C7D7E7F |
|
}; |
|
uint final_s[16] = { |
|
0xaaaaaaa0, 0xaaaaaaa1, 0xaaaaaaa2, |
|
0xaaaaaaa3, 0xaaaaaaa4, 0xaaaaaaa5, |
|
0xaaaaaaa6, 0xaaaaaaa7, 0xaaaaaaa8, |
|
0xaaaaaaa9, 0xaaaaaaaa, 0xaaaaaaab, |
|
0xaaaaaaac, 0xaaaaaaad, 0xaaaaaaae, |
|
0xaaaaaaaf |
|
}; |
|
|
|
uint message[16]; |
|
for (int i = 0; i<8; i++) message[i] = hash->h4[i]; |
|
for (int i = 9; i<14; i++) message[i] = 0; |
|
message[8]= 0x80; |
|
message[14]=0x100; |
|
message[15]=0; |
|
|
|
Compression256(message, dh); |
|
Compression256(dh, final_s); |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
bool result = ( ((ulong*)final_s)[7] <= target); |
|
if (result) { |
|
output[atomic_inc(output + 0xFF)] = SWAP4(gid); |
|
} |
|
|
|
} |
|
|
|
|
|
#endif // LYRA2REV2_CL |