mirror of https://github.com/GOSTSec/sgminer
djm34
10 years ago
10 changed files with 576 additions and 212 deletions
@ -0,0 +1,314 @@
@@ -0,0 +1,314 @@
|
||||
/* |
||||
* "yescrypt" kernel implementation. |
||||
* |
||||
* ==========================(LICENSE BEGIN)============================ |
||||
* |
||||
* Copyright (c) 2015 djm34 |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining |
||||
* a copy of this software and associated documentation files (the |
||||
* "Software"), to deal in the Software without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Software, and to |
||||
* permit persons to whom the Software is furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be |
||||
* included in all copies or substantial portions of the Software. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
* |
||||
* ===========================(LICENSE END)============================= |
||||
* |
||||
* @author djm34 |
||||
*/ |
||||
#if !defined(cl_khr_byte_addressable_store) |
||||
#error "Device does not support unaligned stores" |
||||
#endif |
||||
|
||||
#include "yescrypt_essential.cl" |
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, __global uchar* buff1, __global uchar* buff2, __global uchar* buff3, const uint target) |
||||
{ |
||||
|
||||
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16 *prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
|
||||
|
||||
|
||||
uint nonce = (get_global_id(0)); |
||||
uint data[20]; |
||||
uint16 in; |
||||
uint8 state1, state2; |
||||
// uint8 sha256tokeep; |
||||
|
||||
// ulong16 Bdev[8]; // will require an additional buffer |
||||
((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; |
||||
((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; |
||||
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } |
||||
// if (nonce == 10) { printf("data %08x %08x\n", data[0], data[1]); } |
||||
uint8 passwd = sha256_80(data, nonce); |
||||
//pbkdf |
||||
in.lo = pad1.lo ^ passwd; |
||||
in.hi = pad1.hi; |
||||
state1 = sha256_Transform(in, H256); |
||||
|
||||
in.lo = pad2.lo ^ passwd; |
||||
in.hi = pad2.hi; |
||||
state2 = sha256_Transform(in, H256); |
||||
|
||||
in = ((uint16*)data)[0]; |
||||
state1 = sha256_Transform(in, state1); |
||||
#pragma unroll 1 |
||||
for (int i = 0; i<8; i++) |
||||
{ |
||||
uint16 result; |
||||
in = pad3; |
||||
in.s0 = data[16]; |
||||
in.s1 = data[17]; |
||||
in.s2 = data[18]; |
||||
in.s3 = nonce; |
||||
in.s4 = 4 * i + 1; |
||||
in.lo = sha256_Transform(in, state1); |
||||
in.hi = pad4; |
||||
result.lo = swapvec(sha256_Transform(in, state2)); |
||||
if (i == 0) sha256tokeep[0] = result.lo; |
||||
in = pad3; |
||||
in.s0 = data[16]; |
||||
in.s1 = data[17]; |
||||
in.s2 = data[18]; |
||||
in.s3 = nonce; |
||||
in.s4 = 4 * i + 2; |
||||
in.lo = sha256_Transform(in, state1); |
||||
in.hi = pad4; |
||||
result.hi = swapvec(sha256_Transform(in, state2)); |
||||
Bdev[i].lo = as_ulong8(shuffle(result)); |
||||
// Bdev[i].lo = as_ulong8(result); |
||||
in = pad3; |
||||
in.s0 = data[16]; |
||||
in.s1 = data[17]; |
||||
in.s2 = data[18]; |
||||
in.s3 = nonce; |
||||
in.s4 = 4 * i + 3; |
||||
in.lo = sha256_Transform(in, state1); |
||||
in.hi = pad4; |
||||
result.lo = swapvec(sha256_Transform(in, state2)); |
||||
in = pad3; |
||||
in.s0 = data[16]; |
||||
in.s1 = data[17]; |
||||
in.s2 = data[18]; |
||||
in.s3 = nonce; |
||||
in.s4 = 4 * i + 4; |
||||
in.lo = sha256_Transform(in, state1); |
||||
in.hi = pad4; |
||||
result.hi = swapvec(sha256_Transform(in, state2)); |
||||
|
||||
|
||||
Bdev[i].hi = as_ulong8(shuffle(result)); |
||||
// Bdev[i].hi = as_ulong8(result); |
||||
} |
||||
|
||||
//mixing1 |
||||
|
||||
prevstate[0] = Bdev[0]; |
||||
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||
prevstate[1] = Bdev[0]; |
||||
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||
|
||||
uint n = 1; |
||||
#pragma unroll 1 |
||||
for (uint i = 2; i < 64; i++) |
||||
{ |
||||
|
||||
prevstate[i] = Bdev[0]; |
||||
|
||||
if ((i&(i - 1)) == 0) n = n << 1; |
||||
|
||||
uint j = as_uint2(Bdev[0].hi.s0).x & (n - 1); |
||||
|
||||
j += i - n; |
||||
Bdev[0] ^= prevstate[j]; |
||||
|
||||
Bdev[0] = blockmix_salsa8_small2(Bdev[0]); |
||||
} |
||||
|
||||
|
||||
} |
||||
|
||||
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search1(__global uchar *buffer1, __global uchar *buffer2) |
||||
{ |
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search2(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) |
||||
{ |
||||
|
||||
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
|
||||
|
||||
for (int i = 0; i<8; i++) |
||||
hashbuffer[i] = Bdev[i]; |
||||
|
||||
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||
|
||||
|
||||
for (int i = 0; i<8; i++) |
||||
hashbuffer[i + 8] = Bdev[i]; |
||||
|
||||
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||
int n = 1; |
||||
#pragma unroll 1 |
||||
for (int i = 2; i < 2048; i ++) |
||||
{ |
||||
|
||||
for (int k = 0; k<8; k++) |
||||
(hashbuffer + 8 * i)[k] = Bdev[k]; |
||||
|
||||
|
||||
if ((i&(i - 1)) == 0) n = n << 1; |
||||
|
||||
uint j = as_uint2(Bdev[7].hi.s0).x & (n - 1); |
||||
j += i - n; |
||||
|
||||
for (int k = 0; k < 8; k++) |
||||
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||
|
||||
|
||||
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||
} |
||||
} |
||||
|
||||
/* |
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search3(__global uchar *buffer1, __global uchar *buffer2) |
||||
{ |
||||
} |
||||
*/ |
||||
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search3(__global uchar *padcache, __global uchar *buff1, __global uchar *buff2) |
||||
{ |
||||
|
||||
__global ulong16 *hashbuffer = (__global ulong16 *)(padcache + (2048 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16* prevstate = (__global ulong16 *)(buff1 + (64 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
|
||||
|
||||
#pragma unroll 1 |
||||
for (int z = 0; z < 684; z++) |
||||
{ |
||||
|
||||
uint j = as_uint2(Bdev[7].hi.s0).x & 2047; |
||||
|
||||
|
||||
for (int k = 0; k < 8; k++) |
||||
Bdev[k] ^= (hashbuffer + 8 * j)[k]; |
||||
|
||||
if (z<682) |
||||
for (int k = 0; k<8; k++) |
||||
(hashbuffer + 8 * j)[k] = Bdev[k]; |
||||
|
||||
blockmix_pwxform((__global ulong8*)Bdev, prevstate); |
||||
//// |
||||
} |
||||
|
||||
} |
||||
|
||||
/* |
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search5(__global uchar *buffer1, __global uchar *buffer2) |
||||
{ |
||||
} |
||||
*/ |
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) |
||||
__kernel void search4(__global const uchar* restrict input, __global uint* restrict output, __global uchar *buff2,__global uchar* buff3, const uint target) |
||||
{ |
||||
|
||||
__global ulong16 *Bdev = (__global ulong16 *)(buff2 + (8 * 128 * sizeof(ulong)* (get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
__global uint8 *sha256tokeep = (__global uint8 *)(buff3 + (8 * sizeof(uint)*(get_global_id(0) % MAX_GLOBAL_THREADS))); |
||||
|
||||
uint nonce = (get_global_id(0)); |
||||
|
||||
|
||||
uint data[20]; |
||||
((uint16 *)data)[0] = ((__global const uint16 *)input)[0]; |
||||
((uint4 *)data)[4] = ((__global const uint4 *)input)[4]; |
||||
// for (int i = 0; i<20; i++) { data[i] = SWAP32(data[i]); } |
||||
uint8 swpass = swapvec(sha256tokeep[0]); |
||||
uint16 in; |
||||
uint8 state1,state2; |
||||
in.lo = pad1.lo ^ swpass; |
||||
in.hi = pad1.hi; |
||||
|
||||
|
||||
state1 = sha256_Transform(in, H256); |
||||
|
||||
in.lo = pad2.lo ^ swpass; |
||||
in.hi = pad2.hi; |
||||
state2 = sha256_Transform(in, H256); |
||||
|
||||
#pragma unroll 1 |
||||
for (int i = 0; i<8; i++) { |
||||
in = unshuffle(Bdev[i].lo); |
||||
in = swapvec16(in); |
||||
state1 = sha256_Transform(in, state1); |
||||
in = unshuffle(Bdev[i].hi); |
||||
in = swapvec16(in); |
||||
state1 = sha256_Transform(in, state1); |
||||
} |
||||
in = pad5; |
||||
state1 = sha256_Transform(in, state1); |
||||
in.lo = state1; |
||||
in.hi = pad4; |
||||
uint8 res = sha256_Transform(in, state2); |
||||
|
||||
//hmac and final sha |
||||
|
||||
in.lo = pad1.lo ^ res; |
||||
in.hi = pad1.hi; |
||||
state1 = sha256_Transform(in, H256); |
||||
in.lo = pad2.lo ^ res; |
||||
in.hi = pad2.hi; |
||||
state2 = sha256_Transform(in, H256); |
||||
in = ((uint16*)data)[0]; |
||||
state1 = sha256_Transform(in, state1); |
||||
in = padsha80; |
||||
in.s0 = data[16]; |
||||
in.s1 = data[17]; |
||||
in.s2 = data[18]; |
||||
in.s3 = get_global_id(0); |
||||
in.sf = 0x480; |
||||
state1 = sha256_Transform(in, state1); |
||||
in.lo = state1; |
||||
in.hi = pad4; |
||||
state1 = sha256_Transform(in, state2); |
||||
// state2 = H256; |
||||
in.lo = state1; |
||||
in.hi = pad4; |
||||
in.sf = 0x100; |
||||
res = sha256_Transform(in, H256); |
||||
|
||||
|
||||
if (SWAP32(res.s7) <= (target)) |
||||
output[atomic_inc(output + 0xFF)] = (nonce); |
||||
|
||||
} |
Binary file not shown.
Loading…
Reference in new issue