OpenCL GPU miner
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

180 lines
5.3 KiB

#include "sha256.cl"
#include "wolf-sha512.cl"
#include "ripemd160.cl"
// Reverse the byte order of a 32-bit value: reinterpret it as a uchar4,
// swizzle the four bytes back to front, and reinterpret the result as a uint.
#define SWAP32(x) as_uint(as_uchar4(x).s3210)
// Same byte reversal for a 64-bit value, going through uchar8 and back to ulong.
#define SWAP64(x) as_ulong(as_uchar8(x).s76543210)
/*
 * search - first stage: SHA-256 over a 112-byte message, then SHA-256 over
 * the resulting 32-byte state.
 *
 * input: message words; input[0..26] (108 bytes) are read. The 28th word of
 *        the message (the nonce) is not read from input - it is the
 *        byte-swapped global id of the work-item.
 * ctx:   one uint8 per work-item, indexed by global id minus global offset;
 *        receives the final state with every word byte-swapped.
 *
 * sha256_round() is defined in sha256.cl; it is used here as "apply one
 * 16-word block to an 8-word state" - NOTE(review): confirm against sha256.cl.
 * WORKSIZE is expected to be defined by the build.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint *input, __global uint8 *ctx)
{
	// One SHA-256 block is 16 uints (16 * 4 == 64 bytes); the 112-byte
	// message therefore needs two block transforms.
	uint SHA256Buf[16];
	uint gid = get_global_id(0);

	// First block: message words 0..15, byte-swapped.
	#pragma unroll
	for(int i = 0; i < 16; ++i) SHA256Buf[i] = SWAP32(input[i]);

	// SHA256 initialization constants
	uint8 outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);

	// Three transforms share one call site: pass 0 and 1 consume the two
	// message blocks, pass 2 hashes the 32-byte result of the first hash.
	#pragma unroll
	for(int pass = 0; pass < 3; ++pass)
	{
		if(pass == 1)
		{
			// Second block: the remaining 11 message words (108 bytes / 4 == 27
			// words in total), then the nonce as word 27.
			#pragma unroll
			for(int j = 0; j < 11; ++j) SHA256Buf[j] = SWAP32(input[j + 16]);
			SHA256Buf[11] = SWAP32(gid);

			// Padding marker, zero fill, and the message length in bits
			// (0x380 == 896 == 112 * 8).
			SHA256Buf[12] = 0x80000000;
			SHA256Buf[13] = 0x00000000;
			SHA256Buf[14] = 0x00000000;
			SHA256Buf[15] = 0x00000380;
		}

		if(pass == 2)
		{
			// The state is fed back in without byte-swapping: it would have to be
			// swapped once as output and once more as input, which cancels out.
			((uint8 *)SHA256Buf)[0] = outbuf;

			// Padding for a 32-byte message (0x100 == 256 bits).
			SHA256Buf[8] = 0x80000000;
			#pragma unroll
			for(int j = 9; j < 15; ++j) SHA256Buf[j] = 0x00000000;
			SHA256Buf[15] = 0x00000100;

			// Second hash starts from the initial state again.
			outbuf = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
		}

		outbuf = sha256_round(((uint16 *)SHA256Buf)[0], outbuf);
	}

	// Byte-swap every word of the final state before handing it to the next
	// kernel through ctx.
	outbuf.s0 = SWAP32(outbuf.s0);
	outbuf.s1 = SWAP32(outbuf.s1);
	outbuf.s2 = SWAP32(outbuf.s2);
	outbuf.s3 = SWAP32(outbuf.s3);
	outbuf.s4 = SWAP32(outbuf.s4);
	outbuf.s5 = SWAP32(outbuf.s5);
	outbuf.s6 = SWAP32(outbuf.s6);
	outbuf.s7 = SWAP32(outbuf.s7);

	ctx[get_global_id(0) - get_global_offset(0)] = outbuf;
}
/*
 * search1 - second stage: takes the 32-byte value stored in ctx for this
 * work-item, runs it through SHA512Block, splits the 64-byte result into two
 * 32-byte halves, runs RIPEMD160_ROUND_BODY on each half, and finally runs
 * one sha256_round over the two 5-word results placed back to back.
 *
 * ctx: one uint8 per work-item, indexed by global id minus global offset;
 *      read at entry and overwritten with the resulting 8-word state
 *      (stored as returned by sha256_round, without byte-swapping).
 *
 * SHA512Block / SHA512_INIT, RIPEMD160_ROUND_BODY / RMD160_IV and
 * sha256_round come from the included .cl files; they are treated here as
 * single-block transforms - NOTE(review): confirm against those files.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search1(__global uint8 *ctx)
{
	ulong W[16] = { 0UL };
	ulong SHA512Out[8];
	uint RMD160_0[16] = { 0U };
	uint RMD160_1[16] = { 0U };
	uint RMD160_0_Out[5];
	uint RMD160_1_Out[5];
	uint shaBlock[16];

	uint8 hashState = ctx[get_global_id(0) - get_global_offset(0)];

	// --- 64-bit hash over the 32 input bytes ---------------------------------
	// The 8 input uints fill the first four ulongs, which are then byte-swapped.
	((uint8 *)W)[0] = hashState;
	for(int w = 0; w < 4; ++w)
	{
		W[w] = SWAP64(W[w]);
	}
	// Padding marker directly after the data; last word is the length in bits
	// (0x100 == 256). Everything in between stays zero from the initialiser.
	W[4] = 0x8000000000000000UL;
	W[15] = 0x0000000000000100UL;

	for(int w = 0; w < 8; ++w)
	{
		SHA512Out[w] = SHA512_INIT[w];
	}
	SHA512Block(W, SHA512Out);
	for(int w = 0; w < 8; ++w)
	{
		SHA512Out[w] = SWAP64(SHA512Out[w]);
	}

	// --- two RIPEMD-160 transforms, one per 32-byte half ---------------------
	for(int q = 0; q < 4; ++q)
	{
		((ulong *)RMD160_0)[q] = SHA512Out[q];
		((ulong *)RMD160_1)[q] = SHA512Out[q + 4];
	}
	// Word 8 carries the padding marker and word 14 the length in bits
	// (0x100 == 256); note these constants are written without byte-swapping.
	RMD160_1[8] = 0x00000080;
	RMD160_0[8] = 0x00000080;
	RMD160_1[14] = 0x00000100;
	RMD160_0[14] = 0x00000100;

	for(int k = 0; k < 5; ++k)
	{
		RMD160_0_Out[k] = RMD160_IV[k];
		RMD160_1_Out[k] = RMD160_IV[k];
	}
	RIPEMD160_ROUND_BODY(RMD160_0, RMD160_0_Out);
	RIPEMD160_ROUND_BODY(RMD160_1, RMD160_1_Out);

	// --- SHA-256 over the two 20-byte results (40 bytes) ---------------------
	for(int k = 0; k < 5; ++k)
	{
		shaBlock[k] = SWAP32(RMD160_0_Out[k]);
		shaBlock[k + 5] = SWAP32(RMD160_1_Out[k]);
	}
	shaBlock[10] = 0x80000000;
	for(int k = 11; k < 15; ++k)
	{
		shaBlock[k] = 0x00000000U;
	}
	// Length in bits: 0x140 == 320 == 40 * 8.
	shaBlock[15] = 0x00000140;

	hashState = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
	hashState = sha256_round(((uint16 *)shaBlock)[0], hashState);

	ctx[get_global_id(0) - get_global_offset(0)] = hashState;
}
/*
 * search2 - final stage: runs one more sha256_round over the 8-word state
 * stored in ctx for this work-item and reports the work-item if the result
 * meets the target.
 *
 * ctx:    one uint8 per work-item, indexed by global id minus global offset
 *         (read only here).
 * output: result buffer. output[0xFF] is used as an atomic counter; its
 *         previous value selects the slot that receives the byte-swapped
 *         global id of a successful work-item.
 * target: a work-item succeeds when the last two state words, byte-swapped
 *         and read together as one ulong, are <= target.
 *
 * NOTE(review): the slot returned by atomic_inc is not range-checked, so the
 * host presumably sizes and resets `output` to make overflow impossible in
 * practice - confirm against the host code.
 */
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search2(__global uint8 *ctx, __global uint *output, ulong target)
{
	uint gid = get_global_id(0);
	uint8 state = ctx[get_global_id(0) - get_global_offset(0)];

	// Message block: the 8 state words, the padding marker, and the length in
	// bits (0x100 == 256). Words 9..14 stay zero from the initialiser.
	uint block[16] = { 0U };
	((uint8 *)block)[0] = state;
	block[8] = 0x80000000;
	block[15] = 0x00000100;

	state = (uint8)(0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19);
	state = sha256_round(((uint16 *)block)[0], state);

	// Only the last two words take part in the target comparison.
	state.s6 = SWAP32(state.s6);
	state.s7 = SWAP32(state.s7);

	if(as_ulong(state.s67) > target)
	{
		return;
	}

	output[atomic_inc(output+0xFF)] = SWAP32(gid);
}