mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-08 22:08:02 +00:00
Added Neoscrypt with Wolf9466 improvements.
This commit is contained in:
parent
0557017ca8
commit
42737acf66
@ -65,6 +65,7 @@ sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h
|
||||
sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h
|
||||
sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
|
||||
sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h
|
||||
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
|
||||
|
||||
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl
|
||||
|
||||
|
41
algorithm.c
41
algorithm.c
@ -29,6 +29,7 @@
|
||||
#include "algorithm/bitblock.h"
|
||||
#include "algorithm/x14.h"
|
||||
#include "algorithm/fresh.h"
|
||||
#include "algorithm/neoscrypt.h"
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
@ -92,6 +93,17 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru
|
||||
strcat(data->binary_filename, buf);
|
||||
}
|
||||
|
||||
static void append_neoscrypt_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
|
||||
{
|
||||
char buf[255];
|
||||
sprintf(buf, " -D MAX_GLOBAL_THREADS=%u",
|
||||
(unsigned int)cgpu->thread_concurrency);
|
||||
strcat(data->compiler_options, buf);
|
||||
|
||||
sprintf(buf, "tc%u", (unsigned int)cgpu->thread_concurrency);
|
||||
strcat(data->binary_filename, buf);
|
||||
}
|
||||
|
||||
static void append_x11_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
|
||||
{
|
||||
char buf[255];
|
||||
@ -140,6 +152,30 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx
|
||||
return status;
|
||||
}
|
||||
|
||||
static cl_int queue_neoscrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||
{
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
unsigned int num = 0;
|
||||
cl_uint le_target;
|
||||
cl_int status = 0;
|
||||
|
||||
/* This looks like a unnecessary double cast, but to make sure, that
|
||||
* the target's most significant entry is adressed as a 32-bit value
|
||||
* and not accidently by something else the double cast seems wise.
|
||||
* The compiler will get rid of it anyway.
|
||||
*/
|
||||
le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
|
||||
memcpy(clState->cldata, blk->work->data, 80);
|
||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
|
||||
|
||||
CL_SET_ARG(clState->CLbuffer0);
|
||||
CL_SET_ARG(clState->outputBuffer);
|
||||
CL_SET_ARG(clState->padbuffer8);
|
||||
CL_SET_ARG(le_target);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
|
||||
{
|
||||
cl_kernel *kernel = &clState->kernel;
|
||||
@ -597,6 +633,11 @@ static algorithm_settings_t algos[] = {
|
||||
A_SCRYPT( "zuikkis" ),
|
||||
#undef A_SCRYPT
|
||||
|
||||
#define A_NEOSCRYPT(a) \
|
||||
{ a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
|
||||
A_NEOSCRYPT("neoscrypt"),
|
||||
#undef A_NEOSCRYPT
|
||||
|
||||
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
|
||||
#define A_QUARK(a, b) \
|
||||
{ a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options }
|
||||
|
@ -23,7 +23,8 @@ typedef enum {
|
||||
ALGO_TWE,
|
||||
ALGO_FUGUE,
|
||||
ALGO_NIST,
|
||||
ALGO_FRESH
|
||||
ALGO_FRESH,
|
||||
ALGO_NEOSCRYPT
|
||||
} algorithm_type_t;
|
||||
|
||||
extern const char *algorithm_type_str[];
|
||||
|
1411
algorithm/neoscrypt.c
Normal file
1411
algorithm/neoscrypt.c
Normal file
File diff suppressed because it is too large
Load Diff
13
algorithm/neoscrypt.h
Normal file
13
algorithm/neoscrypt.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef NEOSCRYPT_H
|
||||
#define NEOSCRYPT_H
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
/* The neoscrypt scratch buffer needs 32kBytes memory. */
|
||||
#define NEOSCRYPT_SCRATCHBUF_SIZE (32 * 1024)
|
||||
|
||||
/* These routines are always available. */
|
||||
extern void neoscrypt_regenhash(struct work *work);
|
||||
extern void neoscrypt(const unsigned char *input, unsigned char *output, unsigned int profile);
|
||||
|
||||
#endif /* NEOSCRYPT_H */
|
525
kernel/neoscrypt.cl
Normal file
525
kernel/neoscrypt.cl
Normal file
@ -0,0 +1,525 @@
|
||||
/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */
|
||||
/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */
|
||||
|
||||
// Stupid AMD compiler ignores the unroll pragma in these two
|
||||
#define SALSA_SMALL_UNROLL 3
|
||||
#define CHACHA_SMALL_UNROLL 3
|
||||
|
||||
// If SMALL_BLAKE2S is defined, BLAKE2S_UNROLL is interpreted
|
||||
// as the unroll factor; must divide cleanly into ten.
|
||||
// Usually a bad idea.
|
||||
//#define SMALL_BLAKE2S
|
||||
//#define BLAKE2S_UNROLL 5
|
||||
|
||||
#define BLOCK_SIZE 64U
|
||||
#define FASTKDF_BUFFER_SIZE 256U
|
||||
#ifndef PASSWORD_LEN
|
||||
#define PASSWORD_LEN 80U
|
||||
#endif
|
||||
|
||||
#if !defined(cl_khr_byte_addressable_store)
|
||||
#error "Device does not support unaligned stores"
|
||||
#endif
|
||||
|
||||
// Swaps 128 bytes at a time without using temp vars
|
||||
void SwapBytes128(void *restrict A, void *restrict B, uint len)
|
||||
{
|
||||
#pragma unroll 2
|
||||
for(int i = 0; i < (len >> 7); ++i)
|
||||
{
|
||||
((ulong16 *)A)[i] ^= ((ulong16 *)B)[i];
|
||||
((ulong16 *)B)[i] ^= ((ulong16 *)A)[i];
|
||||
((ulong16 *)A)[i] ^= ((ulong16 *)B)[i];
|
||||
}
|
||||
}
|
||||
|
||||
void CopyBytes128(void *restrict dst, const void *restrict src, uint len)
|
||||
{
|
||||
#pragma unroll 2
|
||||
for(int i = 0; i < len; ++i)
|
||||
((ulong16 *)dst)[i] = ((ulong16 *)src)[i];
|
||||
}
|
||||
|
||||
void CopyBytes(void *restrict dst, const void *restrict src, uint len)
|
||||
{
|
||||
for(int i = 0; i < len; ++i)
|
||||
((uchar *)dst)[i] = ((uchar *)src)[i];
|
||||
}
|
||||
|
||||
void XORBytesInPlace(void *restrict dst, const void *restrict src, uint len)
|
||||
{
|
||||
for(int i = 0; i < len; ++i)
|
||||
((uchar *)dst)[i] ^= ((uchar *)src)[i];
|
||||
}
|
||||
|
||||
void XORBytes(void *restrict dst, const void *restrict src1, const void *restrict src2, uint len)
|
||||
{
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < len; ++i)
|
||||
((uchar *)dst)[i] = ((uchar *)src1)[i] ^ ((uchar *)src2)[i];
|
||||
}
|
||||
|
||||
// Blake2S
|
||||
|
||||
#define BLAKE2S_BLOCK_SIZE 64U
|
||||
#define BLAKE2S_OUT_SIZE 32U
|
||||
#define BLAKE2S_KEY_SIZE 32U
|
||||
|
||||
static const __constant uint BLAKE2S_IV[8] =
|
||||
{
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
|
||||
};
|
||||
|
||||
static const __constant uchar BLAKE2S_SIGMA[10][16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||||
};
|
||||
|
||||
#define BLAKE_G(idx0, idx1, a, b, c, d, key) do { \
|
||||
a += b + key[BLAKE2S_SIGMA[idx0][idx1]]; \
|
||||
d = rotate(d ^ a, 16U); \
|
||||
c += d; \
|
||||
b = rotate(b ^ c, 20U); \
|
||||
a += b + key[BLAKE2S_SIGMA[idx0][idx1 + 1]]; \
|
||||
d = rotate(d ^ a, 24U); \
|
||||
c += d; \
|
||||
b = rotate(b ^ c, 25U); \
|
||||
} while(0)
|
||||
|
||||
void Blake2S(uint *restrict inout, const uint *restrict inkey)
|
||||
{
|
||||
uint16 V;
|
||||
uint8 tmpblock;
|
||||
|
||||
// Load first block (IV into V.lo) and constants (IV into V.hi)
|
||||
V.lo = V.hi = vload8(0U, BLAKE2S_IV);
|
||||
|
||||
// XOR with initial constant
|
||||
V.s0 ^= 0x01012020;
|
||||
|
||||
// Copy input block for later
|
||||
tmpblock = V.lo;
|
||||
|
||||
// XOR length of message so far (including this block)
|
||||
// There are two uints for this field, but high uint is zero
|
||||
V.sc ^= BLAKE2S_BLOCK_SIZE;
|
||||
|
||||
// Compress state, using the key as the key
|
||||
#ifdef SMALL_BLAKE2S
|
||||
#pragma unroll BLAKE2S_UNROLL
|
||||
#else
|
||||
#pragma unroll
|
||||
#endif
|
||||
for(int x = 0; x < 10; ++x)
|
||||
{
|
||||
BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inkey);
|
||||
BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inkey);
|
||||
BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inkey);
|
||||
BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inkey);
|
||||
BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inkey);
|
||||
BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inkey);
|
||||
BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inkey);
|
||||
BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inkey);
|
||||
}
|
||||
|
||||
// XOR low part of state with the high part,
|
||||
// then with the original input block.
|
||||
V.lo ^= V.hi ^ tmpblock;
|
||||
|
||||
// Load constants (IV into V.hi)
|
||||
V.hi = vload8(0U, BLAKE2S_IV);
|
||||
|
||||
// Copy input block for later
|
||||
tmpblock = V.lo;
|
||||
|
||||
// XOR length of message into block again
|
||||
V.sc ^= BLAKE2S_BLOCK_SIZE << 1;
|
||||
|
||||
// Last block compression - XOR final constant into state
|
||||
V.se ^= 0xFFFFFFFFU;
|
||||
|
||||
// Compress block, using the input as the key
|
||||
#ifdef SMALL_BLAKE2S
|
||||
#pragma unroll BLAKE2S_UNROLL
|
||||
#else
|
||||
#pragma unroll
|
||||
#endif
|
||||
for(int x = 0; x < 10; ++x)
|
||||
{
|
||||
BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inout);
|
||||
BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inout);
|
||||
BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inout);
|
||||
BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inout);
|
||||
BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inout);
|
||||
BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inout);
|
||||
BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inout);
|
||||
BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inout);
|
||||
}
|
||||
|
||||
// XOR low part of state with high part, then with input block
|
||||
V.lo ^= V.hi ^ tmpblock;
|
||||
|
||||
// Store result in input/output buffer
|
||||
vstore8(V.lo, 0, inout);
|
||||
}
|
||||
|
||||
/* FastKDF, a fast buffered key derivation function:
|
||||
* FASTKDF_BUFFER_SIZE must be a power of 2;
|
||||
* password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE;
|
||||
* prf_output_size must be <= prf_key_size; */
|
||||
void fastkdf(const uchar *restrict password, const uchar *restrict salt, const uint salt_len, uchar *restrict output, uint output_len)
|
||||
{
|
||||
|
||||
/* WARNING!
|
||||
* This algorithm uses byte-wise addressing for memory blocks.
|
||||
* Or in other words, trying to copy an unaligned memory region
|
||||
* will significantly slow down the algorithm, when copying uses
|
||||
* words or bigger entities. It even may corrupt the data, when
|
||||
* the device does not support it properly.
|
||||
* Therefore use byte copying, which will not the fastest but at
|
||||
* least get reliable results. */
|
||||
|
||||
// BLOCK_SIZE 64U
|
||||
// FASTKDF_BUFFER_SIZE 256U
|
||||
// BLAKE2S_BLOCK_SIZE 64U
|
||||
// BLAKE2S_KEY_SIZE 32U
|
||||
// BLAKE2S_OUT_SIZE 32U
|
||||
uchar bufidx = 0;
|
||||
uint8 Abuffer[9], Bbuffer[9] = { (uint8)(0) };
|
||||
uchar *A = (uchar *)Abuffer, *B = (uchar *)Bbuffer;
|
||||
|
||||
// Initialize the password buffer
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)A)[i] = ((ulong *)password)[i % 10];
|
||||
|
||||
((uint16 *)(A + FASTKDF_BUFFER_SIZE))[0] = ((uint16 *)password)[0];
|
||||
|
||||
// Initialize the salt buffer
|
||||
if(salt_len == FASTKDF_BUFFER_SIZE)
|
||||
{
|
||||
((ulong16 *)B)[0] = ((ulong16 *)B)[2] = ((ulong16 *)salt)[0];
|
||||
((ulong16 *)B)[1] = ((ulong16 *)B)[3] = ((ulong16 *)salt)[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
// salt_len is 80 bytes here
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)B)[i] = ((ulong *)salt)[i % 10];
|
||||
|
||||
// Initialized the rest to zero earlier
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < 10; ++i) ((ulong *)(B + FASTKDF_BUFFER_SIZE))[i] = ((ulong *)salt)[i];
|
||||
}
|
||||
|
||||
// The primary iteration
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < 32; ++i)
|
||||
{
|
||||
// Make the key buffer twice the size of the key so it fits a Blake2S block
|
||||
// This way, we don't need a temp buffer in the Blake2S function.
|
||||
uchar input[BLAKE2S_BLOCK_SIZE], key[BLAKE2S_BLOCK_SIZE] = { 0 };
|
||||
|
||||
// Copy input and key to their buffers
|
||||
CopyBytes(input, A + bufidx, BLAKE2S_BLOCK_SIZE);
|
||||
CopyBytes(key, B + bufidx, BLAKE2S_KEY_SIZE);
|
||||
|
||||
// PRF
|
||||
Blake2S((uint *)input, (uint *)key);
|
||||
|
||||
// Calculate the next buffer pointer
|
||||
bufidx = 0;
|
||||
|
||||
for(int x = 0; x < BLAKE2S_OUT_SIZE; ++x)
|
||||
bufidx += input[x];
|
||||
|
||||
// bufidx a uchar now - always mod 255
|
||||
//bufidx &= (FASTKDF_BUFFER_SIZE - 1);
|
||||
|
||||
// Modify the salt buffer
|
||||
XORBytesInPlace(B + bufidx, input, BLAKE2S_OUT_SIZE);
|
||||
|
||||
if(bufidx < BLAKE2S_KEY_SIZE)
|
||||
{
|
||||
// Head modified, tail updated
|
||||
// this was made off the original code... wtf
|
||||
//CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, min(BLAKE2S_OUT_SIZE, BLAKE2S_KEY_SIZE - bufidx));
|
||||
CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, BLAKE2S_KEY_SIZE - bufidx);
|
||||
}
|
||||
else if((FASTKDF_BUFFER_SIZE - bufidx) < BLAKE2S_OUT_SIZE)
|
||||
{
|
||||
// Tail modified, head updated
|
||||
CopyBytes(B, B + FASTKDF_BUFFER_SIZE, BLAKE2S_OUT_SIZE - (FASTKDF_BUFFER_SIZE - bufidx));
|
||||
}
|
||||
}
|
||||
|
||||
// Modify and copy into the output buffer
|
||||
|
||||
// Damned compiler crashes
|
||||
// Fuck you, AMD
|
||||
|
||||
//for(uint i = 0; i < output_len; ++i, ++bufidx)
|
||||
// output[i] = B[bufidx] ^ A[i];
|
||||
|
||||
uint left = FASTKDF_BUFFER_SIZE - bufidx;
|
||||
//uint left = (~bufidx) + 1
|
||||
|
||||
if(left < output_len)
|
||||
{
|
||||
XORBytes(output, B + bufidx, A, left);
|
||||
XORBytes(output + left, B, A + left, output_len - left);
|
||||
}
|
||||
else
|
||||
{
|
||||
XORBytes(output, B + bufidx, A, output_len);
|
||||
}
|
||||
}
|
||||
|
||||
#define SALSA_CORE(state) do { \
|
||||
state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \
|
||||
state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \
|
||||
state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \
|
||||
state.s3 ^= rotate(state.sf + state.sb, 7U); state.s7 ^= rotate(state.s3 + state.sf, 9U); state.sb ^= rotate(state.s7 + state.s3, 13U); state.sf ^= rotate(state.sb + state.s7, 18U); \
|
||||
state.s1 ^= rotate(state.s0 + state.s3, 7U); state.s2 ^= rotate(state.s1 + state.s0, 9U); state.s3 ^= rotate(state.s2 + state.s1, 13U); state.s0 ^= rotate(state.s3 + state.s2, 18U); \
|
||||
state.s6 ^= rotate(state.s5 + state.s4, 7U); state.s7 ^= rotate(state.s6 + state.s5, 9U); state.s4 ^= rotate(state.s7 + state.s6, 13U); state.s5 ^= rotate(state.s4 + state.s7, 18U); \
|
||||
state.sb ^= rotate(state.sa + state.s9, 7U); state.s8 ^= rotate(state.sb + state.sa, 9U); state.s9 ^= rotate(state.s8 + state.sb, 13U); state.sa ^= rotate(state.s9 + state.s8, 18U); \
|
||||
state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \
|
||||
} while(0)
|
||||
|
||||
uint16 salsa_small_scalar_rnd(uint16 X)
|
||||
{
|
||||
uint16 st = X;
|
||||
|
||||
#if SALSA_SMALL_UNROLL == 1
|
||||
|
||||
for(int i = 0; i < 10; ++i)
|
||||
{
|
||||
SALSA_CORE(st);
|
||||
}
|
||||
|
||||
#elif SALSA_SMALL_UNROLL == 2
|
||||
|
||||
for(int i = 0; i < 5; ++i)
|
||||
{
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
}
|
||||
|
||||
#elif SALSA_SMALL_UNROLL == 3
|
||||
|
||||
for(int i = 0; i < 4; ++i)
|
||||
{
|
||||
SALSA_CORE(st);
|
||||
if(i == 3) break;
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
}
|
||||
|
||||
#elif SALSA_SMALL_UNROLL == 4
|
||||
|
||||
for(int i = 0; i < 3; ++i)
|
||||
{
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
if(i == 2) break;
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int i = 0; i < 2; ++i)
|
||||
{
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
SALSA_CORE(st);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return(X + st);
|
||||
}
|
||||
|
||||
#define CHACHA_CORE_PARALLEL(state) do { \
|
||||
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
|
||||
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \
|
||||
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
|
||||
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \
|
||||
\
|
||||
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
|
||||
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \
|
||||
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
|
||||
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \
|
||||
} while(0)
|
||||
|
||||
uint16 chacha_small_parallel_rnd(uint16 X)
|
||||
{
|
||||
uint4 t, st[4];
|
||||
|
||||
((uint16 *)st)[0] = X;
|
||||
|
||||
#if CHACHA_SMALL_UNROLL == 1
|
||||
|
||||
for(int i = 0; i < 10; ++i)
|
||||
{
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
}
|
||||
|
||||
#elif CHACHA_SMALL_UNROLL == 2
|
||||
|
||||
for(int i = 0; i < 5; ++i)
|
||||
{
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
}
|
||||
|
||||
#elif CHACHA_SMALL_UNROLL == 3
|
||||
|
||||
for(int i = 0; i < 4; ++i)
|
||||
{
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
if(i == 3) break;
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
}
|
||||
|
||||
#elif CHACHA_SMALL_UNROLL == 4
|
||||
|
||||
for(int i = 0; i < 3; ++i)
|
||||
{
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
if(i == 2) break;
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int i = 0; i < 2; ++i)
|
||||
{
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
CHACHA_CORE_PARALLEL(st);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return(X + ((uint16 *)st)[0]);
|
||||
}
|
||||
|
||||
void neoscrypt_blkmix(uint16 *XV, bool alg)
|
||||
{
|
||||
|
||||
/* NeoScrypt flow: Scrypt flow:
|
||||
Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa";
|
||||
Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb";
|
||||
Xc ^= Xb"; M(Xc'); Yc = Xc"; Xa" = Ya;
|
||||
Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb;
|
||||
Xa" = Ya; Xb" = Yc;
|
||||
Xc" = Yb; Xd" = Yd; */
|
||||
|
||||
XV[0] ^= XV[3];
|
||||
|
||||
if(!alg)
|
||||
{
|
||||
XV[0] = salsa_small_scalar_rnd(XV[0]); XV[1] ^= XV[0];
|
||||
XV[1] = salsa_small_scalar_rnd(XV[1]); XV[2] ^= XV[1];
|
||||
XV[2] = salsa_small_scalar_rnd(XV[2]); XV[3] ^= XV[2];
|
||||
XV[3] = salsa_small_scalar_rnd(XV[3]);
|
||||
}
|
||||
else
|
||||
{
|
||||
XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0];
|
||||
XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1];
|
||||
XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2];
|
||||
XV[3] = chacha_small_parallel_rnd(XV[3]);
|
||||
}
|
||||
|
||||
XV[1] ^= XV[2];
|
||||
XV[2] ^= XV[1];
|
||||
XV[1] ^= XV[2];
|
||||
}
|
||||
|
||||
void ScratchpadStore(__global void *V, void *X, uchar idx)
|
||||
{
|
||||
((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0];
|
||||
((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1];
|
||||
}
|
||||
|
||||
void ScratchpadMix(void *X, const __global void *V, uchar idx)
|
||||
{
|
||||
((ulong16 *)X)[0] ^= ((__global ulong16 *)V)[idx << 1];
|
||||
((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1];
|
||||
}
|
||||
|
||||
void SMix(uint16 *X, __global uint16 *V, bool flag)
|
||||
{
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < 128; ++i)
|
||||
{
|
||||
ScratchpadStore(V, X, i);
|
||||
neoscrypt_blkmix(X, flag);
|
||||
}
|
||||
|
||||
#pragma unroll 1
|
||||
for(int i = 0; i < 128; ++i)
|
||||
{
|
||||
const uint idx = convert_uchar(((uint *)X)[48] & 0x7F);
|
||||
ScratchpadMix(X, V, idx);
|
||||
neoscrypt_blkmix(X, flag);
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target)
|
||||
{
|
||||
#define CONSTANT_N 128
|
||||
#define CONSTANT_r 2
|
||||
// X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha
|
||||
uint16 X[4], Z[4];
|
||||
/* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */
|
||||
__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
|
||||
uchar outbuf[32];
|
||||
uchar data[PASSWORD_LEN];
|
||||
|
||||
((ulong8 *)data)[0] = ((__global const ulong8 *)input)[0];
|
||||
((ulong *)data)[8] = ((__global const ulong *)input)[8];
|
||||
((uint *)data)[18] = ((__global const uint *)input)[18];
|
||||
((uint *)data)[19] = get_global_id(0);
|
||||
|
||||
// X = KDF(password, salt)
|
||||
fastkdf(data, data, PASSWORD_LEN, (uchar *)X, 256);
|
||||
|
||||
// Process ChaCha 1st, Salsa 2nd and XOR them - run that through PBKDF2
|
||||
CopyBytes128(Z, X, 2);
|
||||
|
||||
// X = SMix(X); X & Z are swapped, repeat.
|
||||
for(bool flag = false;; ++flag)
|
||||
{
|
||||
SMix(X, V, flag);
|
||||
if(flag) break;
|
||||
SwapBytes128(X, Z, 256);
|
||||
}
|
||||
|
||||
// blkxor(X, Z)
|
||||
((ulong16 *)X)[0] ^= ((ulong16 *)Z)[0];
|
||||
((ulong16 *)X)[1] ^= ((ulong16 *)Z)[1];
|
||||
|
||||
// output = KDF(password, X)
|
||||
fastkdf(data, (uchar *)X, FASTKDF_BUFFER_SIZE, outbuf, 32);
|
||||
if(((uint *)outbuf)[7] <= target) output[atomic_add(output + 0xFF, 1)] = get_global_id(0);
|
||||
}
|
1
miner.h
1
miner.h
@ -1100,6 +1100,7 @@ extern pthread_cond_t restart_cond;
|
||||
extern void clear_stratum_shares(struct pool *pool);
|
||||
extern void clear_pool_work(struct pool *pool);
|
||||
extern void set_target(unsigned char *dest_target, double diff, double diff_multiplier2);
|
||||
extern void set_target_neoscrypt(unsigned char *target, double diff);
|
||||
|
||||
extern void kill_work(void);
|
||||
|
||||
|
78
ocl.c
78
ocl.c
@ -34,6 +34,7 @@
|
||||
#include "ocl.h"
|
||||
#include "ocl/build_kernel.h"
|
||||
#include "ocl/binary_kernel.h"
|
||||
#include "algorithm/neoscrypt.h"
|
||||
|
||||
/* FIXME: only here for global config vars, replace with configuration.h
|
||||
* or similar as soon as config is in a struct instead of littered all
|
||||
@ -344,19 +345,55 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
cgpu->lookup_gap = 2;
|
||||
}
|
||||
|
||||
if (!cgpu->opt_tc) {
|
||||
// neoscrypt calculates TC differently
|
||||
if (!safe_cmp(cgpu->algorithm.name, "neoscrypt")) {
|
||||
int max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
|
||||
size_t glob_thread_count = 1UL << max_int;
|
||||
|
||||
// if TC is entered by user, use that value... otherwise use default
|
||||
cgpu->thread_concurrency = ((cgpu->opt_tc) ? cgpu->opt_tc : ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count));
|
||||
|
||||
// if TC * scratchbuf size is too big for memory... reduce to max
|
||||
if (((uint64_t)cgpu->thread_concurrency * NEOSCRYPT_SCRATCHBUF_SIZE) >(uint64_t)cgpu->max_alloc) {
|
||||
/* Selected intensity will not run on this GPU. Not enough memory.
|
||||
* Adapt the memory setting. */
|
||||
glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE;
|
||||
|
||||
/* Find highest significant bit in glob_thread_count, which gives
|
||||
* the intensity. */
|
||||
while (max_int && ((1U << max_int) & glob_thread_count) == 0) {
|
||||
--max_int;
|
||||
}
|
||||
|
||||
/* Check if max_intensity is >0. */
|
||||
if (max_int < MIN_INTENSITY) {
|
||||
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
|
||||
max_int = MIN_INTENSITY;
|
||||
}
|
||||
|
||||
cgpu->intensity = max_int;
|
||||
cgpu->thread_concurrency = 1U << max_int;
|
||||
}
|
||||
|
||||
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
|
||||
|
||||
}
|
||||
else if (!cgpu->opt_tc) {
|
||||
unsigned int sixtyfours;
|
||||
|
||||
sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1;
|
||||
cgpu->thread_concurrency = sixtyfours * 64;
|
||||
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
|
||||
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
|
||||
if (cgpu->thread_concurrency > cgpu->shaders * 5)
|
||||
if (cgpu->thread_concurrency > cgpu->shaders * 5) {
|
||||
cgpu->thread_concurrency = cgpu->shaders * 5;
|
||||
}
|
||||
}
|
||||
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
|
||||
} else
|
||||
}
|
||||
else {
|
||||
cgpu->thread_concurrency = cgpu->opt_tc;
|
||||
}
|
||||
|
||||
|
||||
cl_uint slot, cpnd;
|
||||
@ -445,17 +482,36 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
|
||||
size_t bufsize;
|
||||
size_t readbufsize = 128;
|
||||
|
||||
if (algorithm->rw_buffer_size < 0) {
|
||||
size_t ipt = (algorithm->n / cgpu->lookup_gap +
|
||||
(algorithm->n % cgpu->lookup_gap > 0));
|
||||
bufsize = 128 * ipt * cgpu->thread_concurrency;
|
||||
} else
|
||||
bufsize = (size_t) algorithm->rw_buffer_size;
|
||||
// calc buffer size for neoscrypt
|
||||
if (!safe_cmp(algorithm->name, "neoscrypt")) {
|
||||
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
|
||||
bufsize = NEOSCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
|
||||
|
||||
/* This is the input buffer. For neoscrypt this is guaranteed to be
|
||||
* 80 bytes only. */
|
||||
readbufsize = 80;
|
||||
|
||||
applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
||||
// scrypt/n-scrypt
|
||||
}
|
||||
else {
|
||||
size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0));
|
||||
bufsize = 128 * ipt * cgpu->thread_concurrency;
|
||||
applog(LOG_DEBUG, "Scrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
||||
}
|
||||
}
|
||||
else {
|
||||
bufsize = (size_t)algorithm->rw_buffer_size;
|
||||
applog(LOG_DEBUG, "Buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
|
||||
}
|
||||
|
||||
clState->padbuffer8 = NULL;
|
||||
|
||||
if (bufsize > 0) {
|
||||
applog(LOG_DEBUG, "Creating read/write buffer sized %lu", (unsigned long)bufsize);
|
||||
/* Use the max alloc value which has been rounded to a power of
|
||||
* 2 greater >= required amount earlier */
|
||||
if (bufsize > cgpu->max_alloc) {
|
||||
@ -463,7 +519,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
gpu, (unsigned long)(cgpu->max_alloc));
|
||||
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
|
||||
}
|
||||
applog(LOG_DEBUG, "Creating buffer sized %lu", (unsigned long)bufsize);
|
||||
|
||||
/* This buffer is weird and might work to some degree even if
|
||||
* the create buffer call has apparently failed, so check if we
|
||||
@ -475,11 +530,14 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
|
||||
}
|
||||
}
|
||||
|
||||
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
|
||||
applog(LOG_DEBUG, "Using read buffer sized %lu", (unsigned long)readbufsize);
|
||||
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, readbufsize, NULL, &status);
|
||||
if (status != CL_SUCCESS) {
|
||||
applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
applog(LOG_DEBUG, "Using output buffer sized %lu", BUFFERSIZE);
|
||||
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
|
294
sgminer.c
294
sgminer.c
@ -2019,11 +2019,26 @@ static void update_gbt(struct pool *pool)
|
||||
/* Return the work coin/network difficulty */
|
||||
static double get_work_blockdiff(const struct work *work)
|
||||
{
|
||||
uint8_t pow = work->data[72];
|
||||
int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));
|
||||
uint32_t diff32 = be32toh(*((uint32_t *)(work->data + 72))) & 0x00FFFFFF;
|
||||
double numerator = work->pool->algorithm.diff_numerator << powdiff;
|
||||
return numerator / (double)diff32;
|
||||
uint64_t diff64;
|
||||
double numerator;
|
||||
|
||||
// Neoscrypt has the data reversed
|
||||
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
|
||||
diff64 = bswap_64(((uint64_t)(be32toh(*((uint32_t *)(work->data + 72))) & 0xFFFFFF00)) << 8);
|
||||
numerator = (double)work->pool->algorithm.diff_numerator;
|
||||
}
|
||||
else {
|
||||
uint8_t pow = work->data[72];
|
||||
int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));;
|
||||
diff64 = be32toh(*((uint32_t *)(work->data + 72))) & 0x0000000000FFFFFF;
|
||||
numerator = work->pool->algorithm.diff_numerator << powdiff;
|
||||
}
|
||||
|
||||
if (unlikely(!diff64)) {
|
||||
diff64 = 1;
|
||||
}
|
||||
|
||||
return numerator / (double)diff64;
|
||||
}
|
||||
|
||||
static void gen_gbt_work(struct pool *pool, struct work *work)
|
||||
@ -2073,7 +2088,10 @@ static void gen_gbt_work(struct pool *pool, struct work *work)
|
||||
free(header);
|
||||
}
|
||||
|
||||
calc_midstate(work);
|
||||
// Neoscrypt doesn't calc_midstate()
|
||||
if (safe_cmp(pool->algorithm.name, "neoscrypt")) {
|
||||
calc_midstate(work);
|
||||
}
|
||||
local_work++;
|
||||
work->pool = pool;
|
||||
work->gbt = true;
|
||||
@ -2189,10 +2207,15 @@ static bool getwork_decode(json_t *res_val, struct work *work)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) {
|
||||
// Calculate it ourselves
|
||||
applog(LOG_DEBUG, "%s: Calculating midstate locally", isnull(get_pool_name(work->pool), ""));
|
||||
calc_midstate(work);
|
||||
// Neoscrypt doesn't calc midstate
|
||||
if (safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
|
||||
if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) {
|
||||
// Calculate it ourselves
|
||||
if (opt_morenotices) {
|
||||
applog(LOG_DEBUG, "%s: Calculating midstate locally", isnull(get_pool_name(work->pool), ""));
|
||||
}
|
||||
calc_midstate(work);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!jobj_binary(res_val, "target", work->target, sizeof(work->target), true))) {
|
||||
@ -2936,8 +2959,8 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s
|
||||
|
||||
endian_flip128(work->data, work->data);
|
||||
|
||||
/* build hex string */
|
||||
hexstr = bin2hex(work->data, sizeof(work->data));
|
||||
/* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */
|
||||
hexstr = bin2hex(work->data, ((!safe_cmp(work->pool->algorithm.name, "neoscrypt")) ? 80 : sizeof(work->data)));
|
||||
|
||||
/* build JSON-RPC request */
|
||||
if (work->gbt) {
|
||||
@ -3304,11 +3327,19 @@ static void calc_diff(struct work *work, double known)
|
||||
|
||||
d64 = work->pool->algorithm.diff_multiplier2 * truediffone;
|
||||
|
||||
dcut64 = le256todouble(work->target);
|
||||
applog(LOG_DEBUG, "calc_diff() algorithm = %s", work->pool->algorithm.name);
|
||||
// Neoscrypt
|
||||
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
|
||||
dcut64 = (double)*((uint64_t *)(work->target + 22));
|
||||
}
|
||||
else {
|
||||
dcut64 = le256todouble(work->target);
|
||||
}
|
||||
if (unlikely(!dcut64))
|
||||
dcut64 = 1;
|
||||
work->work_difficulty = d64 / dcut64;
|
||||
}
|
||||
|
||||
difficulty = work->work_difficulty;
|
||||
|
||||
pool_stats->last_diff = difficulty;
|
||||
@ -5465,8 +5496,21 @@ static void *stratum_sthread(void *userdata)
|
||||
sshare->sshare_time = time(NULL);
|
||||
/* This work item is freed in parse_stratum_response */
|
||||
sshare->work = work;
|
||||
nonce = *((uint32_t *)(work->data + 76));
|
||||
|
||||
applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name);
|
||||
|
||||
// Neoscrypt is little endian
|
||||
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
|
||||
nonce = htobe32(*((uint32_t *)(work->data + 76)));
|
||||
//*((uint32_t *)nonce2) = htole32(work->nonce2);
|
||||
}
|
||||
else {
|
||||
nonce = *((uint32_t *)(work->data + 76));
|
||||
}
|
||||
__bin2hex(noncehex, (const unsigned char *)&nonce, 4);
|
||||
|
||||
*((uint64_t *)nonce2) = htole64(work->nonce2);
|
||||
__bin2hex(nonce2hex, nonce2, work->nonce2_len);
|
||||
memset(s, 0, 1024);
|
||||
|
||||
mutex_lock(&sshare_lock);
|
||||
@ -5474,10 +5518,6 @@ static void *stratum_sthread(void *userdata)
|
||||
sshare->id = swork_id++;
|
||||
mutex_unlock(&sshare_lock);
|
||||
|
||||
nonce2_64 = (uint64_t *)nonce2;
|
||||
*nonce2_64 = htole64(work->nonce2);
|
||||
__bin2hex(nonce2hex, nonce2, work->nonce2_len);
|
||||
|
||||
snprintf(s, sizeof(s),
|
||||
"{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\": %d, \"method\": \"mining.submit\"}",
|
||||
pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, sshare->id);
|
||||
@ -5885,6 +5925,50 @@ void set_target(unsigned char *dest_target, double diff, double diff_multiplier2
|
||||
memcpy(dest_target, target, 32);
|
||||
}
|
||||
|
||||
/*****************************************************
|
||||
* Special set_target() function for Neoscrypt
|
||||
****************************************************/
|
||||
void set_target_neoscrypt(unsigned char *target, double diff)
|
||||
{
|
||||
uint64_t m;
|
||||
int k;
|
||||
|
||||
diff /= 65536.0;
|
||||
for (k = 6; k > 0 && diff > 1.0; --k) {
|
||||
diff /= 4294967296.0;
|
||||
}
|
||||
|
||||
m = 4294901760.0 / diff;
|
||||
|
||||
if (m == 0 && k == 6) {
|
||||
memset(target, 0xff, 32);
|
||||
}
|
||||
else {
|
||||
memset(target, 0, 32);
|
||||
((uint32_t *)target)[k] = (uint32_t)m;
|
||||
((uint32_t *)target)[k + 1] = (uint32_t)(m >> 32);
|
||||
}
|
||||
|
||||
if (opt_debug) {
|
||||
/* The target is computed in this systems endianess and stored
|
||||
* in its endianess on a uint32-level. But because the target are
|
||||
* eight uint32s, they are stored in mixed mode, i.e., each uint32
|
||||
* is stored in the local endianess, but the least significant bit
|
||||
* is stored in target[0] bit 0.
|
||||
*
|
||||
* To print this large number in a native human readable form the
|
||||
* order of the array entries is swapped, i.e., target[7] <-> target[0]
|
||||
* and each array entry is byte swapped to have the least significant
|
||||
* bit to the right. */
|
||||
uint32_t swaped[8];
|
||||
swab256(swaped, target);
|
||||
char *htarget = bin2hex((unsigned char *)swaped, 32);
|
||||
|
||||
applog(LOG_DEBUG, "Generated neoscrypt target 0x%s", htarget);
|
||||
free(htarget);
|
||||
}
|
||||
}
|
||||
|
||||
/* Generates stratum based work based on the most recent notify information
|
||||
* from the pool. This will keep generating work while a pool is down so we use
|
||||
* other means to detect when the pool has died in stratum_thread */
|
||||
@ -5893,12 +5977,12 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
|
||||
unsigned char merkle_root[32], merkle_sha[64];
|
||||
uint32_t *data32, *swap32;
|
||||
uint64_t nonce2le;
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
cg_wlock(&pool->data_lock);
|
||||
|
||||
/* Update coinbase. Always use an LE encoded nonce2 to fill in values
|
||||
* from left to right and prevent overflow errors with small n2sizes */
|
||||
* from left to right and prevent overflow errors with small n2sizes */
|
||||
nonce2le = htole64(pool->nonce2);
|
||||
memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size);
|
||||
work->nonce2 = pool->nonce2++;
|
||||
@ -5915,16 +5999,50 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
|
||||
gen_hash(merkle_sha, 64, merkle_root);
|
||||
memcpy(merkle_sha, merkle_root, 32);
|
||||
}
|
||||
data32 = (uint32_t *)merkle_sha;
|
||||
swap32 = (uint32_t *)merkle_root;
|
||||
flip32(swap32, data32);
|
||||
|
||||
/* Copy the data template from header_bin */
|
||||
memcpy(work->data, pool->header_bin, 128);
|
||||
memcpy(work->data + pool->merkle_offset, merkle_root, 32);
|
||||
applog(LOG_DEBUG, "gen_stratum_work() - algorithm = %s", pool->algorithm.name);
|
||||
|
||||
// Different for Neoscrypt because of Little Endian
|
||||
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
|
||||
/* Incoming data is in little endian. */
|
||||
memcpy(merkle_root, merkle_sha, 32);
|
||||
|
||||
uint32_t temp = pool->merkle_offset / sizeof(uint32_t), i;
|
||||
/* Put version (4 byte) + prev_hash (4 byte* 8) but big endian encoded
|
||||
* into work. */
|
||||
for (i = 0; i < temp; ++i) {
|
||||
((uint32_t *)work->data)[i] = be32toh(((uint32_t *)pool->header_bin)[i]);
|
||||
}
|
||||
|
||||
/* Now add the merkle_root (4 byte* 8), but it is encoded in little endian. */
|
||||
temp += 8;
|
||||
|
||||
for (j = 0; i < temp; ++i, ++j) {
|
||||
((uint32_t *)work->data)[i] = le32toh(((uint32_t *)merkle_root)[j]);
|
||||
}
|
||||
|
||||
/* Add the time encoded in big endianess. */
|
||||
hex2bin((unsigned char *)&temp, pool->swork.ntime, 4);
|
||||
|
||||
/* Add the nbits (big endianess). */
|
||||
((uint32_t *)work->data)[17] = be32toh(temp);
|
||||
hex2bin((unsigned char *)&temp, pool->swork.nbit, 4);
|
||||
((uint32_t *)work->data)[18] = be32toh(temp);
|
||||
((uint32_t *)work->data)[20] = 0x80000000;
|
||||
((uint32_t *)work->data)[31] = 0x00000280;
|
||||
}
|
||||
else {
|
||||
data32 = (uint32_t *)merkle_sha;
|
||||
swap32 = (uint32_t *)merkle_root;
|
||||
flip32(swap32, data32);
|
||||
|
||||
/* Copy the data template from header_bin */
|
||||
memcpy(work->data, pool->header_bin, 128);
|
||||
memcpy(work->data + pool->merkle_offset, merkle_root, 32);
|
||||
}
|
||||
|
||||
/* Store the stratum work diff to check it still matches the pool's
|
||||
* stratum diff when submitting shares */
|
||||
* stratum diff when submitting shares */
|
||||
work->sdiff = pool->swork.diff;
|
||||
|
||||
/* Copy parameters required for share submission */
|
||||
@ -5941,13 +6059,19 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
|
||||
applog(LOG_DEBUG, "Generated stratum merkle %s", merkle_hash);
|
||||
applog(LOG_DEBUG, "Generated stratum header %s", header);
|
||||
applog(LOG_DEBUG, "Work job_id %s nonce2 %"PRIu64" ntime %s", work->job_id,
|
||||
work->nonce2, work->ntime);
|
||||
work->nonce2, work->ntime);
|
||||
free(header);
|
||||
free(merkle_hash);
|
||||
}
|
||||
|
||||
calc_midstate(work);
|
||||
set_target(work->target, work->sdiff, pool->algorithm.diff_multiplier2);
|
||||
// For Neoscrypt use set_target_neoscrypt() function
|
||||
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
|
||||
set_target_neoscrypt(work->target, work->sdiff);
|
||||
}
|
||||
else {
|
||||
calc_midstate(work);
|
||||
set_target(work->target, work->sdiff, pool->algorithm.diff_multiplier2);
|
||||
}
|
||||
|
||||
local_work++;
|
||||
work->pool = pool;
|
||||
@ -6124,15 +6248,17 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
unsigned int options = 0;
|
||||
const char *opt1, *opt2;
|
||||
|
||||
if(!pool1 || !pool2)
|
||||
applog(LOG_DEBUG, "compare_pool_settings()");
|
||||
|
||||
if (!pool1 || !pool2)
|
||||
return 0;
|
||||
|
||||
//compare pool devices
|
||||
opt1 = get_pool_setting(pool1->devices, ((!empty_string(default_profile.devices))?default_profile.devices:"all"));
|
||||
opt2 = get_pool_setting(pool2->devices, ((!empty_string(default_profile.devices))?default_profile.devices:"all"));
|
||||
opt1 = get_pool_setting(pool1->devices, ((!empty_string(default_profile.devices)) ? default_profile.devices : "all"));
|
||||
opt2 = get_pool_setting(pool2->devices, ((!empty_string(default_profile.devices)) ? default_profile.devices : "all"));
|
||||
|
||||
//changing devices means a hard reset of mining threads
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
options |= (SWITCHER_APPLY_DEVICE | SWITCHER_HARD_RESET);
|
||||
|
||||
//compare gpu threads
|
||||
@ -6140,11 +6266,11 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
opt2 = get_pool_setting(pool2->gpu_threads, default_profile.gpu_threads);
|
||||
|
||||
//changing gpu threads means a hard reset of mining threads
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
options |= (SWITCHER_APPLY_GT | SWITCHER_HARD_RESET);
|
||||
|
||||
//compare algorithm
|
||||
if(!cmp_algorithm(&pool1->algorithm, &pool2->algorithm))
|
||||
if (!cmp_algorithm(&pool1->algorithm, &pool2->algorithm))
|
||||
options |= (SWITCHER_APPLY_ALGO | SWITCHER_SOFT_RESET);
|
||||
|
||||
//lookup gap
|
||||
@ -6152,46 +6278,46 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
opt2 = get_pool_setting(pool2->lookup_gap, default_profile.lookup_gap);
|
||||
|
||||
//lookup gap means soft reset but only if hard reset isnt set
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
options |= (SWITCHER_APPLY_LG | SWITCHER_SOFT_RESET);
|
||||
|
||||
//intensities
|
||||
opt1 = get_pool_setting(pool1->rawintensity, default_profile.rawintensity);
|
||||
opt2 = get_pool_setting(pool2->rawintensity, default_profile.rawintensity);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
{
|
||||
//intensity is soft reset
|
||||
if(!empty_string(opt2))
|
||||
if (!empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_RAWINT | SWITCHER_SOFT_RESET);
|
||||
}
|
||||
|
||||
//xintensity -- only if raw intensity not set
|
||||
if(!opt_isset(options, SWITCHER_APPLY_RAWINT))
|
||||
if (!opt_isset(options, SWITCHER_APPLY_RAWINT))
|
||||
{
|
||||
opt1 = get_pool_setting(pool1->xintensity, default_profile.xintensity);
|
||||
opt2 = get_pool_setting(pool2->xintensity, default_profile.xintensity);
|
||||
|
||||
//if different...
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
{
|
||||
//intensity is soft reset
|
||||
if(!empty_string(opt2))
|
||||
if (!empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_XINT | SWITCHER_SOFT_RESET);
|
||||
}
|
||||
}
|
||||
|
||||
//intensity -- only if raw intensity and xintensity not set
|
||||
if(!opt_isset(options, SWITCHER_APPLY_RAWINT) && !opt_isset(options, SWITCHER_APPLY_XINT))
|
||||
if (!opt_isset(options, SWITCHER_APPLY_RAWINT) && !opt_isset(options, SWITCHER_APPLY_XINT))
|
||||
{
|
||||
opt1 = get_pool_setting(pool1->intensity, default_profile.intensity);
|
||||
opt2 = get_pool_setting(pool2->intensity, default_profile.intensity);
|
||||
|
||||
//if different...
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
{
|
||||
//intensity is soft reset
|
||||
if(!empty_string(opt2))
|
||||
if (!empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_INT | SWITCHER_SOFT_RESET);
|
||||
//if blank, set default profile to intensity 8 and apply
|
||||
else
|
||||
@ -6203,10 +6329,10 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
opt1 = get_pool_setting(pool1->shaders, default_profile.shaders);
|
||||
opt2 = get_pool_setting(pool2->shaders, default_profile.shaders);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0)
|
||||
if (strcasecmp(opt1, opt2) != 0)
|
||||
{
|
||||
//shaders is soft reset
|
||||
if(!empty_string(opt2))
|
||||
if (!empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_SHADER | SWITCHER_SOFT_RESET);
|
||||
}
|
||||
|
||||
@ -6215,7 +6341,7 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
opt2 = get_pool_setting(pool2->thread_concurrency, default_profile.thread_concurrency);
|
||||
|
||||
//thread-concurrency is soft reset
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_TC | SWITCHER_SOFT_RESET);
|
||||
|
||||
//worksize
|
||||
@ -6223,45 +6349,45 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
|
||||
opt2 = get_pool_setting(pool2->worksize, default_profile.worksize);
|
||||
|
||||
//worksize is soft reset
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_WORKSIZE | SWITCHER_SOFT_RESET);
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= (SWITCHER_APPLY_WORKSIZE | SWITCHER_SOFT_RESET);
|
||||
|
||||
#ifdef HAVE_ADL
|
||||
//gpu-engine
|
||||
opt1 = get_pool_setting(pool1->gpu_engine, default_profile.gpu_engine);
|
||||
opt2 = get_pool_setting(pool2->gpu_engine, default_profile.gpu_engine);
|
||||
#ifdef HAVE_ADL
|
||||
//gpu-engine
|
||||
opt1 = get_pool_setting(pool1->gpu_engine, default_profile.gpu_engine);
|
||||
opt2 = get_pool_setting(pool2->gpu_engine, default_profile.gpu_engine);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_ENGINE;
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_ENGINE;
|
||||
|
||||
//gpu-memclock
|
||||
opt1 = get_pool_setting(pool1->gpu_memclock, default_profile.gpu_memclock);
|
||||
opt2 = get_pool_setting(pool2->gpu_memclock, default_profile.gpu_memclock);
|
||||
//gpu-memclock
|
||||
opt1 = get_pool_setting(pool1->gpu_memclock, default_profile.gpu_memclock);
|
||||
opt2 = get_pool_setting(pool2->gpu_memclock, default_profile.gpu_memclock);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_MEMCLOCK;
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_MEMCLOCK;
|
||||
|
||||
//GPU fans
|
||||
opt1 = get_pool_setting(pool1->gpu_fan, default_profile.gpu_fan);
|
||||
opt2 = get_pool_setting(pool2->gpu_fan, default_profile.gpu_fan);
|
||||
//GPU fans
|
||||
opt1 = get_pool_setting(pool1->gpu_fan, default_profile.gpu_fan);
|
||||
opt2 = get_pool_setting(pool2->gpu_fan, default_profile.gpu_fan);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_FAN;
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_FAN;
|
||||
|
||||
//GPU powertune
|
||||
opt1 = get_pool_setting(pool1->gpu_powertune, default_profile.gpu_powertune);
|
||||
opt2 = get_pool_setting(pool2->gpu_powertune, default_profile.gpu_powertune);
|
||||
//GPU powertune
|
||||
opt1 = get_pool_setting(pool1->gpu_powertune, default_profile.gpu_powertune);
|
||||
opt2 = get_pool_setting(pool2->gpu_powertune, default_profile.gpu_powertune);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_POWERTUNE;
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_POWERTUNE;
|
||||
|
||||
//GPU vddc
|
||||
opt1 = get_pool_setting(pool1->gpu_vddc, default_profile.gpu_vddc);
|
||||
opt2 = get_pool_setting(pool2->gpu_vddc, default_profile.gpu_vddc);
|
||||
//GPU vddc
|
||||
opt1 = get_pool_setting(pool1->gpu_vddc, default_profile.gpu_vddc);
|
||||
opt2 = get_pool_setting(pool2->gpu_vddc, default_profile.gpu_vddc);
|
||||
|
||||
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_VDDC;
|
||||
#endif
|
||||
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
|
||||
options |= SWITCHER_APPLY_GPU_VDDC;
|
||||
#endif
|
||||
|
||||
// Remove soft reset if hard reset is set
|
||||
if (opt_isset(options, SWITCHER_HARD_RESET) &&
|
||||
@ -6281,6 +6407,8 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
|
||||
{
|
||||
int i;
|
||||
|
||||
applog(LOG_DEBUG, "get_work_prepare_thread()");
|
||||
|
||||
//if switcher is disabled
|
||||
if(opt_switchmode == SWITCH_OFF)
|
||||
return;
|
||||
@ -6608,6 +6736,7 @@ struct work *get_work(struct thr_info *thr, const int thr_id)
|
||||
}
|
||||
}
|
||||
|
||||
applog(LOG_DEBUG, "preparing thread...");
|
||||
get_work_prepare_thread(thr, work);
|
||||
|
||||
diff_t = time(NULL) - diff_t;
|
||||
@ -6700,7 +6829,16 @@ bool test_nonce(struct work *work, uint32_t nonce)
|
||||
uint32_t diff1targ;
|
||||
|
||||
rebuild_nonce(work, nonce);
|
||||
diff1targ = work->pool->algorithm.diff1targ;
|
||||
|
||||
applog(LOG_DEBUG, "test_nonce() algorithm = %s", work->pool->algorithm.name);
|
||||
|
||||
// for Neoscrypt, the diff1targe value is in work->target
|
||||
if ((work->pool->algorithm.name, "neoscrypt")) {
|
||||
diff1targ = ((uint32_t *)work->target)[7];
|
||||
}
|
||||
else {
|
||||
diff1targ = work->pool->algorithm.diff1targ;
|
||||
}
|
||||
|
||||
return (le32toh(*hash_32) <= diff1targ);
|
||||
}
|
||||
|
@ -263,6 +263,7 @@
|
||||
<ClCompile Include="..\algorithm.c" />
|
||||
<ClCompile Include="..\algorithm\animecoin.c" />
|
||||
<ClCompile Include="..\algorithm\bitblock.c" />
|
||||
<ClCompile Include="..\algorithm\neoscrypt.c" />
|
||||
<ClCompile Include="..\algorithm\talkcoin.c" />
|
||||
<ClCompile Include="..\algorithm\x14.c" />
|
||||
<ClCompile Include="..\algorithm\fresh.c" />
|
||||
@ -321,6 +322,7 @@
|
||||
<ClInclude Include="..\algorithm.h" />
|
||||
<ClInclude Include="..\algorithm\animecoin.h" />
|
||||
<ClInclude Include="..\algorithm\bitblock.h" />
|
||||
<ClInclude Include="..\algorithm\neoscrypt.h" />
|
||||
<ClInclude Include="..\algorithm\talkcoin.h" />
|
||||
<ClInclude Include="..\algorithm\x14.h" />
|
||||
<ClInclude Include="..\algorithm\fresh.h" />
|
||||
|
@ -197,6 +197,9 @@
|
||||
<ClCompile Include="..\algorithm.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\algorithm\neoscrypt.c">
|
||||
<Filter>Source Files\algorithm</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\adl.h">
|
||||
@ -373,6 +376,9 @@
|
||||
<ClInclude Include="..\sph\sph_shabal.h">
|
||||
<Filter>Header Files\sph</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\algorithm\neoscrypt.h">
|
||||
<Filter>Header Files\algorithm</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="README.txt" />
|
||||
|
Loading…
Reference in New Issue
Block a user