
Added Neoscrypt with Wolf9466 improvements.

djm34
ystarnaud authored 10 years ago, committed by troky
parent commit 42737acf66
  1. Makefile.am (1 changed line)
  2. algorithm.c (41 changed lines)
  3. algorithm.h (3 changed lines)
  4. algorithm/neoscrypt.c (1411 changed lines)
  5. algorithm/neoscrypt.h (13 changed lines)
  6. kernel/neoscrypt.cl (525 changed lines)
  7. miner.h (1 changed line)
  8. ocl.c (76 changed lines)
  9. sgminer.c (212 changed lines)
  10. winbuild/sgminer.vcxproj (2 changed lines)
  11. winbuild/sgminer.vcxproj.filters (6 changed lines)

Makefile.am (1 changed line)

@@ -65,6 +65,7 @@ sgminer_SOURCES += algorithm/talkcoin.c algorithm/talkcoin.h
sgminer_SOURCES += algorithm/bitblock.c algorithm/bitblock.h
sgminer_SOURCES += algorithm/x14.c algorithm/x14.h
sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

algorithm.c (41 changed lines)

@@ -29,6 +29,7 @@
#include "algorithm/bitblock.h"
#include "algorithm/x14.h"
#include "algorithm/fresh.h"
#include "algorithm/neoscrypt.h"
#include "compat.h"
@@ -92,6 +93,17 @@ static void append_scrypt_compiler_options(struct _build_kernel_data *data, stru
strcat(data->binary_filename, buf);
}
static void append_neoscrypt_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
char buf[255];
sprintf(buf, " -D MAX_GLOBAL_THREADS=%u",
(unsigned int)cgpu->thread_concurrency);
strcat(data->compiler_options, buf);
sprintf(buf, "tc%u", (unsigned int)cgpu->thread_concurrency);
strcat(data->binary_filename, buf);
}
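For reference, the two strings built here come out as follows for a hypothetical thread concurrency of 32768 (the base binary file name below is made up; sgminer derives the real one from the kernel and device names). A standalone sketch:

#include <stdio.h>
#include <string.h>

/* Standalone sketch of the string building above, assuming a thread
 * concurrency of 32768 and a made-up base binary file name. */
int main(void)
{
    char compiler_options[256] = "";
    char binary_filename[256] = "neoscryptexamplegpu";  /* hypothetical base name */
    unsigned int thread_concurrency = 32768;
    char buf[255];

    sprintf(buf, " -D MAX_GLOBAL_THREADS=%u", thread_concurrency);
    strcat(compiler_options, buf);
    sprintf(buf, "tc%u", thread_concurrency);
    strcat(binary_filename, buf);

    /* Prints " -D MAX_GLOBAL_THREADS=32768" and "neoscryptexamplegputc32768" */
    printf("%s\n%s\n", compiler_options, binary_filename);
    return 0;
}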
static void append_x11_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
{
char buf[255];
@@ -140,6 +152,30 @@ static cl_int queue_scrypt_kernel(struct __clState *clState, struct _dev_blk_ctx
return status;
}
static cl_int queue_neoscrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_uint le_target;
cl_int status = 0;
/* This looks like an unnecessary double cast, but to make sure that
* the target's most significant entry is addressed as a 32-bit value
* and not accidentally by something else, the double cast seems wise.
* The compiler will get rid of it anyway.
*/
le_target = (cl_uint)le32toh(((uint32_t *)blk->work->/*device_*/target)[7]);
memcpy(clState->cldata, blk->work->data, 80);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->padbuffer8);
CL_SET_ARG(le_target);
return status;
}
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
@@ -597,6 +633,11 @@ static algorithm_settings_t algos[] = {
A_SCRYPT( "zuikkis" ),
#undef A_SCRYPT
#define A_NEOSCRYPT(a) \
{ a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
A_NEOSCRYPT("neoscrypt"),
#undef A_NEOSCRYPT
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \
{ a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options }

algorithm.h (3 changed lines)

@@ -23,7 +23,8 @@ typedef enum {
ALGO_TWE,
ALGO_FUGUE,
ALGO_NIST,
ALGO_FRESH
ALGO_FRESH,
ALGO_NEOSCRYPT
} algorithm_type_t;
extern const char *algorithm_type_str[];

algorithm/neoscrypt.c (1411 changed lines)

File diff suppressed because it is too large.

algorithm/neoscrypt.h (13 changed lines)

@@ -0,0 +1,13 @@
#ifndef NEOSCRYPT_H
#define NEOSCRYPT_H
#include "miner.h"
/* The neoscrypt scratch buffer needs 32 kBytes of memory. */
#define NEOSCRYPT_SCRATCHBUF_SIZE (32 * 1024)
/* These routines are always available. */
extern void neoscrypt_regenhash(struct work *work);
extern void neoscrypt(const unsigned char *input, unsigned char *output, unsigned int profile);
#endif /* NEOSCRYPT_H */

kernel/neoscrypt.cl (525 changed lines)

@@ -0,0 +1,525 @@
/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */
/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */
// Stupid AMD compiler ignores the unroll pragma in these two
#define SALSA_SMALL_UNROLL 3
#define CHACHA_SMALL_UNROLL 3
// If SMALL_BLAKE2S is defined, BLAKE2S_UNROLL is interpreted
// as the unroll factor; must divide cleanly into ten.
// Usually a bad idea.
//#define SMALL_BLAKE2S
//#define BLAKE2S_UNROLL 5
#define BLOCK_SIZE 64U
#define FASTKDF_BUFFER_SIZE 256U
#ifndef PASSWORD_LEN
#define PASSWORD_LEN 80U
#endif
#if !defined(cl_khr_byte_addressable_store)
#error "Device does not support unaligned stores"
#endif
// Swaps 128 bytes at a time without using temp vars
void SwapBytes128(void *restrict A, void *restrict B, uint len)
{
#pragma unroll 2
for(int i = 0; i < (len >> 7); ++i)
{
((ulong16 *)A)[i] ^= ((ulong16 *)B)[i];
((ulong16 *)B)[i] ^= ((ulong16 *)A)[i];
((ulong16 *)A)[i] ^= ((ulong16 *)B)[i];
}
}
void CopyBytes128(void *restrict dst, const void *restrict src, uint len)
{
#pragma unroll 2
for(int i = 0; i < len; ++i)
((ulong16 *)dst)[i] = ((ulong16 *)src)[i];
}
void CopyBytes(void *restrict dst, const void *restrict src, uint len)
{
for(int i = 0; i < len; ++i)
((uchar *)dst)[i] = ((uchar *)src)[i];
}
void XORBytesInPlace(void *restrict dst, const void *restrict src, uint len)
{
for(int i = 0; i < len; ++i)
((uchar *)dst)[i] ^= ((uchar *)src)[i];
}
void XORBytes(void *restrict dst, const void *restrict src1, const void *restrict src2, uint len)
{
#pragma unroll 1
for(int i = 0; i < len; ++i)
((uchar *)dst)[i] = ((uchar *)src1)[i] ^ ((uchar *)src2)[i];
}
// Blake2S
#define BLAKE2S_BLOCK_SIZE 64U
#define BLAKE2S_OUT_SIZE 32U
#define BLAKE2S_KEY_SIZE 32U
static const __constant uint BLAKE2S_IV[8] =
{
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
static const __constant uchar BLAKE2S_SIGMA[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
#define BLAKE_G(idx0, idx1, a, b, c, d, key) do { \
a += b + key[BLAKE2S_SIGMA[idx0][idx1]]; \
d = rotate(d ^ a, 16U); \
c += d; \
b = rotate(b ^ c, 20U); \
a += b + key[BLAKE2S_SIGMA[idx0][idx1 + 1]]; \
d = rotate(d ^ a, 24U); \
c += d; \
b = rotate(b ^ c, 25U); \
} while(0)
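The rotation amounts 16, 20, 24 and 25 above look unusual next to the BLAKE2s reference, which specifies right rotations by 16, 12, 8 and 7. OpenCL's rotate() is a left rotation, so these are simply the complementary amounts. A small standalone C check of that equivalence (not part of the patch):

#include <stdint.h>
#include <assert.h>

/* 32-bit left and right rotations; OpenCL's rotate() is a left rotate. */
static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

int main(void)
{
    uint32_t x = 0xDEADBEEFu;
    /* BLAKE2s G uses right rotations by 16, 12, 8 and 7; the kernel's
     * rotate(..., 16U/20U/24U/25U) calls are the equivalent left rotations. */
    assert(rotl32(x, 16) == rotr32(x, 16));
    assert(rotl32(x, 20) == rotr32(x, 12));
    assert(rotl32(x, 24) == rotr32(x, 8));
    assert(rotl32(x, 25) == rotr32(x, 7));
    return 0;
}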
void Blake2S(uint *restrict inout, const uint *restrict inkey)
{
uint16 V;
uint8 tmpblock;
// Load first block (IV into V.lo) and constants (IV into V.hi)
V.lo = V.hi = vload8(0U, BLAKE2S_IV);
// XOR with initial constant
V.s0 ^= 0x01012020;
// Copy input block for later
tmpblock = V.lo;
// XOR length of message so far (including this block)
// There are two uints for this field, but high uint is zero
V.sc ^= BLAKE2S_BLOCK_SIZE;
// Compress state, using the key as the key
#ifdef SMALL_BLAKE2S
#pragma unroll BLAKE2S_UNROLL
#else
#pragma unroll
#endif
for(int x = 0; x < 10; ++x)
{
BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inkey);
BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inkey);
BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inkey);
BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inkey);
BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inkey);
BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inkey);
BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inkey);
BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inkey);
}
// XOR low part of state with the high part,
// then with the original input block.
V.lo ^= V.hi ^ tmpblock;
// Load constants (IV into V.hi)
V.hi = vload8(0U, BLAKE2S_IV);
// Copy input block for later
tmpblock = V.lo;
// XOR length of message into block again
V.sc ^= BLAKE2S_BLOCK_SIZE << 1;
// Last block compression - XOR final constant into state
V.se ^= 0xFFFFFFFFU;
// Compress block, using the input as the key
#ifdef SMALL_BLAKE2S
#pragma unroll BLAKE2S_UNROLL
#else
#pragma unroll
#endif
for(int x = 0; x < 10; ++x)
{
BLAKE_G(x, 0x00, V.s0, V.s4, V.s8, V.sc, inout);
BLAKE_G(x, 0x02, V.s1, V.s5, V.s9, V.sd, inout);
BLAKE_G(x, 0x04, V.s2, V.s6, V.sa, V.se, inout);
BLAKE_G(x, 0x06, V.s3, V.s7, V.sb, V.sf, inout);
BLAKE_G(x, 0x08, V.s0, V.s5, V.sa, V.sf, inout);
BLAKE_G(x, 0x0A, V.s1, V.s6, V.sb, V.sc, inout);
BLAKE_G(x, 0x0C, V.s2, V.s7, V.s8, V.sd, inout);
BLAKE_G(x, 0x0E, V.s3, V.s4, V.s9, V.se, inout);
}
// XOR low part of state with high part, then with input block
V.lo ^= V.hi ^ tmpblock;
// Store result in input/output buffer
vstore8(V.lo, 0, inout);
}
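The 0x01012020 constant XORed into V.s0 corresponds, per the BLAKE2s parameter block, to a 32-byte digest with a 32-byte key, fanout 1 and depth 1. A minimal standalone sketch of that packing:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* BLAKE2s parameter word 0: digest_length | (key_length << 8) |
     * (fanout << 16) | (depth << 24). With a 32-byte digest, 32-byte key,
     * fanout 1 and depth 1 this packs to the constant XORed into V.s0. */
    uint32_t param0 = 32u | (32u << 8) | (1u << 16) | (1u << 24);
    printf("0x%08X\n", (unsigned)param0); /* prints 0x01012020 */
    return 0;
}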
/* FastKDF, a fast buffered key derivation function:
* FASTKDF_BUFFER_SIZE must be a power of 2;
* password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE;
* prf_output_size must be <= prf_key_size; */
void fastkdf(const uchar *restrict password, const uchar *restrict salt, const uint salt_len, uchar *restrict output, uint output_len)
{
/* WARNING!
* This algorithm uses byte-wise addressing for memory blocks.
* In other words, trying to copy an unaligned memory region
* will significantly slow down the algorithm when the copy uses
* words or bigger entities, and it may even corrupt the data if
* the device does not support such accesses properly.
* Therefore use byte copying, which will not be the fastest but
* at least gives reliable results. */
// BLOCK_SIZE 64U
// FASTKDF_BUFFER_SIZE 256U
// BLAKE2S_BLOCK_SIZE 64U
// BLAKE2S_KEY_SIZE 32U
// BLAKE2S_OUT_SIZE 32U
uchar bufidx = 0;
uint8 Abuffer[9], Bbuffer[9] = { (uint8)(0) };
uchar *A = (uchar *)Abuffer, *B = (uchar *)Bbuffer;
// Initialize the password buffer
#pragma unroll 1
for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)A)[i] = ((ulong *)password)[i % 10];
((uint16 *)(A + FASTKDF_BUFFER_SIZE))[0] = ((uint16 *)password)[0];
// Initialize the salt buffer
if(salt_len == FASTKDF_BUFFER_SIZE)
{
((ulong16 *)B)[0] = ((ulong16 *)B)[2] = ((ulong16 *)salt)[0];
((ulong16 *)B)[1] = ((ulong16 *)B)[3] = ((ulong16 *)salt)[1];
}
else
{
// salt_len is 80 bytes here
#pragma unroll 1
for(int i = 0; i < (FASTKDF_BUFFER_SIZE >> 3); ++i) ((ulong *)B)[i] = ((ulong *)salt)[i % 10];
// Initialized the rest to zero earlier
#pragma unroll 1
for(int i = 0; i < 10; ++i) ((ulong *)(B + FASTKDF_BUFFER_SIZE))[i] = ((ulong *)salt)[i];
}
// The primary iteration
#pragma unroll 1
for(int i = 0; i < 32; ++i)
{
// Make the key buffer twice the size of the key so it fits a Blake2S block
// This way, we don't need a temp buffer in the Blake2S function.
uchar input[BLAKE2S_BLOCK_SIZE], key[BLAKE2S_BLOCK_SIZE] = { 0 };
// Copy input and key to their buffers
CopyBytes(input, A + bufidx, BLAKE2S_BLOCK_SIZE);
CopyBytes(key, B + bufidx, BLAKE2S_KEY_SIZE);
// PRF
Blake2S((uint *)input, (uint *)key);
// Calculate the next buffer pointer
bufidx = 0;
for(int x = 0; x < BLAKE2S_OUT_SIZE; ++x)
bufidx += input[x];
// bufidx is a uchar now - it wraps mod 256, so it always stays inside the 256-byte buffer
//bufidx &= (FASTKDF_BUFFER_SIZE - 1);
// Modify the salt buffer
XORBytesInPlace(B + bufidx, input, BLAKE2S_OUT_SIZE);
if(bufidx < BLAKE2S_KEY_SIZE)
{
// Head modified, tail updated
// this was made off the original code... wtf
//CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, min(BLAKE2S_OUT_SIZE, BLAKE2S_KEY_SIZE - bufidx));
CopyBytes(B + FASTKDF_BUFFER_SIZE + bufidx, B + bufidx, BLAKE2S_KEY_SIZE - bufidx);
}
else if((FASTKDF_BUFFER_SIZE - bufidx) < BLAKE2S_OUT_SIZE)
{
// Tail modified, head updated
CopyBytes(B, B + FASTKDF_BUFFER_SIZE, BLAKE2S_OUT_SIZE - (FASTKDF_BUFFER_SIZE - bufidx));
}
}
// Modify and copy into the output buffer
// Damned compiler crashes
// Fuck you, AMD
//for(uint i = 0; i < output_len; ++i, ++bufidx)
// output[i] = B[bufidx] ^ A[i];
uint left = FASTKDF_BUFFER_SIZE - bufidx;
//uint left = (~bufidx) + 1
if(left < output_len)
{
XORBytes(output, B + bufidx, A, left);
XORBytes(output + left, B, A + left, output_len - left);
}
else
{
XORBytes(output, B + bufidx, A, output_len);
}
}
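Because bufidx is a uchar and FASTKDF_BUFFER_SIZE is 256, summing the 32 digest bytes wraps modulo 256 on its own, which is why the explicit mask against (FASTKDF_BUFFER_SIZE - 1) stays commented out above. A tiny standalone check (the sample byte values are arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t bufidx = 0;   /* same type as in fastkdf() */
    unsigned wide = 0;    /* explicit "mod 256" for comparison */
    for (int x = 0; x < 32; ++x) {             /* 32 == BLAKE2S_OUT_SIZE */
        uint8_t byte = (uint8_t)(x * 37 + 11); /* arbitrary sample digest byte */
        bufidx += byte;
        wide = (wide + byte) & 0xFF;
    }
    /* Both values are identical and always below 256. */
    printf("%u %u\n", (unsigned)bufidx, wide);
    return 0;
}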
#define SALSA_CORE(state) do { \
state.s4 ^= rotate(state.s0 + state.sc, 7U); state.s8 ^= rotate(state.s4 + state.s0, 9U); state.sc ^= rotate(state.s8 + state.s4, 13U); state.s0 ^= rotate(state.sc + state.s8, 18U); \
state.s9 ^= rotate(state.s5 + state.s1, 7U); state.sd ^= rotate(state.s9 + state.s5, 9U); state.s1 ^= rotate(state.sd + state.s9, 13U); state.s5 ^= rotate(state.s1 + state.sd, 18U); \
state.se ^= rotate(state.sa + state.s6, 7U); state.s2 ^= rotate(state.se + state.sa, 9U); state.s6 ^= rotate(state.s2 + state.se, 13U); state.sa ^= rotate(state.s6 + state.s2, 18U); \
state.s3 ^= rotate(state.sf + state.sb, 7U); state.s7 ^= rotate(state.s3 + state.sf, 9U); state.sb ^= rotate(state.s7 + state.s3, 13U); state.sf ^= rotate(state.sb + state.s7, 18U); \
state.s1 ^= rotate(state.s0 + state.s3, 7U); state.s2 ^= rotate(state.s1 + state.s0, 9U); state.s3 ^= rotate(state.s2 + state.s1, 13U); state.s0 ^= rotate(state.s3 + state.s2, 18U); \
state.s6 ^= rotate(state.s5 + state.s4, 7U); state.s7 ^= rotate(state.s6 + state.s5, 9U); state.s4 ^= rotate(state.s7 + state.s6, 13U); state.s5 ^= rotate(state.s4 + state.s7, 18U); \
state.sb ^= rotate(state.sa + state.s9, 7U); state.s8 ^= rotate(state.sb + state.sa, 9U); state.s9 ^= rotate(state.s8 + state.sb, 13U); state.sa ^= rotate(state.s9 + state.s8, 18U); \
state.sc ^= rotate(state.sf + state.se, 7U); state.sd ^= rotate(state.sc + state.sf, 9U); state.se ^= rotate(state.sd + state.sc, 13U); state.sf ^= rotate(state.se + state.sd, 18U); \
} while(0)
uint16 salsa_small_scalar_rnd(uint16 X)
{
uint16 st = X;
#if SALSA_SMALL_UNROLL == 1
for(int i = 0; i < 10; ++i)
{
SALSA_CORE(st);
}
#elif SALSA_SMALL_UNROLL == 2
for(int i = 0; i < 5; ++i)
{
SALSA_CORE(st);
SALSA_CORE(st);
}
#elif SALSA_SMALL_UNROLL == 3
for(int i = 0; i < 4; ++i)
{
SALSA_CORE(st);
if(i == 3) break;
SALSA_CORE(st);
SALSA_CORE(st);
}
#elif SALSA_SMALL_UNROLL == 4
for(int i = 0; i < 3; ++i)
{
SALSA_CORE(st);
SALSA_CORE(st);
if(i == 2) break;
SALSA_CORE(st);
SALSA_CORE(st);
}
#else
for(int i = 0; i < 2; ++i)
{
SALSA_CORE(st);
SALSA_CORE(st);
SALSA_CORE(st);
SALSA_CORE(st);
SALSA_CORE(st);
}
#endif
return(X + st);
}
#define CHACHA_CORE_PARALLEL(state) do { \
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \
\
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \
} while(0)
uint16 chacha_small_parallel_rnd(uint16 X)
{
uint4 t, st[4];
((uint16 *)st)[0] = X;
#if CHACHA_SMALL_UNROLL == 1
for(int i = 0; i < 10; ++i)
{
CHACHA_CORE_PARALLEL(st);
}
#elif CHACHA_SMALL_UNROLL == 2
for(int i = 0; i < 5; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
}
#elif CHACHA_SMALL_UNROLL == 3
for(int i = 0; i < 4; ++i)
{
CHACHA_CORE_PARALLEL(st);
if(i == 3) break;
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
}
#elif CHACHA_SMALL_UNROLL == 4
for(int i = 0; i < 3; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
if(i == 2) break;
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
}
#else
for(int i = 0; i < 2; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
}
#endif
return(X + ((uint16 *)st)[0]);
}
void neoscrypt_blkmix(uint16 *XV, bool alg)
{
/* NeoScrypt flow: Scrypt flow:
Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa";
Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb";
Xc ^= Xb"; M(Xc'); Yc = Xc"; Xa" = Ya;
Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb;
Xa" = Ya; Xb" = Yc;
Xc" = Yb; Xd" = Yd; */
XV[0] ^= XV[3];
if(!alg)
{
XV[0] = salsa_small_scalar_rnd(XV[0]); XV[1] ^= XV[0];
XV[1] = salsa_small_scalar_rnd(XV[1]); XV[2] ^= XV[1];
XV[2] = salsa_small_scalar_rnd(XV[2]); XV[3] ^= XV[2];
XV[3] = salsa_small_scalar_rnd(XV[3]);
}
else
{
XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0];
XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1];
XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2];
XV[3] = chacha_small_parallel_rnd(XV[3]);
}
XV[1] ^= XV[2];
XV[2] ^= XV[1];
XV[1] ^= XV[2];
}
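The three trailing XORs are a temporary-free XOR swap of XV[1] and XV[2]; that is what realizes the final reordering (Xb" = Yc, Xc" = Yb) from the flow comment. A scalar demonstration of the identity:

#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint32_t a = 0x12345678u, b = 0x9ABCDEF0u;
    uint32_t a0 = a, b0 = b;
    /* Same three steps as the end of neoscrypt_blkmix(), on scalars. */
    a ^= b;
    b ^= a;
    a ^= b;
    assert(a == b0 && b == a0); /* values exchanged without a temporary */
    return 0;
}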
void ScratchpadStore(__global void *V, void *X, uchar idx)
{
((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0];
((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1];
}
void ScratchpadMix(void *X, const __global void *V, uchar idx)
{
((ulong16 *)X)[0] ^= ((__global ulong16 *)V)[idx << 1];
((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1];
}
void SMix(uint16 *X, __global uint16 *V, bool flag)
{
#pragma unroll 1
for(int i = 0; i < 128; ++i)
{
ScratchpadStore(V, X, i);
neoscrypt_blkmix(X, flag);
}
#pragma unroll 1
for(int i = 0; i < 128; ++i)
{
const uint idx = convert_uchar(((uint *)X)[48] & 0x7F);
ScratchpadMix(X, V, idx);
neoscrypt_blkmix(X, flag);
}
}
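Each ScratchpadStore() call writes two ulong16 vectors, i.e. 256 bytes, and the first SMix() loop stores 128 such entries, so one thread uses exactly the 32 KiB that NEOSCRYPT_SCRATCHBUF_SIZE reserves on the host side and that the 0x8000 per-thread stride in the kernel below assumes. A quick arithmetic check:

#include <stdio.h>

int main(void)
{
    /* Two ulong16 vectors per ScratchpadStore() call, 128 entries per thread. */
    const unsigned entry_bytes = 2 * 16 * 8;     /* 256 bytes per entry */
    const unsigned entries     = 128;            /* loop count in SMix() */
    printf("%u bytes\n", entry_bytes * entries); /* prints 32768, i.e. 32 KiB */
    return 0;
}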
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uchar* restrict input, __global uint* restrict output, __global uchar *padcache, const uint target)
{
#define CONSTANT_N 128
#define CONSTANT_r 2
// X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha
uint16 X[4], Z[4];
/* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */
__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
uchar outbuf[32];
uchar data[PASSWORD_LEN];
((ulong8 *)data)[0] = ((__global const ulong8 *)input)[0];
((ulong *)data)[8] = ((__global const ulong *)input)[8];
((uint *)data)[18] = ((__global const uint *)input)[18];
((uint *)data)[19] = get_global_id(0);
// X = KDF(password, salt)
fastkdf(data, data, PASSWORD_LEN, (uchar *)X, 256);
// Process ChaCha 1st, Salsa 2nd and XOR them - run that through PBKDF2
CopyBytes128(Z, X, 2);
// X = SMix(X); X & Z are swapped, repeat.
for(bool flag = false;; ++flag)
{
SMix(X, V, flag);
if(flag) break;
SwapBytes128(X, Z, 256);
}
// blkxor(X, Z)
((ulong16 *)X)[0] ^= ((ulong16 *)Z)[0];
((ulong16 *)X)[1] ^= ((ulong16 *)Z)[1];
// output = KDF(password, X)
fastkdf(data, (uchar *)X, FASTKDF_BUFFER_SIZE, outbuf, 32);
if(((uint *)outbuf)[7] <= target) output[atomic_add(output + 0xFF, 1)] = get_global_id(0);
}
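Read end to end, the kernel's data flow amounts to the following trace of the code above (pseudocode in a C comment, not part of the patch):

/* X = FastKDF(header, header, 256 bytes)   // 80-byte block header, nonce word = global id
 * Z = X                                    // second working copy
 * SMix(X, V, Salsa20/20)                   // flag == false
 * swap(X, Z)                               // X now holds the untouched copy
 * SMix(X, V, ChaCha20/20)                  // flag == true
 * X ^= Z                                   // combine the Salsa and ChaCha results
 * hash = FastKDF(header, X, 32 bytes)      // word 7 of the hash is compared to the target
 * (the order of the two SMix passes does not affect the result, since they are XORed)
 */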

miner.h (1 changed line)

@@ -1100,6 +1100,7 @@ extern pthread_cond_t restart_cond;
extern void clear_stratum_shares(struct pool *pool);
extern void clear_pool_work(struct pool *pool);
extern void set_target(unsigned char *dest_target, double diff, double diff_multiplier2);
extern void set_target_neoscrypt(unsigned char *target, double diff);
extern void kill_work(void);

ocl.c (76 changed lines)

@@ -34,6 +34,7 @@
#include "ocl.h"
#include "ocl/build_kernel.h"
#include "ocl/binary_kernel.h"
#include "algorithm/neoscrypt.h"
/* FIXME: only here for global config vars, replace with configuration.h
* or similar as soon as config is in a struct instead of littered all
@@ -344,19 +345,55 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
cgpu->lookup_gap = 2;
}
if (!cgpu->opt_tc) {
// neoscrypt calculates TC differently
if (!safe_cmp(cgpu->algorithm.name, "neoscrypt")) {
int max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
size_t glob_thread_count = 1UL << max_int;
// if TC is entered by user, use that value... otherwise use default
cgpu->thread_concurrency = ((cgpu->opt_tc) ? cgpu->opt_tc : ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count));
// if TC * scratchbuf size is too big for memory... reduce to max
if (((uint64_t)cgpu->thread_concurrency * NEOSCRYPT_SCRATCHBUF_SIZE) >(uint64_t)cgpu->max_alloc) {
/* Selected intensity will not run on this GPU. Not enough memory.
* Adapt the memory setting. */
glob_thread_count = cgpu->max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE;
/* Find highest significant bit in glob_thread_count, which gives
* the intensity. */
while (max_int && ((1U << max_int) & glob_thread_count) == 0) {
--max_int;
}
/* Make sure the reduced intensity does not fall below the minimum. */
if (max_int < MIN_INTENSITY) {
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
max_int = MIN_INTENSITY;
}
cgpu->intensity = max_int;
cgpu->thread_concurrency = 1U << max_int;
}
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
}
else if (!cgpu->opt_tc) {
unsigned int sixtyfours;
sixtyfours = cgpu->max_alloc / 131072 / 64 / (algorithm->n/1024) - 1;
cgpu->thread_concurrency = sixtyfours * 64;
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
if (cgpu->thread_concurrency > cgpu->shaders * 5)
if (cgpu->thread_concurrency > cgpu->shaders * 5) {
cgpu->thread_concurrency = cgpu->shaders * 5;
}
}
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
} else
}
else {
cgpu->thread_concurrency = cgpu->opt_tc;
}
cl_uint slot, cpnd;
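To make the intensity clamp above concrete, the same arithmetic can be run standalone; the figures below assume a hypothetical 1 GiB CL_DEVICE_MAX_MEM_ALLOC_SIZE and a requested intensity of 20:

#include <stdint.h>
#include <stdio.h>

#define NEOSCRYPT_SCRATCHBUF_SIZE (32 * 1024) /* mirrors algorithm/neoscrypt.h */

int main(void)
{
    uint64_t max_alloc = 1ULL << 30;  /* assumed max allocation: 1 GiB */
    int max_int = 20;                 /* assumed requested intensity   */
    uint64_t glob_thread_count = 1ULL << max_int;

    if (glob_thread_count * NEOSCRYPT_SCRATCHBUF_SIZE > max_alloc) {
        /* 2^20 threads would need 32 GiB of scratchpad, so clamp. */
        glob_thread_count = max_alloc / NEOSCRYPT_SCRATCHBUF_SIZE; /* 32768 */
        /* Highest set bit of the clamped count gives the intensity. */
        while (max_int && ((1ULL << max_int) & glob_thread_count) == 0)
            --max_int;
    }
    /* Prints "intensity 15, threads 32768": 2^15 * 32 KiB = 1 GiB. */
    printf("intensity %d, threads %llu\n", max_int,
           (unsigned long long)(1ULL << max_int));
    return 0;
}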
@@ -445,17 +482,36 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
}
size_t bufsize;
size_t readbufsize = 128;
if (algorithm->rw_buffer_size < 0) {
size_t ipt = (algorithm->n / cgpu->lookup_gap +
(algorithm->n % cgpu->lookup_gap > 0));
// calc buffer size for neoscrypt
if (!safe_cmp(algorithm->name, "neoscrypt")) {
/* The scratch/pad buffer needs 32 kBytes of memory per thread. */
bufsize = NEOSCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
/* This is the input buffer. For neoscrypt this is guaranteed to be
* 80 bytes only. */
readbufsize = 80;
applog(LOG_DEBUG, "Neoscrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
// scrypt/n-scrypt
}
else {
size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0));
bufsize = 128 * ipt * cgpu->thread_concurrency;
} else
bufsize = (size_t) algorithm->rw_buffer_size;
applog(LOG_DEBUG, "Scrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
}
}
else {
bufsize = (size_t)algorithm->rw_buffer_size;
applog(LOG_DEBUG, "Buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
}
clState->padbuffer8 = NULL;
if (bufsize > 0) {
applog(LOG_DEBUG, "Creating read/write buffer sized %lu", (unsigned long)bufsize);
/* Use the max alloc value which has been rounded to a power of
* 2 greater >= required amount earlier */
if (bufsize > cgpu->max_alloc) {
@@ -463,7 +519,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
gpu, (unsigned long)(cgpu->max_alloc));
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
}
applog(LOG_DEBUG, "Creating buffer sized %lu", (unsigned long)bufsize);
/* This buffer is weird and might work to some degree even if
* the create buffer call has apparently failed, so check if we
@@ -475,11 +530,14 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
}
}
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
applog(LOG_DEBUG, "Using read buffer sized %lu", (unsigned long)readbufsize);
clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, readbufsize, NULL, &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
return NULL;
}
applog(LOG_DEBUG, "Using output buffer sized %lu", BUFFERSIZE);
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
if (status != CL_SUCCESS) {

sgminer.c (212 changed lines)

@@ -2019,11 +2019,26 @@ static void update_gbt(struct pool *pool)
/* Return the work coin/network difficulty */
static double get_work_blockdiff(const struct work *work)
{
uint64_t diff64;
double numerator;
// Neoscrypt has the data reversed
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
diff64 = bswap_64(((uint64_t)(be32toh(*((uint32_t *)(work->data + 72))) & 0xFFFFFF00)) << 8);
numerator = (double)work->pool->algorithm.diff_numerator;
}
else {
uint8_t pow = work->data[72];
int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));
uint32_t diff32 = be32toh(*((uint32_t *)(work->data + 72))) & 0x00FFFFFF;
double numerator = work->pool->algorithm.diff_numerator << powdiff;
return numerator / (double)diff32;
int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));
diff64 = be32toh(*((uint32_t *)(work->data + 72))) & 0x0000000000FFFFFF;
numerator = work->pool->algorithm.diff_numerator << powdiff;
}
if (unlikely(!diff64)) {
diff64 = 1;
}
return numerator / (double)diff64;
}
static void gen_gbt_work(struct pool *pool, struct work *work)
@@ -2073,7 +2088,10 @@ static void gen_gbt_work(struct pool *pool, struct work *work)
free(header);
}
// Neoscrypt doesn't calc_midstate()
if (safe_cmp(pool->algorithm.name, "neoscrypt")) {
calc_midstate(work);
}
local_work++;
work->pool = pool;
work->gbt = true;
@@ -2189,11 +2207,16 @@ static bool getwork_decode(json_t *res_val, struct work *work)
return false;
}
// Neoscrypt doesn't calc midstate
if (safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) {
// Calculate it ourselves
if (opt_morenotices) {
applog(LOG_DEBUG, "%s: Calculating midstate locally", isnull(get_pool_name(work->pool), ""));
}
calc_midstate(work);
}
}
if (unlikely(!jobj_binary(res_val, "target", work->target, sizeof(work->target), true))) {
applog(LOG_ERR, "%s: JSON inval target", isnull(get_pool_name(work->pool), ""));
@@ -2936,8 +2959,8 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s
endian_flip128(work->data, work->data);
/* build hex string */
hexstr = bin2hex(work->data, sizeof(work->data));
/* build hex string - Make sure to restrict to 80 bytes for Neoscrypt */
hexstr = bin2hex(work->data, ((!safe_cmp(work->pool->algorithm.name, "neoscrypt")) ? 80 : sizeof(work->data)));
/* build JSON-RPC request */
if (work->gbt) {
@@ -3304,11 +3327,19 @@ static void calc_diff(struct work *work, double known)
d64 = work->pool->algorithm.diff_multiplier2 * truediffone;
applog(LOG_DEBUG, "calc_diff() algorithm = %s", work->pool->algorithm.name);
// Neoscrypt
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
dcut64 = (double)*((uint64_t *)(work->target + 22));
}
else {
dcut64 = le256todouble(work->target);
}
if (unlikely(!dcut64))
dcut64 = 1;
work->work_difficulty = d64 / dcut64;
}
difficulty = work->work_difficulty;
pool_stats->last_diff = difficulty;
@@ -5465,8 +5496,21 @@ static void *stratum_sthread(void *userdata)
sshare->sshare_time = time(NULL);
/* This work item is freed in parse_stratum_response */
sshare->work = work;
applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name);
// Neoscrypt is little endian
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
nonce = htobe32(*((uint32_t *)(work->data + 76)));
//*((uint32_t *)nonce2) = htole32(work->nonce2);
}
else {
nonce = *((uint32_t *)(work->data + 76));
}
__bin2hex(noncehex, (const unsigned char *)&nonce, 4);
*((uint64_t *)nonce2) = htole64(work->nonce2);
__bin2hex(nonce2hex, nonce2, work->nonce2_len);
memset(s, 0, 1024);
mutex_lock(&sshare_lock);
@@ -5474,10 +5518,6 @@ static void *stratum_sthread(void *userdata)
sshare->id = swork_id++;
mutex_unlock(&sshare_lock);
nonce2_64 = (uint64_t *)nonce2;
*nonce2_64 = htole64(work->nonce2);
__bin2hex(nonce2hex, nonce2, work->nonce2_len);
snprintf(s, sizeof(s),
"{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\": %d, \"method\": \"mining.submit\"}",
pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, sshare->id);
@@ -5885,6 +5925,50 @@ void set_target(unsigned char *dest_target, double diff, double diff_multiplier2
memcpy(dest_target, target, 32);
}
/*****************************************************
* Special set_target() function for Neoscrypt
****************************************************/
void set_target_neoscrypt(unsigned char *target, double diff)
{
uint64_t m;
int k;
diff /= 65536.0;
for (k = 6; k > 0 && diff > 1.0; --k) {
diff /= 4294967296.0;
}
m = 4294901760.0 / diff;
if (m == 0 && k == 6) {
memset(target, 0xff, 32);
}
else {
memset(target, 0, 32);
((uint32_t *)target)[k] = (uint32_t)m;
((uint32_t *)target)[k + 1] = (uint32_t)(m >> 32);
}
if (opt_debug) {
/* The target is computed in this system's endianness and stored
* in that endianness on a uint32 level. But because the target is
* eight uint32s, they are stored in mixed mode, i.e., each uint32
* is stored in the local endianness, but the least significant bit
* is stored in target[0] bit 0.
*
* To print this large number in a natively human readable form the
* order of the array entries is swapped, i.e., target[7] <-> target[0],
* and each array entry is byte swapped to have the least significant
* bit to the right. */
uint32_t swaped[8];
swab256(swaped, target);
char *htarget = bin2hex((unsigned char *)swaped, 32);
applog(LOG_DEBUG, "Generated neoscrypt target 0x%s", htarget);
free(htarget);
}
}
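As a sanity check of the arithmetic, the same computation at diff = 1 can be run standalone; it leaves 0x0000FFFF in the most significant target word, matching the 0x0000ffffUL diff1 target registered for neoscrypt in algorithm.c. A sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Standalone copy of the set_target_neoscrypt() arithmetic at diff = 1. */
int main(void)
{
    uint32_t target[8];
    double diff = 1.0;
    uint64_t m;
    int k;

    diff /= 65536.0;
    for (k = 6; k > 0 && diff > 1.0; --k)
        diff /= 4294967296.0;
    m = 4294901760.0 / diff;

    memset(target, 0, sizeof(target));
    target[k] = (uint32_t)m;
    target[k + 1] = (uint32_t)(m >> 32);

    /* Prints 0x0000FFFF: the most significant word of the diff-1 target,
     * the value the kernel compares ((uint *)outbuf)[7] against. */
    printf("0x%08X\n", target[7]);
    return 0;
}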
/* Generates stratum based work based on the most recent notify information
* from the pool. This will keep generating work while a pool is down so we use
* other means to detect when the pool has died in stratum_thread */
@@ -5893,7 +5977,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
unsigned char merkle_root[32], merkle_sha[64];
uint32_t *data32, *swap32;
uint64_t nonce2le;
int i;
int i, j;
cg_wlock(&pool->data_lock);
@@ -5915,6 +5999,39 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
gen_hash(merkle_sha, 64, merkle_root);
memcpy(merkle_sha, merkle_root, 32);
}
applog(LOG_DEBUG, "gen_stratum_work() - algorithm = %s", pool->algorithm.name);
// Different for Neoscrypt because of Little Endian
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
/* Incoming data is in little endian. */
memcpy(merkle_root, merkle_sha, 32);
uint32_t temp = pool->merkle_offset / sizeof(uint32_t), i;
/* Put version (4 byte) + prev_hash (4 byte* 8) but big endian encoded
* into work. */
for (i = 0; i < temp; ++i) {
((uint32_t *)work->data)[i] = be32toh(((uint32_t *)pool->header_bin)[i]);
}
/* Now add the merkle_root (4 byte* 8), but it is encoded in little endian. */
temp += 8;
for (j = 0; i < temp; ++i, ++j) {
((uint32_t *)work->data)[i] = le32toh(((uint32_t *)merkle_root)[j]);
}
/* Add the time encoded in big endianness. */
hex2bin((unsigned char *)&temp, pool->swork.ntime, 4);
((uint32_t *)work->data)[17] = be32toh(temp);
/* Add the nbits (big endianness). */
hex2bin((unsigned char *)&temp, pool->swork.nbit, 4);
((uint32_t *)work->data)[18] = be32toh(temp);
((uint32_t *)work->data)[20] = 0x80000000;
((uint32_t *)work->data)[31] = 0x00000280;
}
else {
data32 = (uint32_t *)merkle_sha;
swap32 = (uint32_t *)merkle_root;
flip32(swap32, data32);
@@ -5922,6 +6039,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
/* Copy the data template from header_bin */
memcpy(work->data, pool->header_bin, 128);
memcpy(work->data + pool->merkle_offset, merkle_root, 32);
}
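Assuming the usual 36-byte merkle_offset, the neoscrypt branch above lays out the first 80 bytes of work->data as summarized below (a reading of the code, not something stated in the patch):

/* 32-bit word index : content (assuming merkle_offset == 36)
 *   0        version              (big endian, from header_bin)
 *   1 .. 8   previous block hash  (big endian, from header_bin)
 *   9 .. 16  merkle root          (little endian)
 *   17       ntime                (big endian)
 *   18       nbits                (big endian)
 *   19       nonce                (written per work item by the kernel)
 * Words 20 and 31 keep the familiar SHA-256 padding constants
 * (0x80000000 and 0x00000280, i.e. 640 message bits) from the
 * 128-byte getwork-style template. */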
/* Store the stratum work diff to check it still matches the pool's
* stratum diff when submitting shares */
@@ -5946,8 +6064,14 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
free(merkle_hash);
}
// For Neoscrypt use set_target_neoscrypt() function
if (!safe_cmp(pool->algorithm.name, "neoscrypt")) {
set_target_neoscrypt(work->target, work->sdiff);
}
else {
calc_midstate(work);
set_target(work->target, work->sdiff, pool->algorithm.diff_multiplier2);
}
local_work++;
work->pool = pool;
@@ -6124,15 +6248,17 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
unsigned int options = 0;
const char *opt1, *opt2;
if(!pool1 || !pool2)
applog(LOG_DEBUG, "compare_pool_settings()");
if (!pool1 || !pool2)
return 0;
//compare pool devices
opt1 = get_pool_setting(pool1->devices, ((!empty_string(default_profile.devices))?default_profile.devices:"all"));
opt2 = get_pool_setting(pool2->devices, ((!empty_string(default_profile.devices))?default_profile.devices:"all"));
opt1 = get_pool_setting(pool1->devices, ((!empty_string(default_profile.devices)) ? default_profile.devices : "all"));
opt2 = get_pool_setting(pool2->devices, ((!empty_string(default_profile.devices)) ? default_profile.devices : "all"));
//changing devices means a hard reset of mining threads
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
options |= (SWITCHER_APPLY_DEVICE | SWITCHER_HARD_RESET);
//compare gpu threads
@@ -6140,11 +6266,11 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
opt2 = get_pool_setting(pool2->gpu_threads, default_profile.gpu_threads);
//changing gpu threads means a hard reset of mining threads
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
options |= (SWITCHER_APPLY_GT | SWITCHER_HARD_RESET);
//compare algorithm
if(!cmp_algorithm(&pool1->algorithm, &pool2->algorithm))
if (!cmp_algorithm(&pool1->algorithm, &pool2->algorithm))
options |= (SWITCHER_APPLY_ALGO | SWITCHER_SOFT_RESET);
//lookup gap
@@ -6152,46 +6278,46 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
opt2 = get_pool_setting(pool2->lookup_gap, default_profile.lookup_gap);
//lookup gap means soft reset but only if hard reset isnt set
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
options |= (SWITCHER_APPLY_LG | SWITCHER_SOFT_RESET);
//intensities
opt1 = get_pool_setting(pool1->rawintensity, default_profile.rawintensity);
opt2 = get_pool_setting(pool2->rawintensity, default_profile.rawintensity);
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
{
//intensity is soft reset
if(!empty_string(opt2))
if (!empty_string(opt2))
options |= (SWITCHER_APPLY_RAWINT | SWITCHER_SOFT_RESET);
}
//xintensity -- only if raw intensity not set
if(!opt_isset(options, SWITCHER_APPLY_RAWINT))
if (!opt_isset(options, SWITCHER_APPLY_RAWINT))
{
opt1 = get_pool_setting(pool1->xintensity, default_profile.xintensity);
opt2 = get_pool_setting(pool2->xintensity, default_profile.xintensity);
//if different...
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
{
//intensity is soft reset
if(!empty_string(opt2))
if (!empty_string(opt2))
options |= (SWITCHER_APPLY_XINT | SWITCHER_SOFT_RESET);
}
}
//intensity -- only if raw intensity and xintensity not set
if(!opt_isset(options, SWITCHER_APPLY_RAWINT) && !opt_isset(options, SWITCHER_APPLY_XINT))
if (!opt_isset(options, SWITCHER_APPLY_RAWINT) && !opt_isset(options, SWITCHER_APPLY_XINT))
{
opt1 = get_pool_setting(pool1->intensity, default_profile.intensity);
opt2 = get_pool_setting(pool2->intensity, default_profile.intensity);
//if different...
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
{
//intensity is soft reset
if(!empty_string(opt2))
if (!empty_string(opt2))
options |= (SWITCHER_APPLY_INT | SWITCHER_SOFT_RESET);
//if blank, set default profile to intensity 8 and apply
else
@@ -6203,10 +6329,10 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
opt1 = get_pool_setting(pool1->shaders, default_profile.shaders);
opt2 = get_pool_setting(pool2->shaders, default_profile.shaders);
if(strcasecmp(opt1, opt2) != 0)
if (strcasecmp(opt1, opt2) != 0)
{
//shaders is soft reset
if(!empty_string(opt2))
if (!empty_string(opt2))
options |= (SWITCHER_APPLY_SHADER | SWITCHER_SOFT_RESET);
}
@@ -6215,7 +6341,7 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
opt2 = get_pool_setting(pool2->thread_concurrency, default_profile.thread_concurrency);
//thread-concurrency is soft reset
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= (SWITCHER_APPLY_TC | SWITCHER_SOFT_RESET);
//worksize
@@ -6223,45 +6349,45 @@ static unsigned long compare_pool_settings(struct pool *pool1, struct pool *pool
opt2 = get_pool_setting(pool2->worksize, default_profile.worksize);
//worksize is soft reset
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= (SWITCHER_APPLY_WORKSIZE | SWITCHER_SOFT_RESET);
#ifdef HAVE_ADL
#ifdef HAVE_ADL
//gpu-engine
opt1 = get_pool_setting(pool1->gpu_engine, default_profile.gpu_engine);
opt2 = get_pool_setting(pool2->gpu_engine, default_profile.gpu_engine);
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= SWITCHER_APPLY_GPU_ENGINE;
//gpu-memclock
opt1 = get_pool_setting(pool1->gpu_memclock, default_profile.gpu_memclock);
opt2 = get_pool_setting(pool2->gpu_memclock, default_profile.gpu_memclock);
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= SWITCHER_APPLY_GPU_MEMCLOCK;
//GPU fans
opt1 = get_pool_setting(pool1->gpu_fan, default_profile.gpu_fan);
opt2 = get_pool_setting(pool2->gpu_fan, default_profile.gpu_fan);
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= SWITCHER_APPLY_GPU_FAN;
//GPU powertune
opt1 = get_pool_setting(pool1->gpu_powertune, default_profile.gpu_powertune);
opt2 = get_pool_setting(pool2->gpu_powertune, default_profile.gpu_powertune);
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= SWITCHER_APPLY_GPU_POWERTUNE;
//GPU vddc
opt1 = get_pool_setting(pool1->gpu_vddc, default_profile.gpu_vddc);
opt2 = get_pool_setting(pool2->gpu_vddc, default_profile.gpu_vddc);
if(strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
if (strcasecmp(opt1, opt2) != 0 && !empty_string(opt2))
options |= SWITCHER_APPLY_GPU_VDDC;
#endif
#endif
// Remove soft reset if hard reset is set
if (opt_isset(options, SWITCHER_HARD_RESET) &&
@@ -6281,6 +6407,8 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
{
int i;
applog(LOG_DEBUG, "get_work_prepare_thread()");
//if switcher is disabled
if(opt_switchmode == SWITCH_OFF)
return;
@@ -6608,6 +6736,7 @@ struct work *get_work(struct thr_info *thr, const int thr_id)
}
}
applog(LOG_DEBUG, "preparing thread...");
get_work_prepare_thread(thr, work);
diff_t = time(NULL) - diff_t;
@@ -6700,7 +6829,16 @@ bool test_nonce(struct work *work, uint32_t nonce)
uint32_t diff1targ;
rebuild_nonce(work, nonce);
applog(LOG_DEBUG, "test_nonce() algorithm = %s", work->pool->algorithm.name);
// for Neoscrypt, the diff1targ value is in work->target
if (!safe_cmp(work->pool->algorithm.name, "neoscrypt")) {
diff1targ = ((uint32_t *)work->target)[7];
}
else {
diff1targ = work->pool->algorithm.diff1targ;
}
return (le32toh(*hash_32) <= diff1targ);
}

winbuild/sgminer.vcxproj (2 changed lines)

@@ -263,6 +263,7 @@
<ClCompile Include="..\algorithm.c" />
<ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\neoscrypt.c" />
<ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\x14.c" />
<ClCompile Include="..\algorithm\fresh.c" />
@@ -321,6 +322,7 @@
<ClInclude Include="..\algorithm.h" />
<ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\neoscrypt.h" />
<ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\x14.h" />
<ClInclude Include="..\algorithm\fresh.h" />

winbuild/sgminer.vcxproj.filters (6 changed lines)

@@ -197,6 +197,9 @@
<ClCompile Include="..\algorithm.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\neoscrypt.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\adl.h">
@@ -373,6 +376,9 @@
<ClInclude Include="..\sph\sph_shabal.h">
<Filter>Header Files\sph</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\neoscrypt.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="README.txt" />
