Browse Source

Merge pull request #4 from theLosers106/master

Upgrade to version 5.2.1
windows 5.2.1
kenshirothefist 9 years ago
parent
commit
44ee55735b
  1. 1
      Makefile.am
  2. 76
      algorithm.c
  3. 26
      algorithm.h
  4. 22
      algorithm/lyra2.c
  5. 8
      algorithm/lyra2.h
  6. 23
      algorithm/lyra2re.c
  7. 2
      algorithm/lyra2re.h
  8. 10
      algorithm/lyra2re_old.h
  9. 47
      algorithm/lyra2rev2.c
  10. 11
      algorithm/lyra2rev2.h
  11. 213
      algorithm/lyra2v2.c
  12. 42
      algorithm/lyra2v2.h
  13. 22
      algorithm/sponge.c
  14. 8
      algorithm/sponge.h
  15. 745
      algorithm/spongev2.c
  16. 108
      algorithm/spongev2.h
  17. 118
      algorithm/whirlpoolx.c
  18. 48
      algorithm/whirlpoolx.h
  19. 8
      algorithm/yescrypt-opt.c
  20. 2
      api.c
  21. 2
      driver-opencl.c
  22. 8
      kernel/lyra2rev2.cl
  23. 97
      kernel/neoscrypt.cl
  24. 1537
      kernel/whirlpoolx.cl
  25. 16
      miner.h
  26. 19
      ocl.c
  27. 3
      ocl.h
  28. 2
      ocl/binary_kernel.c
  29. 2
      ocl/build_kernel.c
  30. 1
      ocl/build_kernel.h
  31. 22
      sgminer.c
  32. 6
      util.c
  33. 6
      winbuild/dist/include/config.h
  34. 26
      winbuild/sgminer.vcxproj
  35. 30
      winbuild/sgminer.vcxproj.filters

1
Makefile.am

@ -73,7 +73,6 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h @@ -73,7 +73,6 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h
sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h
sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h

76
algorithm.c

@ -33,9 +33,9 @@ @@ -33,9 +33,9 @@
#include "algorithm/neoscrypt.h"
#include "algorithm/whirlpoolx.h"
#include "algorithm/lyra2re.h"
#include "algorithm/lyra2re_old.h"
#include "algorithm/lyra2rev2.h"
#include "algorithm/pluck.h"
#include "algorithm/yescrypt.h"
//#include "algorithm/yescrypt.h"
#include "algorithm/credits.h"
#include "compat.h"
@ -43,6 +43,7 @@ @@ -43,6 +43,7 @@
#include <inttypes.h>
#include <string.h>
bool opt_lyra;
const char *algorithm_type_str[] = {
"Unknown",
"Credits",
@ -62,7 +63,7 @@ const char *algorithm_type_str[] = { @@ -62,7 +63,7 @@ const char *algorithm_type_str[] = {
"Neoscrypt",
"WhirlpoolX",
"Lyra2RE",
"Lyra2REv2"
"Lyra2REV2"
"Pluck"
"Yescrypt",
"Yescrypt-multi"
@ -216,6 +217,7 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_ @@ -216,6 +217,7 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_
return status;
}
#if 0
static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
@ -309,6 +311,7 @@ static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __ @@ -309,6 +311,7 @@ static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __
return status;
}
#endif
static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
@ -764,40 +767,43 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_ @@ -764,40 +767,43 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_
static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
uint64_t midblock[8], key[8] = { 0 }, tmp[8] = { 0 };
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status;
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
memcpy(midblock, clState->cldata, 64);
// midblock = n, key = h
for (int i = 0; i < 10; ++i) {
tmp[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
whirlpool_round(key, tmp);
tmp[0] = 0;
whirlpool_round(midblock, tmp);
for (int x = 0; x < 8; ++x) {
midblock[x] ^= key[x];
}
}
for (int i = 0; i < 8; ++i) {
midblock[i] ^= ((uint64_t *)(clState->cldata))[i];
}
status = clSetKernelArg(clState->kernel, 0, sizeof(cl_ulong8), (cl_ulong8 *)&midblock);
status |= clSetKernelArg(clState->kernel, 1, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 8));
status |= clSetKernelArg(clState->kernel, 2, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 9));
status |= clSetKernelArg(clState->kernel, 3, sizeof(cl_mem), (void *)&clState->outputBuffer);
status |= clSetKernelArg(clState->kernel, 4, sizeof(cl_ulong), (void *)&le_target);
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(le_target);
return status;
}
typedef struct _algorithm_settings_t {
const char *name; /* Human-readable identifier */
algorithm_type_t type; //common algorithm type
const char *kernelfile; /* alternate kernel file */
double diff_multiplier1;
double diff_multiplier2;
double share_diff_multiplier;
uint32_t xintensity_shift;
uint32_t intensity_shift;
uint32_t found_idx;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
cl_command_queue_properties cq_properties;
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
} algorithm_settings_t;
static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
@ -842,7 +848,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct @@ -842,7 +848,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
return status;
}
static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
unsigned int num;
@ -945,6 +951,7 @@ static algorithm_settings_t algos[] = { @@ -945,6 +951,7 @@ static algorithm_settings_t algos[] = {
#if 0
#define A_YESCRYPT(a) \
{ a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
A_YESCRYPT("yescrypt"),
@ -955,6 +962,7 @@ static algorithm_settings_t algos[] = { @@ -955,6 +962,7 @@ static algorithm_settings_t algos[] = {
A_YESCRYPT_MULTI("yescrypt-multi"),
#undef A_YESCRYPT_MULTI
#endif
// kernels starting from this will have difficulty calculated by using quarkcoin algorithm
#define A_QUARK(a, b) \
@ -992,10 +1000,8 @@ static algorithm_settings_t algos[] = { @@ -992,10 +1000,8 @@ static algorithm_settings_t algos[] = {
{ "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL },
{ "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL },
{ "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options },
{ "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL },
{ "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options },
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b, c) \
@ -1006,7 +1012,7 @@ static algorithm_settings_t algos[] = { @@ -1006,7 +1012,7 @@ static algorithm_settings_t algos[] = {
#undef A_FUGUE
{ "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL },
{ "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
{ "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
// Terminator (do not remove)
{ NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL }
@ -1079,7 +1085,10 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa @@ -1079,7 +1085,10 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
ALGO_ALIAS("nist5", "talkcoin-mod");
ALGO_ALIAS("keccak", "maxcoin");
ALGO_ALIAS("whirlpool", "whirlcoin");
ALGO_ALIAS("Lyra2RE", "lyra2re");
ALGO_ALIAS("lyra2", "lyra2re");
ALGO_ALIAS("Lyra2REv2", "lyra2rev2");
ALGO_ALIAS("lyra2rev2", "lyra2rev2");
ALGO_ALIAS("lyra2v2", "lyra2rev2");
#undef ALGO_ALIAS
@ -1107,6 +1116,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias) @@ -1107,6 +1116,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias)
if ((old_nfactor > 0) && (old_nfactor != nfactor))
nfactor = old_nfactor;
if (algo->type == ALGO_LYRA2RE || algo->type == ALGO_LYRA2REV2) { opt_lyra = true; }
set_algorithm_nfactor(algo, nfactor);
//reapply kernelfile if was set

26
algorithm.h

@ -9,7 +9,7 @@ @@ -9,7 +9,7 @@
#include <inttypes.h>
#include <stdbool.h>
#include "ocl/build_kernel.h" // For the build_kernel_data type
//#include "ocl/build_kernel.h" // For the build_kernel_data type
typedef enum {
ALGO_UNK,
@ -30,7 +30,7 @@ typedef enum { @@ -30,7 +30,7 @@ typedef enum {
ALGO_NEOSCRYPT,
ALGO_WHIRLPOOLX,
ALGO_LYRA2RE,
ALGO_LYRA2REv2,
ALGO_LYRA2REV2,
ALGO_PLUCK,
ALGO_YESCRYPT,
ALGO_YESCRYPT_MULTI,
@ -72,28 +72,6 @@ typedef struct _algorithm_t { @@ -72,28 +72,6 @@ typedef struct _algorithm_t {
void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
} algorithm_t;
typedef struct _algorithm_settings_t
{
const char *name;
algorithm_type_t type;
const char *kernelfile;
double diff_multiplier1;
double diff_multiplier2;
double share_diff_multiplier;
uint32_t xintensity_shift;
uint32_t intensity_shift;
uint32_t found_idx;
unsigned long long diff_numerator;
uint32_t diff1targ;
size_t n_extra_kernels;
long rw_buffer_size;
cl_command_queue_properties cq_properties;
void (*regenhash)(struct work *);
cl_int (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
void (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
void (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
} algorithm_settings_t;
/* Set default parameters based on name. */
void set_algorithm(algorithm_t* algo, const char* name);

22
algorithm/lyra2.c

@ -58,19 +58,15 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * @@ -58,19 +58,15 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
//========== Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
uint64_t *wholeMatrix = malloc(i);
uint64_t *wholeMatrix = (uint64_t*)malloc(i);
if (wholeMatrix == NULL) {
return -1;
}
memset(wholeMatrix, 0, i);
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*));
if (memMatrix == NULL) {
return -1;
}
@ -122,7 +118,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * @@ -122,7 +118,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
//======================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
uint64_t *state = malloc(16 * sizeof (uint64_t));
uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t));
if (state == NULL) {
return -1;
}
@ -134,16 +130,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * @@ -134,16 +130,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
ptrWord = wholeMatrix;
for (i = 0; i < nBlocksInput; i++) {
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
}
//Initializes M[0] and M[1]
reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);
do {
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
//updates the value of row* (deterministically picked during Setup))
@ -176,7 +172,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * @@ -176,7 +172,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
//------------------------------------------------------------------------------------------
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
//update prev: it now points to the last row ever computed
prev = row;
@ -196,7 +192,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void * @@ -196,7 +192,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
absorbBlock(state, memMatrix[rowa]);
//Squeezes the key
squeeze(state, K, kLen);
squeeze(state, (unsigned char*)K, kLen);
//==========================================================================/
//========================= Freeing the memory =============================//

8
algorithm/lyra2.h

@ -37,6 +37,14 @@ typedef unsigned char byte; @@ -37,6 +37,14 @@ typedef unsigned char byte;
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
#endif
#ifndef N_COLS
#define N_COLS 8 //Number of columns in the memory matrix: fixed to 64 by default
#endif
#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row
int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
#endif /* LYRA2_H_ */

23
algorithm/lyra2re.c

@ -36,8 +36,6 @@ @@ -36,8 +36,6 @@
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_keccak.h"
#include "sph/sph_bmw.h"
#include "sph/sph_cubehash.h"
#include "lyra2.h"
/*
@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) @@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
inline void lyra2rehash(void *state, const void *input)
{
sph_blake256_context ctx_blake;
sph_bmw256_context ctx_bmw;
sph_groestl256_context ctx_groestl;
sph_keccak256_context ctx_keccak;
sph_skein256_context ctx_skein;
sph_cubehash256_context ctx_cube;
uint32_t hashA[8], hashB[8];
@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input) @@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input)
sph_keccak256 (&ctx_keccak,hashA, 32);
sph_keccak256_close(&ctx_keccak, hashB);
sph_cubehash256_init(&ctx_cube);
sph_cubehash256(&ctx_cube, hashB, 32);
sph_cubehash256_close(&ctx_cube, hashA);
LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);
sph_skein256_init(&ctx_skein);
sph_skein256 (&ctx_skein, hashB, 32);
sph_skein256_close(&ctx_skein, hashA);
sph_skein256 (&ctx_skein, hashA, 32);
sph_skein256_close(&ctx_skein, hashB);
sph_cubehash256_init(&ctx_cube);
sph_cubehash256(&ctx_cube, hashA, 32);
sph_cubehash256_close(&ctx_cube, hashB);
sph_bmw256_init(&ctx_bmw);
sph_bmw256 (&ctx_bmw, hashB, 32);
sph_bmw256_close(&ctx_bmw, hashA);
sph_groestl256_init(&ctx_groestl);
sph_groestl256 (&ctx_groestl, hashB, 32);
sph_groestl256_close(&ctx_groestl, hashA);
memcpy(state, hashA, 32);
}

2
algorithm/lyra2re.h

@ -2,8 +2,6 @@ @@ -2,8 +2,6 @@
#define LYRA2RE_H
#include "miner.h"
#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent
#define LYRA_SECBUF_SIZE (4) // (not used)
extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);

10
algorithm/lyra2re_old.h

@ -1,10 +0,0 @@ @@ -1,10 +0,0 @@
#ifndef LYRA2REOLD_H
#define LYRA2REOLD_H
#include "miner.h"
extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void lyra2reold_regenhash(struct work *work);
#endif /* LYRA2RE_H */

47
algorithm/lyra2re_old.c → algorithm/lyra2rev2.c

@ -36,7 +36,9 @@ @@ -36,7 +36,9 @@
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_keccak.h"
#include "lyra2.h"
#include "sph/sph_bmw.h"
#include "sph/sph_cubehash.h"
#include "lyra2v2.h"
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) @@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
}
inline void lyra2rehash_old(void *state, const void *input)
inline void lyra2rev2hash(void *state, const void *input)
{
sph_blake256_context ctx_blake;
sph_groestl256_context ctx_groestl;
sph_bmw256_context ctx_bmw;
sph_keccak256_context ctx_keccak;
sph_skein256_context ctx_skein;
sph_cubehash256_context ctx_cube;
uint32_t hashA[8], hashB[8];
sph_blake256_init(&ctx_blake);
@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input) @@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input)
sph_keccak256 (&ctx_keccak,hashA, 32);
sph_keccak256_close(&ctx_keccak, hashB);
LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
sph_cubehash256_init(&ctx_cube);
sph_cubehash256(&ctx_cube, hashB, 32);
sph_cubehash256_close(&ctx_cube, hashA);
LYRA2V2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);
sph_skein256_init(&ctx_skein);
sph_skein256 (&ctx_skein, hashB, 32);
sph_skein256_close(&ctx_skein, hashA);
sph_skein256_init(&ctx_skein);
sph_skein256 (&ctx_skein, hashA, 32);
sph_skein256_close(&ctx_skein, hashB);
sph_cubehash256_init(&ctx_cube);
sph_cubehash256(&ctx_cube, hashA, 32);
sph_cubehash256_close(&ctx_cube, hashB);
sph_bmw256_init(&ctx_bmw);
sph_bmw256 (&ctx_bmw, hashB, 32);
sph_bmw256_close(&ctx_bmw, hashA);
sph_groestl256_init(&ctx_groestl);
sph_groestl256 (&ctx_groestl, hashB, 32);
sph_groestl256_close(&ctx_groestl, hashA);
//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
memcpy(state, hashA, 32);
memcpy(state, hashA, 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
lyra2rehash_old(ohash, data);
lyra2rev2hash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t @@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
return 1;
}
void lyra2reold_regenhash(struct work *work)
void lyra2rev2_regenhash(struct work *work)
{
uint32_t data[20];
uint32_t *nonce = (uint32_t *)(work->data + 76);
@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work) @@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work)
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
lyra2rehash_old(ohash, data);
lyra2rev2hash(ohash, data);
}
bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
bool scanhash_lyra2rev2(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse @@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse
*nonce = ++n;
data[19] = (n);
lyra2rehash_old(ostate, data);
lyra2rev2hash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx",

11
algorithm/lyra2rev2.h

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
#ifndef LYRA2REV2_H
#define LYRA2REV2_H
#include "miner.h"
#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent
#define LYRA_SECBUF_SIZE (4) // (not used)
extern int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void lyra2rev2_regenhash(struct work *work);
#endif /* LYRA2REV2_H */

213
algorithm/lyra2v2.c

@ -0,0 +1,213 @@ @@ -0,0 +1,213 @@
/**
* Implementation of the Lyra2 Password Hashing Scheme (PHS).
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lyra2v2.h"
#include "spongev2.h"
/**
* Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
* whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
* where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
* integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
* of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
*
* @param K The derived key to be output by the algorithm
* @param kLen Desired key length
* @param pwd User password
* @param pwdlen Password length
* @param salt Salt
* @param saltlen Salt length
* @param timeCost Parameter to determine the processing time (T)
* @param nRows Number or rows of the memory matrix (R)
* @param nCols Number of columns of the memory matrix (C)
*
* @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
*/
int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
//============================= Basic variables ============================//
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
int64_t tau; //Time Loop iterator
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
int64_t i; //auxiliary iteration counter
//==========================================================================/
//========== Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
uint64_t *wholeMatrix = (uint64_t*) malloc(i);
if (wholeMatrix == NULL) {
return -1;
}
memset(wholeMatrix, 0, i);
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = (uint64_t**) malloc(nRows * sizeof(uint64_t*));
if (memMatrix == NULL) {
return -1;
}
//Places the pointers in the correct positions
uint64_t *ptrWord = wholeMatrix;
for (i = 0; i < nRows; i++) {
memMatrix[i] = ptrWord;
ptrWord += ROW_LEN_INT64;
}
//==========================================================================/
//============= Getting the password + salt + basil padded with 10*1 ===============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible
//First, we clean enough blocks for the password, salt, basil and padding
uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
byte *ptrByte = (byte*) wholeMatrix;
memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;
//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nRows, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nCols, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
//==========================================================================/
//======================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
uint64_t *state = (uint64_t*) malloc(16 * sizeof(uint64_t));
if (state == NULL) {
return -1;
}
initStatev2(state);
//==========================================================================/
//================================ Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
ptrWord = wholeMatrix;
for (i = 0; i < nBlocksInput; i++) {
absorbBlockBlake2Safev2(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
}
//Initializes M[0] and M[1]
reducedSqueezeRow0v2(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
reducedDuplexRow1v2(state, memMatrix[0], memMatrix[1], nCols);
do {
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
reducedDuplexRowSetupv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
//Checks if all rows in the window where visited.
if (rowa == 0) {
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
} while (row < nRows);
//==========================================================================/
//============================ Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for (tau = 1; tau <= timeCost; tau++) {
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do {
//Selects a pseudorandom index row*
//------------------------------------------------------------------------------------------
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRowv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
//------------------------------------------------------------------------------------------
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
} while (row != 0);
}
//==========================================================================/
//============================ Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlockv2(state, memMatrix[rowa]);
//Squeezes the key
squeezev2(state, (unsigned char*)K, kLen);
//==========================================================================/
//========================= Freeing the memory =============================//
free(memMatrix);
free(wholeMatrix);
//Wiping out the sponge's internal state before freeing it
memset(state, 0, 16 * sizeof (uint64_t));
free(state);
//==========================================================================/
return 0;
}

42
algorithm/lyra2v2.h

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
/**
* Header file for the Lyra2 Password Hashing Scheme (PHS).
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LYRA2V2_H_
#define LYRA2V2_H_
#include <stdint.h>
typedef unsigned char byte;
//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t)
#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes
#ifdef BLOCK_LEN_BITS
#define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length: 768 bits (=96 bytes, =12 uint64_t)
#define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes
#else //default block lenght: 768 bits
#define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t)
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
#endif
int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
#endif /* LYRA2_H_ */

22
algorithm/sponge.c

@ -158,11 +158,11 @@ void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) { @@ -158,11 +158,11 @@ void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
* @param state The current state of the sponge
* @param rowOut Row to receive the data squeezed
*/
void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
int i;
//M[row][C-1-col] = H.reduced_squeeze()
for (i = 0; i < nCols; i++) {
for (i = 0; i < N_COLS; i++) {
ptrWord[0] = state[0];
ptrWord[1] = state[1];
ptrWord[2] = state[2];
@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) { @@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
* @param rowIn Row to feed the sponge
* @param rowOut Row to receive the sponge's output
*/
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) {
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev][col]"
state[0] ^= (ptrWordIn[0]);
@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint6 @@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint6
* @param rowOut Row receiving the output
*
*/
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, @@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut,
* @param rowOut Row receiving the output
*
*/
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);

8
algorithm/sponge.h

@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]); @@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]);
//---- Squeezes
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row);
//---- Absorbs
void absorbBlock(uint64_t *state, const uint64_t *in);
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
//---- Duplexes
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//---- Misc
void printArray(unsigned char *array, unsigned int size, char *name);

745
algorithm/spongev2.c

@ -0,0 +1,745 @@ @@ -0,0 +1,745 @@
/**
* A simple implementation of Blake2b's internal permutation
* in the form of a sponge.
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
#include <time.h>
#include "spongev2.h"
#include "lyra2v2.h"
/**
* Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
* receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
* typically have their internal state initialized with zeros, Blake2b's G function
* has a fixed point: if the internal state and message are both filled with zeros. the
* resulting permutation will always be a block filled with zeros; this happens because
* Blake2b does not use the constants originally employed in Blake2 inside its G function,
* relying on the IV for avoiding possible fixed points.
*
* @param state The 1024-bit array to be initialized
*/
inline void initStatev2(uint64_t state[/*16*/]) {
//First 512 bis are zeros
memset(state, 0, 64);
//Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
state[8] = blake2b_IV[0];
state[9] = blake2b_IV[1];
state[10] = blake2b_IV[2];
state[11] = blake2b_IV[3];
state[12] = blake2b_IV[4];
state[13] = blake2b_IV[5];
state[14] = blake2b_IV[6];
state[15] = blake2b_IV[7];
}
/**
* Execute Blake2b's G function, with all 12 rounds.
*
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
*/
inline static void blake2bLyra(uint64_t *v) {
ROUND_LYRA(0);
ROUND_LYRA(1);
ROUND_LYRA(2);
ROUND_LYRA(3);
ROUND_LYRA(4);
ROUND_LYRA(5);
ROUND_LYRA(6);
ROUND_LYRA(7);
ROUND_LYRA(8);
ROUND_LYRA(9);
ROUND_LYRA(10);
ROUND_LYRA(11);
}
/**
* Executes a reduced version of Blake2b's G function with only one round
* @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
*/
inline static void reducedBlake2bLyra(uint64_t *v) {
ROUND_LYRA(0);
}
/**
* Performs a squeeze operation, using Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param out Array that will receive the data squeezed
* @param len The number of bytes to be squeezed into the "out" array
*/
inline void squeezev2(uint64_t *state, byte *out, unsigned int len) {
int fullBlocks = len / BLOCK_LEN_BYTES;
byte *ptr = out;
int i;
//Squeezes full blocks
for (i = 0; i < fullBlocks; i++) {
memcpy(ptr, state, BLOCK_LEN_BYTES);
blake2bLyra(state);
ptr += BLOCK_LEN_BYTES;
}
//Squeezes remaining bytes
memcpy(ptr, state, (len % BLOCK_LEN_BYTES));
}
/**
* Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
* of type uint64_t), using Blake2b's G function as the internal permutation
*
* @param state The current state of the sponge
* @param in The block to be absorbed (BLOCK_LEN_INT64 words)
*/
inline void absorbBlockv2(uint64_t *state, const uint64_t *in) {
//XORs the first BLOCK_LEN_INT64 words of "in" with the current state
state[0] ^= in[0];
state[1] ^= in[1];
state[2] ^= in[2];
state[3] ^= in[3];
state[4] ^= in[4];
state[5] ^= in[5];
state[6] ^= in[6];
state[7] ^= in[7];
state[8] ^= in[8];
state[9] ^= in[9];
state[10] ^= in[10];
state[11] ^= in[11];
//Applies the transformation f to the sponge's state
blake2bLyra(state);
}
/**
* Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
* words of type uint64_t), using Blake2b's G function as the internal permutation
*
* @param state The current state of the sponge
* @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
*/
inline void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in) {
//XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
state[0] ^= in[0];
state[1] ^= in[1];
state[2] ^= in[2];
state[3] ^= in[3];
state[4] ^= in[4];
state[5] ^= in[5];
state[6] ^= in[6];
state[7] ^= in[7];
//Applies the transformation f to the sponge's state
blake2bLyra(state);
}
/**
* Performs a reduced squeeze operation for a single row, from the highest to
* the lowest index, using the reduced-round Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param rowOut Row to receive the data squeezed
*/
inline void reducedSqueezeRow0v2(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
int i;
//M[row][C-1-col] = H.reduced_squeeze()
for (i = 0; i < nCols; i++) {
ptrWord[0] = state[0];
ptrWord[1] = state[1];
ptrWord[2] = state[2];
ptrWord[3] = state[3];
ptrWord[4] = state[4];
ptrWord[5] = state[5];
ptrWord[6] = state[6];
ptrWord[7] = state[7];
ptrWord[8] = state[8];
ptrWord[9] = state[9];
ptrWord[10] = state[10];
ptrWord[11] = state[11];
//Goes to next block (column) that will receive the squeezed data
ptrWord -= BLOCK_LEN_INT64;
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
}
}
/**
* Performs a reduced duplex operation for a single row, from the highest to
* the lowest index, using the reduced-round Blake2b's G function as the
* internal permutation
*
* @param state The current state of the sponge
* @param rowIn Row to feed the sponge
* @param rowOut Row to receive the sponge's output
*/
inline void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
//Absorbing "M[prev][col]"
state[0] ^= (ptrWordIn[0]);
state[1] ^= (ptrWordIn[1]);
state[2] ^= (ptrWordIn[2]);
state[3] ^= (ptrWordIn[3]);
state[4] ^= (ptrWordIn[4]);
state[5] ^= (ptrWordIn[5]);
state[6] ^= (ptrWordIn[6]);
state[7] ^= (ptrWordIn[7]);
state[8] ^= (ptrWordIn[8]);
state[9] ^= (ptrWordIn[9]);
state[10] ^= (ptrWordIn[10]);
state[11] ^= (ptrWordIn[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][C-1-col] = M[prev][col] XOR rand
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
//Input: next column (i.e., next block in sequence)
ptrWordIn += BLOCK_LEN_INT64;
//Output: goes to previous column
ptrWordOut -= BLOCK_LEN_INT64;
}
}
/**
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
* the wordwise addition of two columns, ignoring carries between words). The
* output of this operation, "rand", is then used to make
* "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left and N_COLS is a system parameter.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
inline void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][col] = M[prev][col] XOR rand
ptrWordOut[0] = ptrWordIn[0] ^ state[0];
ptrWordOut[1] = ptrWordIn[1] ^ state[1];
ptrWordOut[2] = ptrWordIn[2] ^ state[2];
ptrWordOut[3] = ptrWordIn[3] ^ state[3];
ptrWordOut[4] = ptrWordIn[4] ^ state[4];
ptrWordOut[5] = ptrWordIn[5] ^ state[5];
ptrWordOut[6] = ptrWordIn[6] ^ state[6];
ptrWordOut[7] = ptrWordIn[7] ^ state[7];
ptrWordOut[8] = ptrWordIn[8] ^ state[8];
ptrWordOut[9] = ptrWordIn[9] ^ state[9];
ptrWordOut[10] = ptrWordIn[10] ^ state[10];
ptrWordOut[11] = ptrWordIn[11] ^ state[11];
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[11];
ptrWordInOut[1] ^= state[0];
ptrWordInOut[2] ^= state[1];
ptrWordInOut[3] ^= state[2];
ptrWordInOut[4] ^= state[3];
ptrWordInOut[5] ^= state[4];
ptrWordInOut[6] ^= state[5];
ptrWordInOut[7] ^= state[6];
ptrWordInOut[8] ^= state[7];
ptrWordInOut[9] ^= state[8];
ptrWordInOut[10] ^= state[9];
ptrWordInOut[11] ^= state[10];
//Inputs: next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
//Output: goes to previous column
ptrWordOut -= BLOCK_LEN_INT64;
}
}
/**
* Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
* the wordwise addition of two columns, ignoring carries between words). The
* output of this operation, "rand", is then used to make
* "M[rowOut][col] = M[rowOut][col] XOR rand" and
* "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
inline void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < nCols; i++) {
//Absorbing "M[prev] [+] M[row*]"
state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]);
state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]);
state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]);
state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]);
state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]);
state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]);
state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]);
state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]);
state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]);
state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]);
state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[rowOut][col] = M[rowOut][col] XOR rand
ptrWordOut[0] ^= state[0];
ptrWordOut[1] ^= state[1];
ptrWordOut[2] ^= state[2];
ptrWordOut[3] ^= state[3];
ptrWordOut[4] ^= state[4];
ptrWordOut[5] ^= state[5];
ptrWordOut[6] ^= state[6];
ptrWordOut[7] ^= state[7];
ptrWordOut[8] ^= state[8];
ptrWordOut[9] ^= state[9];
ptrWordOut[10] ^= state[10];
ptrWordOut[11] ^= state[11];
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[11];
ptrWordInOut[1] ^= state[0];
ptrWordInOut[2] ^= state[1];
ptrWordInOut[3] ^= state[2];
ptrWordInOut[4] ^= state[3];
ptrWordInOut[5] ^= state[4];
ptrWordInOut[6] ^= state[5];
ptrWordInOut[7] ^= state[6];
ptrWordInOut[8] ^= state[7];
ptrWordInOut[9] ^= state[8];
ptrWordInOut[10] ^= state[9];
ptrWordInOut[11] ^= state[10];
//Goes to next block
ptrWordOut += BLOCK_LEN_INT64;
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0];
state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1];
state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2];
state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3];
state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4];
state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5];
state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6];
state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7];
state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8];
state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9];
state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10];
state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row][col] = rand
ptrWordOut[0] = state[0];
ptrWordOut[1] = state[1];
ptrWordOut[2] = state[2];
ptrWordOut[3] = state[3];
ptrWordOut[4] = state[4];
ptrWordOut[5] = state[5];
ptrWordOut[6] = state[6];
ptrWordOut[7] = state[7];
ptrWordOut[8] = state[8];
ptrWordOut[9] = state[9];
ptrWordOut[10] = state[10];
ptrWordOut[11] = state[11];
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
* on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
* rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordOut = rowOut;
int i;
for (i = 0; i < N_COLS / 2; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += 2 * BLOCK_LEN_INT64;
}
ptrWordOut = rowOut + BLOCK_LEN_INT64;
for (i = 0; i < N_COLS / 2; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[row*][col] = M[row*][col] XOR rotW(rand)
ptrWordInOut[0] ^= state[10];
ptrWordInOut[1] ^= state[11];
ptrWordInOut[2] ^= state[0];
ptrWordInOut[3] ^= state[1];
ptrWordInOut[4] ^= state[2];
ptrWordInOut[5] ^= state[3];
ptrWordInOut[6] ^= state[4];
ptrWordInOut[7] ^= state[5];
ptrWordInOut[8] ^= state[6];
ptrWordInOut[9] ^= state[7];
ptrWordInOut[10] ^= state[8];
ptrWordInOut[11] ^= state[9];
//M[row][col] = rand
ptrWordOut[0] = state[0] ^ ptrWordIn[0];
ptrWordOut[1] = state[1] ^ ptrWordIn[1];
ptrWordOut[2] = state[2] ^ ptrWordIn[2];
ptrWordOut[3] = state[3] ^ ptrWordIn[3];
ptrWordOut[4] = state[4] ^ ptrWordIn[4];
ptrWordOut[5] = state[5] ^ ptrWordIn[5];
ptrWordOut[6] = state[6] ^ ptrWordIn[6];
ptrWordOut[7] = state[7] ^ ptrWordIn[7];
ptrWordOut[8] = state[8] ^ ptrWordIn[8];
ptrWordOut[9] = state[9] ^ ptrWordIn[9];
ptrWordOut[10] = state[10] ^ ptrWordIn[10];
ptrWordOut[11] = state[11] ^ ptrWordIn[11];
//Goes to next column (i.e., next block in sequence)
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
ptrWordOut += 2 * BLOCK_LEN_INT64;
}
}
*/
/**
* Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
* to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)",
* where rotW is a 64-bit rotation to the left.
*
* @param state The current state of the sponge
* @param rowIn Row used only as input
* @param rowInOut Row used as input and to receive output after rotation
* @param rowOut Row receiving the output
*
*/
/*
inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
int i;
for (i = 0; i < N_COLS; i++) {
//Absorbing "M[rowInOut] XOR M[rowIn]"
state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
//Applies the reduced-round transformation f to the sponge's state
reducedBlake2bLyra(state);
//M[rowOut][col] = M[rowOut][col] XOR rand
ptrWordOut[0] ^= state[0];
ptrWordOut[1] ^= state[1];
ptrWordOut[2] ^= state[2];
ptrWordOut[3] ^= state[3];
ptrWordOut[4] ^= state[4];
ptrWordOut[5] ^= state[5];
ptrWordOut[6] ^= state[6];
ptrWordOut[7] ^= state[7];
ptrWordOut[8] ^= state[8];
ptrWordOut[9] ^= state[9];
ptrWordOut[10] ^= state[10];
ptrWordOut[11] ^= state[11];
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
//Goes to next block
ptrWordOut += BLOCK_LEN_INT64;
ptrWordInOut += BLOCK_LEN_INT64;
ptrWordIn += BLOCK_LEN_INT64;
}
}
*/
/**
Prints an array of unsigned chars
*/
void printArrayv2(unsigned char *array, unsigned int size, char *name) {
int i;
printf("%s: ", name);
for (i = 0; i < size; i++) {
printf("%2x|", array[i]);
}
printf("\n");
}
////////////////////////////////////////////////////////////////////////////////////////////////

108
algorithm/spongev2.h

@ -0,0 +1,108 @@ @@ -0,0 +1,108 @@
/**
* Header file for Blake2b's internal permutation in the form of a sponge.
* This code is based on the original Blake2b's implementation provided by
* Samuel Neves (https://blake2.net/)
*
* Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
*
* This software is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPONGE_H_
#define SPONGE_H_
#include <stdint.h>
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/*Blake2b IV Array*/
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/*Blake2b's rotation*/
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
return ( w >> c ) | ( w << ( 64 - c ) );
}
/*Blake2b's G function*/
#define G(r,i,a,b,c,d) \
do { \
a = a + b; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
/*One Round of the Blake2b's compression function*/
#define ROUND_LYRA(r) \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
//---- Housekeeping
extern void initStatev2(uint64_t state[/*16*/]);
//---- Squeezes
extern void squeezev2(uint64_t *state, unsigned char *out, unsigned int len);
extern void reducedSqueezeRow0v2(uint64_t* state, uint64_t* row, uint64_t nCols);
//---- Absorbs
extern void absorbBlockv2(uint64_t *state, const uint64_t *in);
extern void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in);
//---- Duplexes
extern void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
extern void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
extern void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
//---- Misc
void printArrayv2(unsigned char *array, unsigned int size, char *name);
////////////////////////////////////////////////////////////////////////////////////////////////
////TESTS////
//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
/////////////
#endif /* SPONGE_H_ */

118
algorithm/whirlpoolx.c

@ -34,7 +34,7 @@ @@ -34,7 +34,7 @@
#include <stdint.h>
#include <string.h>
#include "whirlpoolx.h"
#include "sph/sph_whirlpool.h"
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
@ -50,124 +50,16 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) @@ -50,124 +50,16 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
}
void whirlpool_compress(uint8_t state[64], const uint8_t block[64])
{
const int NUM_ROUNDS = 10;
uint64_t tempState[8];
uint64_t tempBlock[8];
int i;
// Initialization
for (i = 0; i < 8; i++) {
tempState[i] =
(uint64_t)state[i << 3]
| (uint64_t)state[(i << 3) + 1] << 8
| (uint64_t)state[(i << 3) + 2] << 16
| (uint64_t)state[(i << 3) + 3] << 24
| (uint64_t)state[(i << 3) + 4] << 32
| (uint64_t)state[(i << 3) + 5] << 40
| (uint64_t)state[(i << 3) + 6] << 48
| (uint64_t)state[(i << 3) + 7] << 56;
tempBlock[i] = (
(uint64_t)block[i << 3]
| (uint64_t)block[(i << 3) + 1] << 8
| (uint64_t)block[(i << 3) + 2] << 16
| (uint64_t)block[(i << 3) + 3] << 24
| (uint64_t)block[(i << 3) + 4] << 32
| (uint64_t)block[(i << 3) + 5] << 40
| (uint64_t)block[(i << 3) + 6] << 48
| (uint64_t)block[(i << 3) + 7] << 56) ^ tempState[i];
}
// Hashing rounds
uint64_t rcon[8];
memset(rcon + 1, 0, sizeof(rcon[0]) * 7);
for (i = 0; i < NUM_ROUNDS; i++) {
rcon[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
whirlpool_round(tempState, rcon);
whirlpool_round(tempBlock, tempState);
}
// Final combining
for (i = 0; i < 64; i++)
state[i] ^= block[i] ^ (uint8_t)(tempBlock[i >> 3] >> ((i & 7) << 3));
}
void whirlpool_round(uint64_t block[8], const uint64_t key[8]) {
uint64_t a = block[0];
uint64_t b = block[1];
uint64_t c = block[2];
uint64_t d = block[3];
uint64_t e = block[4];
uint64_t f = block[5];
uint64_t g = block[6];
uint64_t h = block[7];
uint64_t r;
#define DOROW(i, s, t, u, v, w, x, y, z) \
r = MAGIC_TABLE[(uint8_t)s]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(t >> 8)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(u >> 16)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(v >> 24)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(w >> 32)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(x >> 40)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(y >> 48)]; r = (r << 56) | (r >> 8); \
r ^= MAGIC_TABLE[(uint8_t)(z >> 56)]; r = (r << 56) | (r >> 8); \
block[i] = r ^ key[i];
DOROW(0, a, h, g, f, e, d, c, b)
DOROW(1, b, a, h, g, f, e, d, c)
DOROW(2, c, b, a, h, g, f, e, d)
DOROW(3, d, c, b, a, h, g, f, e)
DOROW(4, e, d, c, b, a, h, g, f)
DOROW(5, f, e, d, c, b, a, h, g)
DOROW(6, g, f, e, d, c, b, a, h)
DOROW(7, h, g, f, e, d, c, b, a)
}
void whirlpool_hash(const uint8_t *message, uint32_t len, uint8_t hash[64]) {
memset(hash, 0, 64);
uint32_t i;
for (i = 0; len - i >= 64; i += 64)
whirlpool_compress(hash, message + i);
uint8_t block[64];
uint32_t rem = len - i;
memcpy(block, message + i, rem);
block[rem] = 0x80;
rem++;
if (64 - rem >= 32)
memset(block + rem, 0, 56 - rem);
else {
memset(block + rem, 0, 64 - rem);
whirlpool_compress(hash, block);
memset(block, 0, 56);
}
uint64_t longLen = ((uint64_t)len) << 3;
for (i = 0; i < 8; i++)
block[64 - 1 - i] = (uint8_t)(longLen >> (i * 8));
whirlpool_compress(hash, block);
}
void whirlpoolx_hash(void *state, const void *input)
{
//sph_whirlpool1_context ctx;
sph_whirlpool1_context ctx;
//sph_whirlpool1_init(&ctx);
sph_whirlpool1_init(&ctx);
uint8_t digest[64];
//sph_whirlpool(&ctx, input, 80);
//sph_whirlpool_close(&ctx, digest);
whirlpool_hash((uint8_t *)input, 80, digest);
sph_whirlpool(&ctx, input, 80);
sph_whirlpool_close(&ctx, digest);
uint8_t digest_xored[32];

48
algorithm/whirlpoolx.h

@ -1,58 +1,10 @@ @@ -1,58 +1,10 @@
#ifndef WHIRLPOOLX_H
#define WHIRLPOOLX_H
#include <stdint.h>
#include "miner.h"
// The combined effect of gamma (SubBytes) and theta (MixRows)
static uint64_t MAGIC_TABLE[256] = {
UINT64_C(0xD83078C018601818), UINT64_C(0x2646AF05238C2323), UINT64_C(0xB891F97EC63FC6C6), UINT64_C(0xFBCD6F13E887E8E8), UINT64_C(0xCB13A14C87268787), UINT64_C(0x116D62A9B8DAB8B8), UINT64_C(0x0902050801040101), UINT64_C(0x0D9E6E424F214F4F),
UINT64_C(0x9B6CEEAD36D83636), UINT64_C(0xFF510459A6A2A6A6), UINT64_C(0x0CB9BDDED26FD2D2), UINT64_C(0x0EF706FBF5F3F5F5), UINT64_C(0x96F280EF79F97979), UINT64_C(0x30DECE5F6FA16F6F), UINT64_C(0x6D3FEFFC917E9191), UINT64_C(0xF8A407AA52555252),
UINT64_C(0x47C0FD27609D6060), UINT64_C(0x35657689BCCABCBC), UINT64_C(0x372BCDAC9B569B9B), UINT64_C(0x8A018C048E028E8E), UINT64_C(0xD25B1571A3B6A3A3), UINT64_C(0x6C183C600C300C0C), UINT64_C(0x84F68AFF7BF17B7B), UINT64_C(0x806AE1B535D43535),
UINT64_C(0xF53A69E81D741D1D), UINT64_C(0xB3DD4753E0A7E0E0), UINT64_C(0x21B3ACF6D77BD7D7), UINT64_C(0x9C99ED5EC22FC2C2), UINT64_C(0x435C966D2EB82E2E), UINT64_C(0x29967A624B314B4B), UINT64_C(0x5DE121A3FEDFFEFE), UINT64_C(0xD5AE168257415757),
UINT64_C(0xBD2A41A815541515), UINT64_C(0xE8EEB69F77C17777), UINT64_C(0x926EEBA537DC3737), UINT64_C(0x9ED7567BE5B3E5E5), UINT64_C(0x1323D98C9F469F9F), UINT64_C(0x23FD17D3F0E7F0F0), UINT64_C(0x20947F6A4A354A4A), UINT64_C(0x44A9959EDA4FDADA),
UINT64_C(0xA2B025FA587D5858), UINT64_C(0xCF8FCA06C903C9C9), UINT64_C(0x7C528D5529A42929), UINT64_C(0x5A1422500A280A0A), UINT64_C(0x507F4FE1B1FEB1B1), UINT64_C(0xC95D1A69A0BAA0A0), UINT64_C(0x14D6DA7F6BB16B6B), UINT64_C(0xD917AB5C852E8585),
UINT64_C(0x3C677381BDCEBDBD), UINT64_C(0x8FBA34D25D695D5D), UINT64_C(0x9020508010401010), UINT64_C(0x07F503F3F4F7F4F4), UINT64_C(0xDD8BC016CB0BCBCB), UINT64_C(0xD37CC6ED3EF83E3E), UINT64_C(0x2D0A112805140505), UINT64_C(0x78CEE61F67816767),
UINT64_C(0x97D55373E4B7E4E4), UINT64_C(0x024EBB25279C2727), UINT64_C(0x7382583241194141), UINT64_C(0xA70B9D2C8B168B8B), UINT64_C(0xF6530151A7A6A7A7), UINT64_C(0xB2FA94CF7DE97D7D), UINT64_C(0x4937FBDC956E9595), UINT64_C(0x56AD9F8ED847D8D8),
UINT64_C(0x70EB308BFBCBFBFB), UINT64_C(0xCDC17123EE9FEEEE), UINT64_C(0xBBF891C77CED7C7C), UINT64_C(0x71CCE31766856666), UINT64_C(0x7BA78EA6DD53DDDD), UINT64_C(0xAF2E4BB8175C1717), UINT64_C(0x458E460247014747), UINT64_C(0x1A21DC849E429E9E),
UINT64_C(0xD489C51ECA0FCACA), UINT64_C(0x585A99752DB42D2D), UINT64_C(0x2E637991BFC6BFBF), UINT64_C(0x3F0E1B38071C0707), UINT64_C(0xAC472301AD8EADAD), UINT64_C(0xB0B42FEA5A755A5A), UINT64_C(0xEF1BB56C83368383), UINT64_C(0xB666FF8533CC3333),
UINT64_C(0x5CC6F23F63916363), UINT64_C(0x12040A1002080202), UINT64_C(0x93493839AA92AAAA), UINT64_C(0xDEE2A8AF71D97171), UINT64_C(0xC68DCF0EC807C8C8), UINT64_C(0xD1327DC819641919), UINT64_C(0x3B92707249394949), UINT64_C(0x5FAF9A86D943D9D9),
UINT64_C(0x31F91DC3F2EFF2F2), UINT64_C(0xA8DB484BE3ABE3E3), UINT64_C(0xB9B62AE25B715B5B), UINT64_C(0xBC0D9234881A8888), UINT64_C(0x3E29C8A49A529A9A), UINT64_C(0x0B4CBE2D26982626), UINT64_C(0xBF64FA8D32C83232), UINT64_C(0x597D4AE9B0FAB0B0),
UINT64_C(0xF2CF6A1BE983E9E9), UINT64_C(0x771E33780F3C0F0F), UINT64_C(0x33B7A6E6D573D5D5), UINT64_C(0xF41DBA74803A8080), UINT64_C(0x27617C99BEC2BEBE), UINT64_C(0xEB87DE26CD13CDCD), UINT64_C(0x8968E4BD34D03434), UINT64_C(0x3290757A483D4848),
UINT64_C(0x54E324ABFFDBFFFF), UINT64_C(0x8DF48FF77AF57A7A), UINT64_C(0x643DEAF4907A9090), UINT64_C(0x9DBE3EC25F615F5F), UINT64_C(0x3D40A01D20802020), UINT64_C(0x0FD0D56768BD6868), UINT64_C(0xCA3472D01A681A1A), UINT64_C(0xB7412C19AE82AEAE),
UINT64_C(0x7D755EC9B4EAB4B4), UINT64_C(0xCEA8199A544D5454), UINT64_C(0x7F3BE5EC93769393), UINT64_C(0x2F44AA0D22882222), UINT64_C(0x63C8E907648D6464), UINT64_C(0x2AFF12DBF1E3F1F1), UINT64_C(0xCCE6A2BF73D17373), UINT64_C(0x82245A9012481212),
UINT64_C(0x7A805D3A401D4040), UINT64_C(0x4810284008200808), UINT64_C(0x959BE856C32BC3C3), UINT64_C(0xDFC57B33EC97ECEC), UINT64_C(0x4DAB9096DB4BDBDB), UINT64_C(0xC05F1F61A1BEA1A1), UINT64_C(0x9107831C8D0E8D8D), UINT64_C(0xC87AC9F53DF43D3D),
UINT64_C(0x5B33F1CC97669797), UINT64_C(0x0000000000000000), UINT64_C(0xF983D436CF1BCFCF), UINT64_C(0x6E5687452BAC2B2B), UINT64_C(0xE1ECB39776C57676), UINT64_C(0xE619B06482328282), UINT64_C(0x28B1A9FED67FD6D6), UINT64_C(0xC33677D81B6C1B1B),
UINT64_C(0x74775BC1B5EEB5B5), UINT64_C(0xBE432911AF86AFAF), UINT64_C(0x1DD4DF776AB56A6A), UINT64_C(0xEAA00DBA505D5050), UINT64_C(0x578A4C1245094545), UINT64_C(0x38FB18CBF3EBF3F3), UINT64_C(0xAD60F09D30C03030), UINT64_C(0xC4C3742BEF9BEFEF),
UINT64_C(0xDA7EC3E53FFC3F3F), UINT64_C(0xC7AA1C9255495555), UINT64_C(0xDB591079A2B2A2A2), UINT64_C(0xE9C96503EA8FEAEA), UINT64_C(0x6ACAEC0F65896565), UINT64_C(0x036968B9BAD2BABA), UINT64_C(0x4A5E93652FBC2F2F), UINT64_C(0x8E9DE74EC027C0C0),
UINT64_C(0x60A181BEDE5FDEDE), UINT64_C(0xFC386CE01C701C1C), UINT64_C(0x46E72EBBFDD3FDFD), UINT64_C(0x1F9A64524D294D4D), UINT64_C(0x7639E0E492729292), UINT64_C(0xFAEABC8F75C97575), UINT64_C(0x360C1E3006180606), UINT64_C(0xAE0998248A128A8A),
UINT64_C(0x4B7940F9B2F2B2B2), UINT64_C(0x85D15963E6BFE6E6), UINT64_C(0x7E1C36700E380E0E), UINT64_C(0xE73E63F81F7C1F1F), UINT64_C(0x55C4F73762956262), UINT64_C(0x3AB5A3EED477D4D4), UINT64_C(0x814D3229A89AA8A8), UINT64_C(0x5231F4C496629696),
UINT64_C(0x62EF3A9BF9C3F9F9), UINT64_C(0xA397F666C533C5C5), UINT64_C(0x104AB13525942525), UINT64_C(0xABB220F259795959), UINT64_C(0xD015AE54842A8484), UINT64_C(0xC5E4A7B772D57272), UINT64_C(0xEC72DDD539E43939), UINT64_C(0x1698615A4C2D4C4C),
UINT64_C(0x94BC3BCA5E655E5E), UINT64_C(0x9FF085E778FD7878), UINT64_C(0xE570D8DD38E03838), UINT64_C(0x980586148C0A8C8C), UINT64_C(0x17BFB2C6D163D1D1), UINT64_C(0xE4570B41A5AEA5A5), UINT64_C(0xA1D94D43E2AFE2E2), UINT64_C(0x4EC2F82F61996161),
UINT64_C(0x427B45F1B3F6B3B3), UINT64_C(0x3442A51521842121), UINT64_C(0x0825D6949C4A9C9C), UINT64_C(0xEE3C66F01E781E1E), UINT64_C(0x6186522243114343), UINT64_C(0xB193FC76C73BC7C7), UINT64_C(0x4FE52BB3FCD7FCFC), UINT64_C(0x2408142004100404),
UINT64_C(0xE3A208B251595151), UINT64_C(0x252FC7BC995E9999), UINT64_C(0x22DAC44F6DA96D6D), UINT64_C(0x651A39680D340D0D), UINT64_C(0x79E93583FACFFAFA), UINT64_C(0x69A384B6DF5BDFDF), UINT64_C(0xA9FC9BD77EE57E7E), UINT64_C(0x1948B43D24902424),
UINT64_C(0xFE76D7C53BEC3B3B), UINT64_C(0x9A4B3D31AB96ABAB), UINT64_C(0xF081D13ECE1FCECE), UINT64_C(0x9922558811441111), UINT64_C(0x8303890C8F068F8F), UINT64_C(0x049C6B4A4E254E4E), UINT64_C(0x667351D1B7E6B7B7), UINT64_C(0xE0CB600BEB8BEBEB),
UINT64_C(0xC178CCFD3CF03C3C), UINT64_C(0xFD1FBF7C813E8181), UINT64_C(0x4035FED4946A9494), UINT64_C(0x1CF30CEBF7FBF7F7), UINT64_C(0x186F67A1B9DEB9B9), UINT64_C(0x8B265F98134C1313), UINT64_C(0x51589C7D2CB02C2C), UINT64_C(0x05BBB8D6D36BD3D3),
UINT64_C(0x8CD35C6BE7BBE7E7), UINT64_C(0x39DCCB576EA56E6E), UINT64_C(0xAA95F36EC437C4C4), UINT64_C(0x1B060F18030C0303), UINT64_C(0xDCAC138A56455656), UINT64_C(0x5E88491A440D4444), UINT64_C(0xA0FE9EDF7FE17F7F), UINT64_C(0x884F3721A99EA9A9),
UINT64_C(0x6754824D2AA82A2A), UINT64_C(0x0A6B6DB1BBD6BBBB), UINT64_C(0x879FE246C123C1C1), UINT64_C(0xF1A602A253515353), UINT64_C(0x72A58BAEDC57DCDC), UINT64_C(0x531627580B2C0B0B), UINT64_C(0x0127D39C9D4E9D9D), UINT64_C(0x2BD8C1476CAD6C6C),
UINT64_C(0xA462F59531C43131), UINT64_C(0xF3E8B98774CD7474), UINT64_C(0x15F109E3F6FFF6F6), UINT64_C(0x4C8C430A46054646), UINT64_C(0xA5452609AC8AACAC), UINT64_C(0xB50F973C891E8989), UINT64_C(0xB42844A014501414), UINT64_C(0xBADF425BE1A3E1E1),
UINT64_C(0xA62C4EB016581616), UINT64_C(0xF774D2CD3AE83A3A), UINT64_C(0x06D2D06F69B96969), UINT64_C(0x41122D4809240909), UINT64_C(0xD7E0ADA770DD7070), UINT64_C(0x6F7154D9B6E2B6B6), UINT64_C(0x1EBDB7CED067D0D0), UINT64_C(0xD6C77E3BED93EDED),
UINT64_C(0xE285DB2ECC17CCCC), UINT64_C(0x6884572A42154242), UINT64_C(0x2C2DC2B4985A9898), UINT64_C(0xED550E49A4AAA4A4), UINT64_C(0x7550885D28A02828), UINT64_C(0x86B831DA5C6D5C5C), UINT64_C(0x6BED3F93F8C7F8F8), UINT64_C(0xC211A44486228686),
};
static uint64_t WHIRLPOOL_ROUND_CONSTANTS[32] = {
UINT64_C(0x4F01B887E8C62318), UINT64_C(0x52916F79F5D2A636), UINT64_C(0x357B0CA38E9BBC60), UINT64_C(0x57FE4B2EC2D7E01D),
UINT64_C(0xDA4AF09FE5377715), UINT64_C(0x856BA0B10A29C958), UINT64_C(0x67053ECBF4105DBD), UINT64_C(0xD8957DA78B4127E4),
UINT64_C(0x9E4717DD667CEEFB), UINT64_C(0x33835AAD07BF2DCA), UINT64_C(0xD94919C871AA0263), UINT64_C(0xB032269A885BE3F2),
UINT64_C(0x4834CDBE80D50FE9), UINT64_C(0xAE1A68205F907AFF), UINT64_C(0x1273F164229354B4), UINT64_C(0x3D8DA1DBECC30840),
UINT64_C(0x1BD682762BCF0097), UINT64_C(0xEF30F345506AAFB5), UINT64_C(0xC02FBA65EAA2553F), UINT64_C(0x8A0675924DFD1CDE),
UINT64_C(0x96A8D4621F0EE6B2), UINT64_C(0x4C3972845925C5F9), UINT64_C(0x61E2A5D18C38785E), UINT64_C(0x04FCC7431E9C21B3),
UINT64_C(0x247EDFFA0D6D9951), UINT64_C(0xEBB74E8F11CEAB3B), UINT64_C(0xD32C13B9F794813C), UINT64_C(0xA97F445603C46EE7),
UINT64_C(0x6C9D0BDC53C1BB2A), UINT64_C(0xE11489AC46F67431), UINT64_C(0xEDD0B67009693A16), UINT64_C(0x86F85C28A49842CC),
};
extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
extern void whirlpoolx_regenhash(struct work *work);
extern void whirlpool_round(uint64_t block[8], const uint64_t key[8]);
#endif /* WHIRLPOOLX_H */

8
algorithm/yescrypt-opt.c

@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size) @@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size)
if (size + 63 < size) {
errno = ENOMEM;
}
else if ((base = malloc(size + 63)) != NULL) {
else if ((base = (uint8_t *)malloc(size + 63)) != NULL) {
aligned = base + 63;
aligned -= (uintptr_t)aligned & 63;
}
@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags, @@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
uint64_t * XY, uint64_t * S)
{
void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8);
const uint64_t * VROM = shared->shared1.aligned;
const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
uint32_t VROM_mask = shared->mask1;
size_t s = 16 * r;
uint64_t * X = V;
@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop, @@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) =
(S ? blockmix_pwxform : blockmix_salsa8);
const uint64_t * VROM = shared->shared1.aligned;
const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
uint32_t VROM_mask = shared->mask1 | 1;
size_t s = 16 * r;
yescrypt_flags_t rw = flags & YESCRYPT_RW;
@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t, @@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;
if (Sp)
smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);
smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);

2
api.c

@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m @@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR);
root = api_add_string(root, "Miner", PACKAGE " " VERSION, false);
root = api_add_string(root, "CGMiner", CGMINER_VERSION, false);
root = api_add_string(root, "SGMiner", VERSION, false);
root = api_add_const(root, "API", APIVERSION, false);
root = print_data(root, buf, isjson, false);

2
driver-opencl.c

@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr) @@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr)
static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
{
if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) {
if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REV2) {
work->blk.work = work;
precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data));
}

8
kernel/lyra2rev2.cl

@ -31,8 +31,8 @@ @@ -31,8 +31,8 @@
// typedef unsigned int uint;
#pragma OPENCL EXTENSION cl_amd_printf : enable
#ifndef LYRA2RE_CL
#define LYRA2RE_CL
#ifndef LYRA2REV2_CL
#define LYRA2REV2_CL
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
@ -90,7 +90,7 @@ static inline sph_u64 ror64(sph_u64 vw, unsigned a) { @@ -90,7 +90,7 @@ static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
//#define SPH_ROTR64(l,n) ror64(l,n)
#define memshift 3
#include "blake256.cl"
#include "lyra2v2.cl"
#include "Lyra2v2.cl"
#include "keccak1600.cl"
#include "skein256.cl"
#include "cubehash.cl"
@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong @@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong
}
#endif // LYRA2RE_CL
#endif // LYRA2REV2_CL

97
kernel/neoscrypt.cl

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */
/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */
// NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20
// Stupid AMD compiler ignores the unroll pragma in these two
#define SALSA_SMALL_UNROLL 3
@ -351,74 +350,71 @@ uint16 salsa_small_scalar_rnd(uint16 X) @@ -351,74 +350,71 @@ uint16 salsa_small_scalar_rnd(uint16 X)
return(X + st);
}
#define CHACHA_CORE_PARALLEL(state) do { \
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \
state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \
\
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \
state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \
#define CHACHA_CORE(state) do { \
state.s0 += state.s4; state.sc = as_uint(as_ushort2(state.sc ^ state.s0).s10); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 12U); state.s0 += state.s4; state.sc = rotate(state.sc ^ state.s0, 8U); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 7U); \
state.s1 += state.s5; state.sd = as_uint(as_ushort2(state.sd ^ state.s1).s10); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 12U); state.s1 += state.s5; state.sd = rotate(state.sd ^ state.s1, 8U); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 7U); \
state.s2 += state.s6; state.se = as_uint(as_ushort2(state.se ^ state.s2).s10); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 12U); state.s2 += state.s6; state.se = rotate(state.se ^ state.s2, 8U); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 7U); \
state.s3 += state.s7; state.sf = as_uint(as_ushort2(state.sf ^ state.s3).s10); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 12U); state.s3 += state.s7; state.sf = rotate(state.sf ^ state.s3, 8U); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 7U); \
state.s0 += state.s5; state.sf = as_uint(as_ushort2(state.sf ^ state.s0).s10); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 12U); state.s0 += state.s5; state.sf = rotate(state.sf ^ state.s0, 8U); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 7U); \
state.s1 += state.s6; state.sc = as_uint(as_ushort2(state.sc ^ state.s1).s10); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 12U); state.s1 += state.s6; state.sc = rotate(state.sc ^ state.s1, 8U); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 7U); \
state.s2 += state.s7; state.sd = as_uint(as_ushort2(state.sd ^ state.s2).s10); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 12U); state.s2 += state.s7; state.sd = rotate(state.sd ^ state.s2, 8U); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 7U); \
state.s3 += state.s4; state.se = as_uint(as_ushort2(state.se ^ state.s3).s10); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 12U); state.s3 += state.s4; state.se = rotate(state.se ^ state.s3, 8U); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 7U); \
} while(0)
uint16 chacha_small_parallel_rnd(uint16 X)
{
uint4 t, st[4];
((uint16 *)st)[0] = X;
uint16 chacha_small_scalar_rnd(uint16 X)
{
uint16 st = X;
#if CHACHA_SMALL_UNROLL == 1
for(int i = 0; i < 10; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
}
#elif CHACHA_SMALL_UNROLL == 2
for(int i = 0; i < 5; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
}
#elif CHACHA_SMALL_UNROLL == 3
for(int i = 0; i < 4; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
if(i == 3) break;
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
}
#elif CHACHA_SMALL_UNROLL == 4
for(int i = 0; i < 3; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
if(i == 2) break;
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
}
#else
for(int i = 0; i < 2; ++i)
{
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE_PARALLEL(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
CHACHA_CORE(st);
}
#endif
return(X + ((uint16 *)st)[0]);
return(X + st);
}
void neoscrypt_blkmix(uint16 *XV, bool alg)
@ -443,10 +439,10 @@ void neoscrypt_blkmix(uint16 *XV, bool alg) @@ -443,10 +439,10 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
}
else
{
XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0];
XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1];
XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2];
XV[3] = chacha_small_parallel_rnd(XV[3]);
XV[0] = chacha_small_scalar_rnd(XV[0]); XV[1] ^= XV[0];
XV[1] = chacha_small_scalar_rnd(XV[1]); XV[2] ^= XV[1];
XV[2] = chacha_small_scalar_rnd(XV[2]); XV[3] ^= XV[2];
XV[3] = chacha_small_scalar_rnd(XV[3]);
}
XV[1] ^= XV[2];
@ -454,7 +450,7 @@ void neoscrypt_blkmix(uint16 *XV, bool alg) @@ -454,7 +450,7 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
XV[1] ^= XV[2];
}
void ScratchpadStore(__global void *V, void *X, uchar idx)
void ScratchpadStore(__global void *V, const void *X, uchar idx)
{
((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0];
((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1];
@ -466,20 +462,34 @@ void ScratchpadMix(void *X, const __global void *V, uchar idx) @@ -466,20 +462,34 @@ void ScratchpadMix(void *X, const __global void *V, uchar idx)
((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1];
}
void SMix(uint16 *X, __global uint16 *V, bool flag)
void ScratchpadLoad(void *X, const __global void *V, uchar idx)
{
((ulong16 *)X)[0] = ((__global ulong16 *)V)[idx << 1];
((ulong16 *)X)[1] = ((__global ulong16 *)V)[(idx << 1) + 1];
}
void SMix(uint16 *X, __global uint16 *V, bool flag)
{
#pragma unroll 1
for(int i = 0; i < 128; ++i)
for(int i = 0; i < 64; ++i)
{
ScratchpadStore(V, X, i);
neoscrypt_blkmix(X, flag);
neoscrypt_blkmix(X, flag);
}
#pragma unroll 1
for(int i = 0; i < 128; ++i)
{
uint16 tmp[4];
const uint idx = convert_uchar(((uint *)X)[48] & 0x7F);
ScratchpadMix(X, V, idx);
ScratchpadLoad(tmp, V, idx >> 1);
if(idx & 1) neoscrypt_blkmix(tmp, flag);
((ulong16 *)X)[0] ^= ((ulong16 *)tmp)[0];
((ulong16 *)X)[1] ^= ((ulong16 *)tmp)[1];
neoscrypt_blkmix(X, flag);
}
}
@ -492,7 +502,8 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri @@ -492,7 +502,8 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri
// X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha
uint16 X[4], Z[4];
/* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */
__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
//__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
__global ulong16 *V = (__global ulong16 *)(padcache + (0x4000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
uchar outbuf[32];
uchar data[PASSWORD_LEN];

1537
kernel/whirlpoolx.cl

File diff suppressed because it is too large Load Diff

16
miner.h

@ -3,17 +3,6 @@ @@ -3,17 +3,6 @@
#include "config.h"
#if defined(USE_GIT_VERSION) && defined(GIT_VERSION)
#undef VERSION
#define VERSION GIT_VERSION
#endif
#ifdef BUILD_NUMBER
#define CGMINER_VERSION VERSION "-" BUILD_NUMBER
#else
#define CGMINER_VERSION VERSION
#endif
#include "algorithm.h"
#include <stdbool.h>
@ -1045,6 +1034,7 @@ extern bool opt_protocol; @@ -1045,6 +1034,7 @@ extern bool opt_protocol;
extern bool have_longpoll;
extern char *opt_kernel_path;
extern char *opt_socks_proxy;
extern bool opt_lyra;
#if defined(unix) || defined(__APPLE__)
extern char *opt_stderr_cmd;
@ -1165,8 +1155,8 @@ extern struct pool *add_pool(void); @@ -1165,8 +1155,8 @@ extern struct pool *add_pool(void);
extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass, char *name, char *desc, char *profile, char *algo);
#define MAX_GPUDEVICES 16
#define MAX_DEVICES 4096
//#define MAX_DEVICES 4096
#define MAX_DEVICES 8192
#define MIN_INTENSITY 4
#define MIN_INTENSITY_STR "4"
#define MAX_INTENSITY 31

19
ocl.c

@ -36,8 +36,8 @@ @@ -36,8 +36,8 @@
#include "ocl/binary_kernel.h"
#include "algorithm/neoscrypt.h"
#include "algorithm/pluck.h"
#include "algorithm/yescrypt.h"
#include "algorithm/lyra2re.h"
//#include "algorithm/yescrypt.h"
#include "algorithm/lyra2rev2.h"
/* FIXME: only here for global config vars, replace with configuration.h
* or similar as soon as config is in a struct instead of littered all
@ -500,6 +500,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -500,6 +500,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
}
#if 0
// Yescrypt TC
else if ((cgpu->algorithm.type == ALGO_YESCRYPT ||
algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) {
@ -584,9 +585,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -584,9 +585,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
}
#endif
// Lyra2re v2 TC
else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) {
// Lyra2REv2 TC
else if (cgpu->algorithm.type == ALGO_LYRA2REV2 /*&& !cgpu->opt_tc*/) {
size_t glob_thread_count;
long max_int;
unsigned char type = 0;
@ -784,6 +786,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -784,6 +786,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
// scrypt/n-scrypt
}
#if 0
else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
@ -797,7 +800,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -797,7 +800,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
// scrypt/n-scrypt
}
else if (algorithm->type == ALGO_LYRA2REv2) {
#endif
else if (algorithm->type == ALGO_LYRA2REV2) {
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
@ -835,6 +839,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -835,6 +839,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
}
#if 0
if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
// need additionnal buffers
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
@ -855,7 +860,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg @@ -855,7 +860,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL;
}
}
else if (algorithm->type == ALGO_LYRA2REv2) {
else
#endif
if (algorithm->type == ALGO_LYRA2REV2) {
// need additionnal buffers
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer1) {

3
ocl.h

@ -10,7 +10,8 @@ @@ -10,7 +10,8 @@
#include <CL/cl.h>
#endif
#include "algorithm.h"
//#include "algorithm.h"
#include "miner.h"
typedef struct __clState {
cl_context context;

2
ocl/binary_kernel.c

@ -1,7 +1,5 @@ @@ -1,7 +1,5 @@
#include "binary_kernel.h"
#include "miner.h"
#include <sys/stat.h>
#include <stdio.h>
cl_program load_opencl_binary_kernel(build_kernel_data *data)
{

2
ocl/build_kernel.c

@ -1,6 +1,4 @@ @@ -1,6 +1,4 @@
#include <stdio.h>
#include "build_kernel.h"
#include "miner.h"
static char *file_contents(const char *filename, int *length)
{

1
ocl/build_kernel.h

@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
#ifndef BUILD_KERNEL_H
#define BUILD_KERNEL_H
#include "ocl.h"
#include <stdbool.h>
#include "logging.h"

22
sgminer.c

@ -68,6 +68,10 @@ char *curly = ":D"; @@ -68,6 +68,10 @@ char *curly = ":D";
#include <sys/wait.h>
#endif
#if defined(USE_GIT_VERSION) && defined(GIT_VERSION)
#undef VERSION
#define VERSION GIT_VERSION
#endif
static char packagename[256];
@ -2148,7 +2152,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work) @@ -2148,7 +2152,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work)
}
// Neoscrypt doesn't calc_midstate()
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type != ALGO_NEOSCRYPT) {
calc_midstate(work);
}
local_work++;
@ -2567,7 +2571,7 @@ static void curses_print_status(void) @@ -2567,7 +2571,7 @@ static void curses_print_status(void)
unsigned short int line = 0;
wattron(statuswin, A_BOLD);
cg_mvwprintw(statuswin, line, 0, PACKAGE " " CGMINER_VERSION " - Started: %s", datestamp);
cg_mvwprintw(statuswin, line, 0, PACKAGE " " VERSION " - Started: %s", datestamp);
curses_print_uptime(&launch_time);
wattroff(statuswin, A_BOLD);
@ -5574,7 +5578,7 @@ static void *stratum_sthread(void *userdata) @@ -5574,7 +5578,7 @@ static void *stratum_sthread(void *userdata)
applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name);
// Neoscrypt is little endian
if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
nonce = htobe32(*((uint32_t *)(work->data + 76)));
//*((uint32_t *)nonce2) = htole32(work->nonce2);
}
@ -6078,7 +6082,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) @@ -6078,7 +6082,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name);
// Different for Neoscrypt because of Little Endian
if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
/* Incoming data is in little endian. */
memcpy(merkle_root, merkle_sha, 32);
@ -6140,7 +6144,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) @@ -6140,7 +6144,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
}
// For Neoscrypt use set_target_neoscrypt() function
if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
set_target_neoscrypt(work->target, work->sdiff, work->thr_id);
} else {
calc_midstate(work);
@ -6238,7 +6242,7 @@ static void apply_initial_gpu_settings(struct pool *pool) @@ -6238,7 +6242,7 @@ static void apply_initial_gpu_settings(struct pool *pool)
//thread-concurrency
// neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings
if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency));
}
// otherwise use pool/profile setting or default to default profile setting
@ -6562,7 +6566,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool) @@ -6562,7 +6566,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool)
if(opt_isset(options, SWITCHER_APPLY_TC))
{
// neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings
if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
if (pool->algorithm.type == ALGO_NEOSCRYPT) {
opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency));
}
// otherwise use pool/profile setting or default to default profile setting
@ -8700,7 +8704,7 @@ int main(int argc, char *argv[]) @@ -8700,7 +8704,7 @@ int main(int argc, char *argv[])
/* We use the getq mutex as the staged lock */
stgd_lock = &getq->mutex;
snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, CGMINER_VERSION);
snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, VERSION);
#ifndef WIN32
signal(SIGPIPE, SIG_IGN);
@ -8734,7 +8738,7 @@ int main(int argc, char *argv[]) @@ -8734,7 +8738,7 @@ int main(int argc, char *argv[])
#endif
/* Default algorithm specified in algorithm.c ATM */
set_algorithm(&default_profile.algorithm, "scrypt");
set_algorithm(&default_profile.algorithm, "x11");
devcursor = 8;
logstart = devcursor + 1;

6
util.c

@ -1791,7 +1791,7 @@ static bool send_version(struct pool *pool, json_t *val) @@ -1791,7 +1791,7 @@ static bool send_version(struct pool *pool, json_t *val)
if (!id)
return false;
sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"CGMINER_VERSION"\", \"error\": null}", id);
sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"VERSION"\", \"error\": null}", id);
if (!stratum_send(pool, s, strlen(s)))
return false;
@ -2480,9 +2480,9 @@ resend: @@ -2480,9 +2480,9 @@ resend:
sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++);
} else {
if (pool->sessionid)
sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\", \"%s\"]}", swork_id++, pool->sessionid);
sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\", \"%s\"]}", swork_id++, pool->sessionid);
else
sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\"]}", swork_id++);
sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\"]}", swork_id++);
}
if (__stratum_send(pool, s, strlen(s)) != SEND_OK) {

6
winbuild/dist/include/config.h vendored

@ -67,11 +67,11 @@ @@ -67,11 +67,11 @@
#endif
#define VERSION "v5.2.0"
#define VERSION "5.2.1"
#define PACKAGE_NAME "sgminer"
#define PACKAGE_TARNAME "sgminer"
#define PACKAGE_VERSION "5.2.0"
#define PACKAGE_STRING "sgminer 5.2.0"
#define PACKAGE_VERSION "5.2.1"
#define PACKAGE_STRING "sgminer 5.2.1"
#define PACKAGE "sgminer"
#define SGMINER_PREFIX ""

26
winbuild/sgminer.vcxproj

@ -115,16 +115,16 @@ @@ -115,16 +115,16 @@
</Link>
<PostBuildEvent>
<Command>
xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
REM xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
</Command>
</PostBuildEvent>
<PreBuildEvent>
<Command>
del /f "$(OutDir)*.exe"
del /f "$(OutDir)*.dll"
REM del /f "$(OutDir)*.dll"
echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
REM echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
REM FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
exit 0
</Command>
</PreBuildEvent>
@ -200,16 +200,16 @@ @@ -200,16 +200,16 @@
</Link>
<PostBuildEvent>
<Command>
xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
REM xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
</Command>
</PostBuildEvent>
<PreBuildEvent>
<Command>
del /f "$(OutDir)*.exe"
del /f "$(OutDir)*.dll"
REM del /f "$(OutDir)*.dll"
echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
REM echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
REM FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
exit 0
</Command>
</PreBuildEvent>
@ -263,11 +263,15 @@ @@ -263,11 +263,15 @@
<ClCompile Include="..\algorithm.c" />
<ClCompile Include="..\algorithm\animecoin.c" />
<ClCompile Include="..\algorithm\bitblock.c" />
<ClCompile Include="..\algorithm\credits.c" />
<ClCompile Include="..\algorithm\lyra2.c" />
<ClCompile Include="..\algorithm\lyra2re.c" />
<ClCompile Include="..\algorithm\lyra2rev2.c" />
<ClCompile Include="..\algorithm\lyra2v2.c" />
<ClCompile Include="..\algorithm\neoscrypt.c" />
<ClCompile Include="..\algorithm\pluck.c" />
<ClCompile Include="..\algorithm\sponge.c" />
<ClCompile Include="..\algorithm\spongev2.c" />
<ClCompile Include="..\algorithm\talkcoin.c" />
<ClCompile Include="..\algorithm\whirlpoolx.c" />
<ClCompile Include="..\algorithm\x14.c" />
@ -328,11 +332,16 @@ @@ -328,11 +332,16 @@
<ClInclude Include="..\algorithm.h" />
<ClInclude Include="..\algorithm\animecoin.h" />
<ClInclude Include="..\algorithm\bitblock.h" />
<ClInclude Include="..\algorithm\credits.h" />
<ClInclude Include="..\algorithm\lyra2.h" />
<ClInclude Include="..\algorithm\lyra2re.h" />
<ClInclude Include="..\algorithm\lyra2rev2.h" />
<ClInclude Include="..\algorithm\lyra2v2.h" />
<ClInclude Include="..\algorithm\neoscrypt.h" />
<ClInclude Include="..\algorithm\pluck.h" />
<ClInclude Include="..\algorithm\sponge.h" />
<ClInclude Include="..\algorithm\spongev2.h" />
<ClInclude Include="..\algorithm\sysendian.h" />
<ClInclude Include="..\algorithm\talkcoin.h" />
<ClInclude Include="..\algorithm\whirlpoolx.h" />
<ClInclude Include="..\algorithm\x14.h" />
@ -365,6 +374,7 @@ @@ -365,6 +374,7 @@
<ClInclude Include="..\algorithm\qubitcoin.h" />
<ClInclude Include="..\algorithm\scrypt.h" />
<ClInclude Include="..\algorithm\sifcoin.h" />
<ClInclude Include="..\sph\sha256_Y.h" />
<ClInclude Include="..\sph\sph_blake.h" />
<ClInclude Include="..\sph\sph_bmw.h" />
<ClInclude Include="..\sph\sph_cubehash.h" />

30
winbuild/sgminer.vcxproj.filters

@ -218,6 +218,18 @@ @@ -218,6 +218,18 @@
<ClCompile Include="..\algorithm\pluck.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\lyra2v2.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\lyra2rev2.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\spongev2.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
<ClCompile Include="..\algorithm\credits.c">
<Filter>Source Files\algorithm</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\adl.h">
@ -415,6 +427,24 @@ @@ -415,6 +427,24 @@
<ClInclude Include="..\algorithm\pluck.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\lyra2v2.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\lyra2rev2.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\spongev2.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\credits.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
<ClInclude Include="..\sph\sha256_Y.h">
<Filter>Header Files\sph</Filter>
</ClInclude>
<ClInclude Include="..\algorithm\sysendian.h">
<Filter>Header Files\algorithm</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="README.txt" />

Loading…
Cancel
Save