Upgrade to version 5.2.1

- Fixed Lyra2REv2, Neoscrypt & WhirlpoolX algo
- Changed default algo from scrypt to x11
9 years ago · cf397f79e8
35 changed files with 2630 additions and 687 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -73,7 +73,6 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
				@@ -73,7 +73,6 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
 sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
 sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
 sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h
-sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h
 sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
 sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
 sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h 
--- a/algorithm.c
+++ b/algorithm.c
@ -33,9 +33,9 @@
				@@ -33,9 +33,9 @@
 #include "algorithm/neoscrypt.h"
 #include "algorithm/whirlpoolx.h"
 #include "algorithm/lyra2re.h"
-#include "algorithm/lyra2re_old.h"
+#include "algorithm/lyra2rev2.h"
 #include "algorithm/pluck.h"
-#include "algorithm/yescrypt.h"
+//#include "algorithm/yescrypt.h"
 #include "algorithm/credits.h"

 #include "compat.h"
@ -43,6 +43,7 @@
				@@ -43,6 +43,7 @@
 #include <inttypes.h>
 #include <string.h>

+bool opt_lyra;
 const char *algorithm_type_str[] = {
  "Unknown",
  "Credits",
@ -62,7 +63,7 @@ const char *algorithm_type_str[] = {
				@@ -62,7 +63,7 @@ const char *algorithm_type_str[] = {
  "Neoscrypt",
  "WhirlpoolX",
  "Lyra2RE",
-  "Lyra2REv2"
+  "Lyra2REV2"
  "Pluck"
  "Yescrypt",
  "Yescrypt-multi"
@ -216,6 +217,7 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_
				@@ -216,6 +217,7 @@ static cl_int queue_credits_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_
  return status;
 }

+#if 0
 static cl_int queue_yescrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel = &clState->kernel;
@ -309,6 +311,7 @@ static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __
				@@ -309,6 +311,7 @@ static cl_int queue_yescrypt_multikernel(_clState *clState, dev_blk_ctx *blk, __

  return status;
 }
+#endif

 static cl_int queue_maxcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
@ -764,40 +767,43 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_
				@@ -764,40 +767,43 @@ static cl_int queue_whirlcoin_kernel(struct __clState *clState, struct _dev_blk_

 static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
-  uint64_t midblock[8], key[8] = { 0 }, tmp[8] = { 0 };
+  cl_kernel *kernel = &clState->kernel; // not referenced directly below; presumably consumed by CL_SET_ARG (macro defined outside this view -- TODO confirm)
+  unsigned int num = 0; // presumably the running kernel-argument index used by CL_SET_ARG -- TODO confirm
  cl_ulong le_target;
  cl_int status;

  le_target = *(cl_ulong *)(blk->work->device_target + 24);
  flip80(clState->cldata, blk->work->data);
+  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL); // blocking write (blocking_write = true) of the 80 flipped header bytes into CLbuffer0; replaces the host-side midstate computation removed below

-  memcpy(midblock, clState->cldata, 64);
-
-  // midblock = n, key = h
-  for (int i = 0; i < 10; ++i) {
-    tmp[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
-    whirlpool_round(key, tmp);
-    tmp[0] = 0;
-    whirlpool_round(midblock, tmp);
-
-    for (int x = 0; x < 8; ++x) {
-      midblock[x] ^= key[x];
-    }
-  }
-
-  for (int i = 0; i < 8; ++i) {
-    midblock[i] ^= ((uint64_t *)(clState->cldata))[i];
-  }
-
-  status = clSetKernelArg(clState->kernel, 0, sizeof(cl_ulong8), (cl_ulong8 *)&midblock);
-  status |= clSetKernelArg(clState->kernel, 1, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 8));
-  status |= clSetKernelArg(clState->kernel, 2, sizeof(cl_ulong), (void *)(((uint64_t *)clState->cldata) + 9));
-  status |= clSetKernelArg(clState->kernel, 3, sizeof(cl_mem), (void *)&clState->outputBuffer);
-  status |= clSetKernelArg(clState->kernel, 4, sizeof(cl_ulong), (void *)&le_target);
+  CL_SET_ARG(clState->CLbuffer0); // input: the header buffer written above
+  CL_SET_ARG(clState->outputBuffer); // output buffer (was kernel argument 3 in the removed code)
+  CL_SET_ARG(le_target); // 64-bit target read from device_target + 24 (was kernel argument 4)

  return status;
 }

+typedef struct _algorithm_settings_t { /* One row of the static algos[] table; rows are positional initializers, so field order matters */
+  const char *name; /* Human-readable identifier; NULL in the terminator row of algos[] */
+  algorithm_type_t type; /* common algorithm type (an ALGO_* enum value) */
+  const char *kernelfile; /* alternate kernel file; "" in every row visible in this view */
+  double   diff_multiplier1;
+  double   diff_multiplier2;
+  double   share_diff_multiplier;
+  uint32_t xintensity_shift;
+  uint32_t intensity_shift;
+  uint32_t found_idx;
+  unsigned long long   diff_numerator;
+  uint32_t diff1targ;
+  size_t n_extra_kernels; /* presumably the number of kernels beyond the main one -- TODO confirm against the OpenCL setup code */
+  long rw_buffer_size; /* visible rows use a size expression, 0 or -1; the meaning of 0 and -1 is not shown here */
+  cl_command_queue_properties cq_properties; /* visible rows use 0 or CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE */
+  void     (*regenhash)(struct work *); /* e.g. lyra2rev2_regenhash */
+  cl_int   (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint); /* e.g. queue_whirlpoolx_kernel */
+  void     (*gen_hash)(const unsigned char *, unsigned int, unsigned char *); /* gen_hash or sha256 in the visible rows */
+  void     (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *); /* NULL in several visible rows */
+} algorithm_settings_t;
+
 static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel;
@ -842,7 +848,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
				@@ -842,7 +848,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
  return status;
 }

-static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel;
  unsigned int num;
@ -945,6 +951,7 @@ static algorithm_settings_t algos[] = {
				@@ -945,6 +951,7 @@ static algorithm_settings_t algos[] = {



+#if 0
 #define A_YESCRYPT(a) \
  { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
  A_YESCRYPT("yescrypt"),
@ -955,6 +962,7 @@ static algorithm_settings_t algos[] = {
				@@ -955,6 +962,7 @@ static algorithm_settings_t algos[] = {
  A_YESCRYPT_MULTI("yescrypt-multi"),
 #undef A_YESCRYPT_MULTI

+#endif

  // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
@ -992,10 +1000,8 @@ static algorithm_settings_t algos[] = {
				@@ -992,10 +1000,8 @@ static algorithm_settings_t algos[] = {

  { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL },

-  { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL },
-
-  { "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options },
-
+  { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL },
+  { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options },

  // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b, c) \
@ -1006,7 +1012,7 @@ static algorithm_settings_t algos[] = {
				@@ -1006,7 +1012,7 @@ static algorithm_settings_t algos[] = {
 #undef A_FUGUE

  { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL },
-  { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
+  { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },

  // Terminator (do not remove)
  { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL }
@ -1079,7 +1085,10 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
				@@ -1079,7 +1085,10 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
  ALGO_ALIAS("nist5", "talkcoin-mod");
  ALGO_ALIAS("keccak", "maxcoin");
  ALGO_ALIAS("whirlpool", "whirlcoin");
+  ALGO_ALIAS("Lyra2RE", "lyra2re");
  ALGO_ALIAS("lyra2", "lyra2re");
+  ALGO_ALIAS("Lyra2REv2", "lyra2rev2");
+  ALGO_ALIAS("lyra2rev2", "lyra2rev2");
  ALGO_ALIAS("lyra2v2", "lyra2rev2");

 #undef ALGO_ALIAS
@ -1107,6 +1116,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias)
				@@ -1107,6 +1116,7 @@ void set_algorithm(algorithm_t* algo, const char* newname_alias)
  if ((old_nfactor > 0) && (old_nfactor != nfactor))
    nfactor = old_nfactor;

+  if (algo->type == ALGO_LYRA2RE || algo->type == ALGO_LYRA2REV2) { opt_lyra = true; }
  set_algorithm_nfactor(algo, nfactor);

  //reapply kernelfile if was set
--- a/algorithm.h
+++ b/algorithm.h
@ -9,7 +9,7 @@
				@@ -9,7 +9,7 @@

 #include <inttypes.h>
 #include <stdbool.h>
-#include "ocl/build_kernel.h"   // For the build_kernel_data type
+//#include "ocl/build_kernel.h"   // For the build_kernel_data type

 typedef enum {
  ALGO_UNK,
@ -30,7 +30,7 @@ typedef enum {
				@@ -30,7 +30,7 @@ typedef enum {
  ALGO_NEOSCRYPT,
  ALGO_WHIRLPOOLX,
  ALGO_LYRA2RE,
-  ALGO_LYRA2REv2,
+  ALGO_LYRA2REV2,
  ALGO_PLUCK,
  ALGO_YESCRYPT,
  ALGO_YESCRYPT_MULTI,
@ -72,28 +72,6 @@ typedef struct _algorithm_t {
				@@ -72,28 +72,6 @@ typedef struct _algorithm_t {
  void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
 } algorithm_t;

-typedef struct _algorithm_settings_t
-{
-	const char *name;
-	algorithm_type_t type;
-	const char *kernelfile;
-	double   diff_multiplier1;
-	double   diff_multiplier2;
-	double   share_diff_multiplier;
-	uint32_t xintensity_shift;
-	uint32_t intensity_shift;
-	uint32_t found_idx;
-	unsigned long long   diff_numerator;
-	uint32_t diff1targ;
-	size_t n_extra_kernels;
-	long rw_buffer_size;
-	cl_command_queue_properties cq_properties;
-	void     (*regenhash)(struct work *);
-	cl_int   (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
-	void     (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
-	void     (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
-} algorithm_settings_t;
-
 /* Set default parameters based on name. */
 void set_algorithm(algorithm_t* algo, const char* name);

--- a/algorithm/lyra2.c
+++ b/algorithm/lyra2.c
@ -58,19 +58,15 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -58,19 +58,15 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *

    //========== Initializing the Memory Matrix and pointers to it =============//
    //Tries to allocate enough space for the whole memory matrix
-
-    const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
-    const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
-
    i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
-    uint64_t *wholeMatrix = malloc(i);
+	uint64_t *wholeMatrix = (uint64_t*)malloc(i);
    if (wholeMatrix == NULL) {
      return -1;
    }
 	memset(wholeMatrix, 0, i);

    //Allocates pointers to each row of the matrix
-    uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
+	uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*));
    if (memMatrix == NULL) {
      return -1;
    }
@ -122,7 +118,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -122,7 +118,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *

    //======================= Initializing the Sponge State ====================//
    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
-    uint64_t *state = malloc(16 * sizeof (uint64_t));
+	uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t));
    if (state == NULL) {
      return -1;
    }
@ -134,16 +130,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -134,16 +130,16 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
    ptrWord = wholeMatrix;
    for (i = 0; i < nBlocksInput; i++) {
      absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
-      ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
+      ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
    }

    //Initializes M[0] and M[1]
-    reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
-    reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
+    reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
+    reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);

    do {
      //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
-      reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+      reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);


      //updates the value of row* (deterministically picked during Setup))
@ -176,7 +172,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -176,7 +172,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
  	    //------------------------------------------------------------------------------------------

  	    //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
-  	    reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+  	    reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);

  	    //update prev: it now points to the last row ever computed
  	    prev = row;
@ -196,7 +192,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -196,7 +192,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
    absorbBlock(state, memMatrix[rowa]);

    //Squeezes the key
-    squeeze(state, K, kLen);
+    squeeze(state, (unsigned char*)K, kLen);
    //==========================================================================/

    //========================= Freeing the memory =============================//
--- a/algorithm/lyra2.h
+++ b/algorithm/lyra2.h
@ -37,6 +37,14 @@ typedef unsigned char byte;
				@@ -37,6 +37,14 @@ typedef unsigned char byte;
        #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8)    //Block length, in bytes
 #endif

+#ifndef N_COLS
+        #define N_COLS 8                                //Number of columns in the memory matrix: 8 unless N_COLS is already defined
+#endif
+
+#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row in uint64_t words: N_COLS blocks
+#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8)        //Number of bytes per row
+
+
 int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);

 #endif /* LYRA2_H_ */
--- a/algorithm/lyra2re.c
+++ b/algorithm/lyra2re.c
@ -36,8 +36,6 @@
				@@ -36,8 +36,6 @@
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_keccak.h" 
-#include "sph/sph_bmw.h"
-#include "sph/sph_cubehash.h"
 #include "lyra2.h"

 /*
@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
				@@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 inline void lyra2rehash(void *state, const void *input)
 {
    sph_blake256_context     ctx_blake;
-    sph_bmw256_context       ctx_bmw;
+    sph_groestl256_context   ctx_groestl;
    sph_keccak256_context    ctx_keccak;
    sph_skein256_context     ctx_skein;
-    sph_cubehash256_context  ctx_cube;

    uint32_t hashA[8], hashB[8];

@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input)
				@@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input)
    sph_keccak256 (&ctx_keccak,hashA, 32);
    sph_keccak256_close(&ctx_keccak, hashB);

-    sph_cubehash256_init(&ctx_cube);
-    sph_cubehash256(&ctx_cube, hashB, 32);
-    sph_cubehash256_close(&ctx_cube, hashA);
+	LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);

-    LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);

    sph_skein256_init(&ctx_skein);
-    sph_skein256 (&ctx_skein, hashB, 32);
-    sph_skein256_close(&ctx_skein, hashA);
+    sph_skein256 (&ctx_skein, hashA, 32);
+    sph_skein256_close(&ctx_skein, hashB);

-    sph_cubehash256_init(&ctx_cube);
-    sph_cubehash256(&ctx_cube, hashA, 32);
-    sph_cubehash256_close(&ctx_cube, hashB);

-    sph_bmw256_init(&ctx_bmw);
-    sph_bmw256 (&ctx_bmw, hashB, 32);
-    sph_bmw256_close(&ctx_bmw, hashA);
+    sph_groestl256_init(&ctx_groestl);
+    sph_groestl256 (&ctx_groestl, hashB, 32);
+    sph_groestl256_close(&ctx_groestl, hashA);

    memcpy(state, hashA, 32);
 }
--- a/algorithm/lyra2re.h
+++ b/algorithm/lyra2re.h
@ -2,8 +2,6 @@
				@@ -2,8 +2,6 @@
 #define LYRA2RE_H

 #include "miner.h"
-#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent
-#define LYRA_SECBUF_SIZE (4) // (not used)

 extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
 			uint32_t nonce);
--- a/algorithm/lyra2re_old.h
+++ b/algorithm/lyra2re_old.h
@ -1,10 +0,0 @@
				@@ -1,10 +0,0 @@
-#ifndef LYRA2REOLD_H
-#define LYRA2REOLD_H
-
-#include "miner.h"
-
-extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget,
-			uint32_t nonce);
-extern void lyra2reold_regenhash(struct work *work);
-
-#endif /* LYRA2RE_H */
--- a/algorithm/lyra2re_old.c
+++ b/algorithm/lyra2re_old.c
@ -36,7 +36,9 @@
				@@ -36,7 +36,9 @@
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_keccak.h" 
-#include "lyra2.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_cubehash.h"
+#include "lyra2v2.h"

 /*
 * Encode a length len/4 vector of (uint32_t) into a length len vector of
@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
				@@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 }


-inline void lyra2rehash_old(void *state, const void *input)
+inline void lyra2rev2hash(void *state, const void *input)
 {
    sph_blake256_context     ctx_blake;
-    sph_groestl256_context   ctx_groestl;
+    sph_bmw256_context       ctx_bmw;
    sph_keccak256_context    ctx_keccak;
    sph_skein256_context     ctx_skein;
-
+    sph_cubehash256_context  ctx_cube;
    uint32_t hashA[8], hashB[8];

    sph_blake256_init(&ctx_blake);
@ -69,16 +71,25 @@ inline void lyra2rehash_old(void *state, const void *input)
				@@ -69,16 +71,25 @@ inline void lyra2rehash_old(void *state, const void *input)
    sph_keccak256 (&ctx_keccak,hashA, 32);
    sph_keccak256_close(&ctx_keccak, hashB);

-    LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
+	sph_cubehash256_init(&ctx_cube);
+	sph_cubehash256(&ctx_cube, hashB, 32);
+	sph_cubehash256_close(&ctx_cube, hashA);
+
+	LYRA2V2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);

 	sph_skein256_init(&ctx_skein);
-    sph_skein256 (&ctx_skein, hashA, 32);
-    sph_skein256_close(&ctx_skein, hashB);
+    sph_skein256 (&ctx_skein, hashB, 32);
+    sph_skein256_close(&ctx_skein, hashA);
+
+	sph_cubehash256_init(&ctx_cube);
+	sph_cubehash256(&ctx_cube, hashA, 32);
+	sph_cubehash256_close(&ctx_cube, hashB);

+    sph_bmw256_init(&ctx_bmw);
+    sph_bmw256 (&ctx_bmw, hashB, 32);
+    sph_bmw256_close(&ctx_bmw, hashA);

-    sph_groestl256_init(&ctx_groestl);
-    sph_groestl256 (&ctx_groestl, hashB, 32);
-    sph_groestl256_close(&ctx_groestl, hashA);
+//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);

 	memcpy(state, hashA, 32);
 }
@ -87,14 +98,14 @@ static const uint32_t diff1targ = 0x0000ffff;
				@@ -87,14 +98,14 @@ static const uint32_t diff1targ = 0x0000ffff;


 /* Used externally as confirmation of correct OCL code */
-int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
 {
 	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
 	uint32_t data[20], ohash[8];

 	be32enc_vect(data, (const uint32_t *)pdata, 19);
 	data[19] = htobe32(nonce);
-	lyra2rehash_old(ohash, data);
+	lyra2rev2hash(ohash, data);
 	tmp_hash7 = be32toh(ohash[7]);

 	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
				@@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
 	return 1;
 }

-void lyra2reold_regenhash(struct work *work)
+void lyra2rev2_regenhash(struct work *work)
 {
        uint32_t data[20];
        uint32_t *nonce = (uint32_t *)(work->data + 76);
@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work)
				@@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work)

        be32enc_vect(data, (const uint32_t *)work->data, 19);
        data[19] = htobe32(*nonce);
-        lyra2rehash_old(ohash, data);
+        lyra2rev2hash(ohash, data);
 }

-bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+bool scanhash_lyra2rev2(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
 		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
 		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
 		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse
				@@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse

 		*nonce = ++n;
 		data[19] = (n);
-		lyra2rehash_old(ostate, data);
+		lyra2rev2hash(ostate, data);
 		tmp_hash7 = (ostate[7]);

 		applog(LOG_INFO, "data7 %08lx",
--- a/algorithm/lyra2rev2.h
+++ b/algorithm/lyra2rev2.h
@ -0,0 +1,11 @@
				@@ -0,0 +1,11 @@
+#ifndef LYRA2REV2_H
+#define LYRA2REV2_H
+
+#include "miner.h"
+#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent (12 * 4 * 4 * 8 = 1536 bytes)
+#define LYRA_SECBUF_SIZE (4) // (not used)
+extern int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget,
+			uint32_t nonce); // CPU-side hash of pdata with the given nonce; its definition is commented as a confirmation of the OCL code
+extern void lyra2rev2_regenhash(struct work *work); // re-runs lyra2rev2hash on the CPU over work->data (nonce taken from work->data + 76)
+
+#endif /* LYRA2REV2_H */
--- a/algorithm/lyra2v2.c
+++ b/algorithm/lyra2v2.c
@ -0,0 +1,213 @@
				@@ -0,0 +1,213 @@
+/**
+ * Implementation of the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "lyra2v2.h"
+#include "spongev2.h"
+
+/**
+ * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
+ * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
+ * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
+ * integer parameters (each copied as a 64-bit "uint64_t") in the order they are provided, plus the value
+ * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
+ * Not validated here: nRows must be >= 3 (row 2 is processed before the row count is checked) and the matrix must hold pad(pwd || salt || basil).
+ *
+ * @param K The derived key to be output by the algorithm
+ * @param kLen Desired key length
+ * @param pwd User password
+ * @param pwdlen Password length
+ * @param salt Salt
+ * @param saltlen Salt length
+ * @param timeCost Parameter to determine the processing time (T)
+ * @param nRows Number of rows of the memory matrix (R)
+ * @param nCols Number of columns of the memory matrix (C)
+ *
+ * @return 0 if the key is generated correctly; -1 if an allocation fails (everything allocated up to that point is freed first)
+ */
+int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
+    //============================= Basic variables ============================//
+    int64_t row = 2; //index of row to be processed
+    int64_t prev = 1; //index of prev (last row ever computed/modified)
+    int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
+    int64_t tau; //Time Loop iterator
+    int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
+    int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
+    int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
+    int64_t i; //auxiliary iteration counter
+    //==========================================================================/
+
+    //========== Initializing the Memory Matrix and pointers to it =============//
+    //Tries to allocate enough space for the whole memory matrix
+    const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
+    const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+
+    i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
+    uint64_t *wholeMatrix = (uint64_t*) malloc(i);
+    if (wholeMatrix == NULL) {
+      return -1;
+    }
+    memset(wholeMatrix, 0, i);
+
+    //Allocates pointers to each row of the matrix
+    uint64_t **memMatrix = (uint64_t**) malloc(nRows * sizeof(uint64_t*));
+    if (memMatrix == NULL) {
+      free(wholeMatrix); //the matrix is owned by this call: release it instead of leaking it on failure
+      return -1;
+    }
+    //Places the pointers in the correct positions
+    uint64_t *ptrWord = wholeMatrix;
+    for (i = 0; i < nRows; i++) {
+      memMatrix[i] = ptrWord;
+      ptrWord += ROW_LEN_INT64;
+    }
+    //==========================================================================/
+
+    //============= Getting the password + salt + basil padded with 10*1 ===============//
+    //OBS.:The memory matrix will temporarily hold the password: not for saving memory,
+    //but this ensures that the password copied locally will be overwritten as soon as possible
+
+    //First, we clean enough blocks for the password, salt, basil and padding
+    uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
+    byte *ptrByte = (byte*) wholeMatrix;
+    memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
+
+    //Prepends the password
+    memcpy(ptrByte, pwd, pwdlen);
+    ptrByte += pwdlen;
+
+    //Concatenates the salt
+    memcpy(ptrByte, salt, saltlen);
+    ptrByte += saltlen;
+
+    //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
+    memcpy(ptrByte, &kLen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &saltlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &timeCost, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nRows, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nCols, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+
+    //Now comes the padding
+    *ptrByte = 0x80; //first byte of padding: right after the basil
+    ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
+    ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
+    *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
+    //==========================================================================/
+
+    //======================= Initializing the Sponge State ====================//
+    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
+    uint64_t *state = (uint64_t*) malloc(16 * sizeof(uint64_t));
+    if (state == NULL) {
+      free(memMatrix); //release both earlier allocations before reporting the failure
+      free(wholeMatrix);
+      return -1;
+    }
+    initStatev2(state);
+    //==========================================================================/
+
+    //================================ Setup Phase =============================//
+    //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
+    ptrWord = wholeMatrix;
+    for (i = 0; i < nBlocksInput; i++) {
+      absorbBlockBlake2Safev2(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
+      ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
+    }
+
+    //Initializes M[0] and M[1]
+    reducedSqueezeRow0v2(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
+    reducedDuplexRow1v2(state, memMatrix[0], memMatrix[1], nCols);
+
+    do {
+      //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
+      reducedDuplexRowSetupv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+      //updates the value of row* (deterministically picked during Setup))
+      rowa = (rowa + step) & (window - 1);
+      //update prev: it now points to the last row ever computed
+      prev = row;
+      //updates row: goes to the next row to be computed
+      row++;
+
+      //Checks if all rows in the window were visited.
+      if (rowa == 0) {
+        step = window + gap; //changes the step: approximately doubles its value
+        window *= 2; //doubles the size of the re-visitation window
+        gap = -gap; //inverts the modifier to the step
+      }
+
+    } while (row < nRows);
+    //==========================================================================/
+
+    //============================ Wandering Phase =============================//
+    row = 0; //Resets the visitation to the first row of the memory matrix
+    for (tau = 1; tau <= timeCost; tau++) {
+      //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
+      step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
+      do {
+        //Selects a pseudorandom index row*
+        //------------------------------------------------------------------------------------------
+        //rowa = ((unsigned int)state[0]) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+        rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+        //------------------------------------------------------------------------------------------
+
+        //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
+        reducedDuplexRowv2(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+        //update prev: it now points to the last row ever computed
+        prev = row;
+
+        //updates row: goes to the next row to be computed
+        //------------------------------------------------------------------------------------------
+        //row = (row + step) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+        row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+        //------------------------------------------------------------------------------------------
+
+      } while (row != 0);
+    }
+    //==========================================================================/
+
+    //============================ Wrap-up Phase ===============================//
+    //Absorbs the last block of the memory matrix
+    absorbBlockv2(state, memMatrix[rowa]);
+
+    //Squeezes the key
+    squeezev2(state, (unsigned char*)K, kLen);
+    //==========================================================================/
+
+    //========================= Freeing the memory =============================//
+    free(memMatrix);
+    free(wholeMatrix);
+
+    //Wiping out the sponge's internal state before freeing it
+    memset(state, 0, 16 * sizeof (uint64_t));
+    free(state);
+    //==========================================================================/
+
+    return 0;
+}
--- a/algorithm/lyra2v2.h
+++ b/algorithm/lyra2v2.h
@ -0,0 +1,42 @@
				@@ -0,0 +1,42 @@
+/**
+ * Header file for the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LYRA2V2_H_
+#define LYRA2V2_H_
+
+#include <stdint.h>
+
+typedef unsigned char byte;
+
+//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
+#define BLOCK_LEN_BLAKE2_SAFE_INT64 8                                   //512 bits (=64 bytes, =8 uint64_t)
+#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8)   //same as above, in bytes
+
+
+#ifdef BLOCK_LEN_BITS
+        #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64)      //Block length in uint64_t words, derived from the externally defined BLOCK_LEN_BITS
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8)       //Block length, in bytes
+#else   //default block length: 768 bits
+        #define BLOCK_LEN_INT64 12                       //Block length: 768 bits (=96 bytes, =12 uint64_t)
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8)    //Block length, in bytes
+#endif
+/**
+ * Entry point of the Lyra2 (v2 sponge) hash. The last step of the
+ * implementation squeezes kLen bytes from the sponge into K, and the
+ * normal completion path returns 0.
+ * NOTE(review): the roles of pwd/pwdlen, salt/saltlen, timeCost, nRows and
+ * nCols are inferred from their names only -- confirm against the definition.
+ */
+int LYRA2V2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
+
+#endif /* LYRA2V2_H_ */
--- a/algorithm/sponge.c
+++ b/algorithm/sponge.c
@ -158,11 +158,11 @@ void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
				@@ -158,11 +158,11 @@ void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
 * @param state     The current state of the sponge 
 * @param rowOut    Row to receive the data squeezed
 */
-void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
-    uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
+void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
+    uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
    int i;
    //M[row][C-1-col] = H.reduced_squeeze()    
-    for (i = 0; i < nCols; i++) {
+    for (i = 0; i < N_COLS; i++) {
 	ptrWord[0] = state[0];
 	ptrWord[1] = state[1];
 	ptrWord[2] = state[2];
@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
				@@ -193,12 +193,12 @@ void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
 * @param rowIn		Row to feed the sponge
 * @param rowOut	Row to receive the sponge's output
 */
-void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) {
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
-    uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
    int i;

-    for (i = 0; i < nCols; i++) {
+    for (i = 0; i < N_COLS; i++) {

 	//Absorbing "M[prev][col]"
 	state[0]  ^= (ptrWordIn[0]);
@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint6
				@@ -253,13 +253,13 @@ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint6
 * @param rowOut         Row receiving the output
 *
 */
-void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
    uint64_t* ptrWordInOut = rowInOut;				//In Lyra2: pointer to row*
-    uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
    int i;

-    for (i = 0; i < nCols; i++) {
+    for (i = 0; i < N_COLS; i++) {
 	//Absorbing "M[prev] [+] M[row*]"
 	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
 	state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut,
				@@ -327,13 +327,13 @@ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut,
 * @param rowOut         Row receiving the output
 *
 */
-void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
    int i;

-    for (i = 0; i < nCols; i++) {
+    for (i = 0; i < N_COLS; i++) {

 	//Absorbing "M[prev] [+] M[row*]"
 	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
--- a/algorithm/sponge.h
+++ b/algorithm/sponge.h
@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]);
				@@ -78,16 +78,16 @@ void initState(uint64_t state[/*16*/]);

 //---- Squeezes
 void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
-void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);
+void reducedSqueezeRow0(uint64_t* state, uint64_t* row);

 //---- Absorbs
 void absorbBlock(uint64_t *state, const uint64_t *in);
 void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);

 //---- Duplexes
-void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
-void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
-void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);

 //---- Misc
 void printArray(unsigned char *array, unsigned int size, char *name);
--- a/algorithm/spongev2.c
+++ b/algorithm/spongev2.c
@ -0,0 +1,745 @@
				@@ -0,0 +1,745 @@
+/**
+ * A simple implementation of Blake2b's internal permutation
+ * in the form of a sponge.
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <time.h>
+#include "spongev2.h"
+#include "lyra2v2.h"
+
+
+
+/**
+ * Puts the sponge into its initial configuration: words 0..7 (the first
+ * 512 bits) are cleared and words 8..15 are loaded with Blake2b's IV, as per
+ * Blake2b's specification. <b>Note:</b> sponges usually start from an all-zero
+ * state, but Blake2b's G function has a fixed point there: if the internal
+ * state and the message are both filled with zeros, the resulting permutation
+ * is always a block filled with zeros. Blake2b does not use the constants
+ * originally employed in Blake2 inside its G function, so it relies on the IV
+ * to avoid such fixed points.
+ *
+ * @param state         The 1024-bit array (16 uint64_t words) to be initialized
+ */
+inline void initStatev2(uint64_t state[/*16*/]) {
+    int w;
+
+    for (w = 0; w < 8; w++) {
+        //Lower 512 bits: zeros
+        state[w] = 0;
+        //Upper 512 bits: reserved to the IV
+        state[w + 8] = blake2b_IV[w];
+    }
+}
+
+/**
+ * Applies the full-strength permutation used by the sponge: ROUND_LYRA is
+ * invoked twelve times, with round indices 0 through 11, in order.
+ * NOTE(review): ROUND_LYRA is a macro defined outside this part of the file;
+ * it is presumed to update the array named "v" in place -- confirm in spongev2.h.
+ *
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+inline static void blake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+    ROUND_LYRA(1);
+    ROUND_LYRA(2);
+    ROUND_LYRA(3);
+    ROUND_LYRA(4);
+    ROUND_LYRA(5);
+    ROUND_LYRA(6);
+    ROUND_LYRA(7);
+    ROUND_LYRA(8);
+    ROUND_LYRA(9);
+    ROUND_LYRA(10);
+    ROUND_LYRA(11);
+}
+
+/**
+ * Reduced-round counterpart of blake2bLyra: invokes ROUND_LYRA exactly once,
+ * with round index 0. Used by the row squeeze/duplex routines in this file.
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+inline static void reducedBlake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+}
+
+/**
+ * Squeezes len bytes out of the sponge. Output is produced in chunks of
+ * BLOCK_LEN_BYTES: every full chunk is copied from the start of the state
+ * and followed by the full permutation (blake2bLyra). The trailing
+ * len % BLOCK_LEN_BYTES bytes are then copied from the start of the state
+ * without a further permutation.
+ *
+ * @param state      The current state of the sponge (updated in place)
+ * @param out        Array that will receive the data squeezed
+ * @param len        The number of bytes to be squeezed into the "out" array
+ */
+inline void squeezev2(uint64_t *state, byte *out, unsigned int len) {
+    byte *dst = out;
+    int blocksLeft = len / BLOCK_LEN_BYTES;
+
+    //Full blocks: copy out, permute, advance
+    while (blocksLeft > 0) {
+        memcpy(dst, state, BLOCK_LEN_BYTES);
+        blake2bLyra(state);
+        dst += BLOCK_LEN_BYTES;
+        blocksLeft--;
+    }
+
+    //Whatever is left over (possibly zero bytes)
+    memcpy(dst, state, (len % BLOCK_LEN_BYTES));
+}
+
+/**
+ * Absorbs one block into the sponge: the first 12 words of "in" (the value
+ * of BLOCK_LEN_INT64 at the default 768-bit block length) are XORed into the
+ * first 12 words of the state, then the full permutation (blake2bLyra) is
+ * applied.
+ *
+ * @param state The current state of the sponge (updated in place)
+ * @param in    The block to be absorbed (12 uint64_t words are read)
+ */
+inline void absorbBlockv2(uint64_t *state, const uint64_t *in) {
+    int w;
+
+    //XORs the input block, word by word, into the start of the state
+    for (w = 0; w < 12; w++) {
+        state[w] ^= in[w];
+    }
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Absorbs one "Blake2-safe" block into the sponge: only the first 8 words of
+ * "in" (BLOCK_LEN_BLAKE2_SAFE_INT64) are XORed into words 0..7 of the state,
+ * leaving words 8..15 untouched by the XOR, and then the full permutation
+ * (blake2bLyra) is applied.
+ *
+ * @param state The current state of the sponge (updated in place)
+ * @param in    The block to be absorbed (8 uint64_t words are read)
+ */
+inline void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in) {
+    int w;
+
+    //XORs the 8 input words into the start of the state
+    for (w = 0; w < 8; w++) {
+        state[w] ^= in[w];
+    }
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Fills one row with sponge output, writing the columns from the highest
+ * index down to the lowest. After each column is written, the reduced-round
+ * permutation (reducedBlake2bLyra) is applied to the state.
+ *
+ * @param state     The current state of the sponge (updated in place)
+ * @param rowOut    Row to receive the data squeezed; columns are spaced
+ *                  BLOCK_LEN_INT64 words apart and 12 words are written to each
+ * @param nCols     Number of columns in the row; 0 leaves everything untouched
+ */
+inline void reducedSqueezeRow0v2(uint64_t* state, uint64_t* rowOut, uint64_t nCols) {
+    uint64_t col;   //same type as nCols: no signed/unsigned comparison, no int overflow
+    int w;
+
+    //M[row][C-1-col] = H.reduced_squeeze()
+    for (col = 0; col < nCols; col++) {
+        //Computed per column so no pointer outside the row is ever formed
+        uint64_t* ptrWord = rowOut + (nCols-1-col)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1-col]
+
+        for (w = 0; w < 12; w++) {
+            ptrWord[w] = state[w];
+        }
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+    }
+}
+
+/**
+ * Performs a reduced duplex operation for a single row: each input column is
+ * XORed into the state, the reduced-round permutation (reducedBlake2bLyra)
+ * is applied, and the input column XORed with the resulting state words is
+ * stored in the output row. Input columns are read from the lowest index
+ * upwards while output columns are written from the highest index downwards.
+ *
+ * @param state		The current state of the sponge (updated in place)
+ * @param rowIn		Row to feed the sponge
+ * @param rowOut	Row to receive the sponge's output
+ * @param nCols		Number of columns in each row; 0 leaves everything untouched
+ */
+inline void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols) {
+    uint64_t col;   //same type as nCols: no signed/unsigned comparison, no int overflow
+    int w;
+
+    for (col = 0; col < nCols; col++) {
+        //Computed per column so no pointer outside either row is ever formed
+        uint64_t* ptrWordIn = rowIn + col*BLOCK_LEN_INT64;              //In Lyra2: pointer to M[prev][col]
+        uint64_t* ptrWordOut = rowOut + (nCols-1-col)*BLOCK_LEN_INT64;  //In Lyra2: pointer to M[row][C-1-col]
+
+        //Absorbing "M[prev][col]"
+        for (w = 0; w < 12; w++) {
+            state[w] ^= ptrWordIn[w];
+        }
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[row][C-1-col] = M[prev][col] XOR rand
+        for (w = 0; w < 12; w++) {
+            ptrWordOut[w] = ptrWordIn[w] ^ state[w];
+        }
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * first 12 state words after the reduced-round permutation, "rand", are then
+ * used to make "M[rowOut][(nCols-1)-col] = M[rowIn][col] XOR rand" and
+ * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW shifts the
+ * block by one 64-bit word: word k receives rand word k-1 and word 0 receives
+ * rand word 11.
+ *
+ * @param state          The current state of the sponge (updated in place)
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ * @param nCols          Number of columns in each row; 0 leaves everything untouched
+ *
+ */
+inline void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
+    uint64_t col;   //same type as nCols: no signed/unsigned comparison, no int overflow
+    int w;
+
+    for (col = 0; col < nCols; col++) {
+        //Computed per column so no pointer outside any row is ever formed
+        uint64_t* ptrWordIn = rowIn + col*BLOCK_LEN_INT64;              //In Lyra2: pointer to prev
+        uint64_t* ptrWordInOut = rowInOut + col*BLOCK_LEN_INT64;        //In Lyra2: pointer to row*
+        uint64_t* ptrWordOut = rowOut + (nCols-1-col)*BLOCK_LEN_INT64;  //In Lyra2: pointer to row
+
+        //Absorbing "M[prev] [+] M[row*]"
+        for (w = 0; w < 12; w++) {
+            state[w] ^= (ptrWordIn[w] + ptrWordInOut[w]);
+        }
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[row][C-1-col] = M[prev][col] XOR rand
+        for (w = 0; w < 12; w++) {
+            ptrWordOut[w] = ptrWordIn[w] ^ state[w];
+        }
+
+        //M[row*][col] = M[row*][col] XOR rotW(rand)
+        //Kept as a separate pass after the output row is fully written,
+        //matching the original statement order
+        ptrWordInOut[0] ^= state[11];
+        for (w = 1; w < 12; w++) {
+            ptrWordInOut[w] ^= state[w - 1];
+        }
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * first 12 state words after the reduced-round permutation, "rand", are then
+ * used to make "M[rowOut][col] = M[rowOut][col] XOR rand" and
+ * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW shifts the
+ * block by one 64-bit word: word k receives rand word k-1 and word 0 receives
+ * rand word 11. All three rows are walked from the lowest column upwards.
+ *
+ * @param state          The current state of the sponge (updated in place)
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ * @param nCols          Number of columns in each row; 0 leaves everything untouched
+ *
+ */
+inline void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols) {
+    uint64_t col;   //same type as nCols: no signed/unsigned comparison, no int overflow
+    int w;
+
+    for (col = 0; col < nCols; col++) {
+        uint64_t* ptrWordInOut = rowInOut + col*BLOCK_LEN_INT64; //In Lyra2: pointer to row*
+        uint64_t* ptrWordIn = rowIn + col*BLOCK_LEN_INT64;       //In Lyra2: pointer to prev
+        uint64_t* ptrWordOut = rowOut + col*BLOCK_LEN_INT64;     //In Lyra2: pointer to row
+
+        //Absorbing "M[prev] [+] M[row*]"
+        for (w = 0; w < 12; w++) {
+            state[w] ^= (ptrWordIn[w] + ptrWordInOut[w]);
+        }
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[rowOut][col] = M[rowOut][col] XOR rand
+        for (w = 0; w < 12; w++) {
+            ptrWordOut[w] ^= state[w];
+        }
+
+        //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+        //Kept as a separate pass after the output update, matching the
+        //original statement order
+        ptrWordInOut[0] ^= state[11];
+        for (w = 1; w < 12; w++) {
+            ptrWordInOut[w] ^= state[w - 1];
+        }
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0];
+	ptrWordOut[1] = state[1];
+	ptrWordOut[2] = state[2];
+	ptrWordOut[3] = state[3];
+	ptrWordOut[4] = state[4];
+	ptrWordOut[5] = state[5];
+	ptrWordOut[6] = state[6];
+	ptrWordOut[7] = state[7];
+	ptrWordOut[8] = state[8];
+	ptrWordOut[9] = state[9];
+	ptrWordOut[10] = state[10];
+	ptrWordOut[11] = state[11];
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut;
+    int i;
+
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+
+    ptrWordOut =  rowOut + BLOCK_LEN_INT64;
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
+ * to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)",
+ * where rotW is a 64-bit rotation to the left.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[rowOut][col] = M[rowOut][col] XOR rand
+	ptrWordOut[0] ^= state[0];
+	ptrWordOut[1] ^= state[1];
+	ptrWordOut[2] ^= state[2];
+	ptrWordOut[3] ^= state[3];
+	ptrWordOut[4] ^= state[4];
+	ptrWordOut[5] ^= state[5];
+	ptrWordOut[6] ^= state[6];
+	ptrWordOut[7] ^= state[7];
+	ptrWordOut[8] ^= state[8];
+	ptrWordOut[9] ^= state[9];
+	ptrWordOut[10] ^= state[10];
+	ptrWordOut[11] ^= state[11];
+
+	//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+
+
+	//Goes to next block
+	ptrWordOut += BLOCK_LEN_INT64;
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Prints an array of unsigned chars to stdout as "<name>: " followed by each
+ * byte in hexadecimal ("%2x", i.e. space-padded to width 2) with a '|' after
+ * every byte, and a final newline.
+ *
+ * @param array     Bytes to print
+ * @param size      Number of bytes to print
+ * @param name      Label printed before the bytes
+ */
+void printArrayv2(unsigned char *array, unsigned int size, char *name) {
+    unsigned int i;   //same type as size: no signed/unsigned comparison, no int overflow
+    printf("%s: ", name);
+    for (i = 0; i < size; i++) {
+        printf("%2x|", array[i]);
+    }
+    printf("\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
--- a/algorithm/spongev2.h
+++ b/algorithm/spongev2.h
@ -0,0 +1,108 @@
				@@ -0,0 +1,108 @@
+/**
+ * Header file for Blake2b's internal permutation in the form of a sponge.
+ * This code is based on the original Blake2b's implementation provided by
+ * Samuel Neves (https://blake2.net/)
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPONGE_H_	/* NOTE(review): guard name does not match this file (spongev2.h); if algorithm/sponge.h also uses SPONGE_H_, including both headers in one translation unit silently skips whichever comes second -- confirm */
+#define SPONGE_H_
+
+#include <stdint.h>
+
+#if defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(32)))	/* GNU C attribute syntax: 32-byte alignment */
+#elif defined(_MSC_VER)
+#define ALIGN __declspec(align(32))	/* MSVC syntax: 32-byte alignment */
+#else
+#define ALIGN	/* any other compiler: expands to nothing, so no alignment is requested */
+#endif
+
+
+/*Blake2b IV Array: eight 64-bit initialization-vector words. Declared static in this header, so every translation unit that includes it gets its own private copy.*/
+static const uint64_t blake2b_IV[8] =
+{
+  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+/*Blake2b's rotation: returns w rotated right by c bits. Both shift counts are reduced mod 64, so c == 0 (or c >= 64) never shifts by the full word width, which is undefined in C.*/
+static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
+    return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) );
+}
+
+/*Blake2b's G function: mixes the four 64-bit lvalues a, b, c, d in place (add, xor, rotate right by 32/24/16/63). r and i are accepted but unused. Arguments are parenthesized so any lvalue expression expands safely; each is evaluated several times, so it must have no side effects.*/
+#define G(r,i,a,b,c,d) \
+  do { \
+    (a) = (a) + (b); \
+    (d) = rotr64((d) ^ (a), 32); \
+    (c) = (c) + (d); \
+    (b) = rotr64((b) ^ (c), 24); \
+    (a) = (a) + (b); \
+    (d) = rotr64((d) ^ (a), 16); \
+    (c) = (c) + (d); \
+    (b) = rotr64((b) ^ (c), 63); \
+  } while(0)
+
+
+/*One round of Blake2b's compression function: eight G applications on the 16-word array v, which must be in scope where the macro is expanded. Viewing v as a 4x4 row-major matrix, the first four mix its columns and the last four its diagonals. r is only forwarded to G. The expansion is a sequence of statements that already ends in ';' (not a single statement), so brace it when used under if/for.*/
+#define ROUND_LYRA(r)  \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
+
+
+//---- Housekeeping
+extern void initStatev2(uint64_t state[/*16*/]);
+
+//---- Squeezes
+extern void squeezev2(uint64_t *state, unsigned char *out, unsigned int len);
+extern void reducedSqueezeRow0v2(uint64_t* state, uint64_t* row, uint64_t nCols);
+
+//---- Absorbs
+extern void absorbBlockv2(uint64_t *state, const uint64_t *in);
+extern void absorbBlockBlake2Safev2(uint64_t *state, const uint64_t *in);
+
+//---- Duplexes
+extern void reducedDuplexRow1v2(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
+extern void reducedDuplexRowSetupv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
+extern void reducedDuplexRowv2(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
+
+//---- Misc
+void printArrayv2(unsigned char *array, unsigned int size, char *name);
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////TESTS////
+//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
+//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+/////////////
+
+
+#endif /* SPONGE_H_ */
--- a/algorithm/whirlpoolx.c
+++ b/algorithm/whirlpoolx.c
@ -34,7 +34,7 @@
				@@ -34,7 +34,7 @@
 #include <stdint.h>
 #include <string.h>

-#include "whirlpoolx.h"
+#include "sph/sph_whirlpool.h"

 /*
 * Encode a length len/4 vector of (uint32_t) into a length len vector of
@ -50,124 +50,16 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
				@@ -50,124 +50,16 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 }


-void whirlpool_compress(uint8_t state[64], const uint8_t block[64])
-{
-	const int NUM_ROUNDS = 10;
-	uint64_t tempState[8];
-	uint64_t tempBlock[8];
-	int i;
-	
-	// Initialization
-	for (i = 0; i < 8; i++) {
-		tempState[i] = 
-			  (uint64_t)state[i << 3]
-			| (uint64_t)state[(i << 3) + 1] <<  8
-			| (uint64_t)state[(i << 3) + 2] << 16
-			| (uint64_t)state[(i << 3) + 3] << 24
-			| (uint64_t)state[(i << 3) + 4] << 32
-			| (uint64_t)state[(i << 3) + 5] << 40
-			| (uint64_t)state[(i << 3) + 6] << 48
-			| (uint64_t)state[(i << 3) + 7] << 56;
-		tempBlock[i] = (
-			  (uint64_t)block[i << 3]
-			| (uint64_t)block[(i << 3) + 1] <<  8
-			| (uint64_t)block[(i << 3) + 2] << 16
-			| (uint64_t)block[(i << 3) + 3] << 24
-			| (uint64_t)block[(i << 3) + 4] << 32
-			| (uint64_t)block[(i << 3) + 5] << 40
-			| (uint64_t)block[(i << 3) + 6] << 48
-			| (uint64_t)block[(i << 3) + 7] << 56) ^ tempState[i];
-	}
-	
-	// Hashing rounds
-	uint64_t rcon[8];
-	memset(rcon + 1, 0, sizeof(rcon[0]) * 7);
-	for (i = 0; i < NUM_ROUNDS; i++) {
-		rcon[0] = WHIRLPOOL_ROUND_CONSTANTS[i];
-		whirlpool_round(tempState, rcon);
-		whirlpool_round(tempBlock, tempState);
-	}
-	
-	// Final combining
-	for (i = 0; i < 64; i++)
-		state[i] ^= block[i] ^ (uint8_t)(tempBlock[i >> 3] >> ((i & 7) << 3));
-}
-
-
-
-
-
-void whirlpool_round(uint64_t block[8], const uint64_t key[8]) {
-	uint64_t a = block[0];
-	uint64_t b = block[1];
-	uint64_t c = block[2];
-	uint64_t d = block[3];
-	uint64_t e = block[4];
-	uint64_t f = block[5];
-	uint64_t g = block[6];
-	uint64_t h = block[7];
-	
-	uint64_t r;
-	#define DOROW(i, s, t, u, v, w, x, y, z)  \
-		r = MAGIC_TABLE[(uint8_t)s];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(t >>  8)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(u >> 16)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(v >> 24)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(w >> 32)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(x >> 40)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(y >> 48)];  r = (r << 56) | (r >> 8);  \
-		r ^= MAGIC_TABLE[(uint8_t)(z >> 56)];  r = (r << 56) | (r >> 8);  \
-		block[i] = r ^ key[i];
-	
-	DOROW(0, a, h, g, f, e, d, c, b)
-	DOROW(1, b, a, h, g, f, e, d, c)
-	DOROW(2, c, b, a, h, g, f, e, d)
-	DOROW(3, d, c, b, a, h, g, f, e)
-	DOROW(4, e, d, c, b, a, h, g, f)
-	DOROW(5, f, e, d, c, b, a, h, g)
-	DOROW(6, g, f, e, d, c, b, a, h)
-	DOROW(7, h, g, f, e, d, c, b, a)
-}
-
-void whirlpool_hash(const uint8_t *message, uint32_t len, uint8_t hash[64]) {
-	memset(hash, 0, 64);
-	
-	uint32_t i;
-	for (i = 0; len - i >= 64; i += 64)
-		whirlpool_compress(hash, message + i);
-	
-	uint8_t block[64];
-	uint32_t rem = len - i;
-	memcpy(block, message + i, rem);
-	
-	block[rem] = 0x80;
-	rem++;
-	if (64 - rem >= 32)
-		memset(block + rem, 0, 56 - rem);
-	else {
-		memset(block + rem, 0, 64 - rem);
-		whirlpool_compress(hash, block);
-		memset(block, 0, 56);
-	}
-	
-	uint64_t longLen = ((uint64_t)len) << 3;
-	for (i = 0; i < 8; i++)
-		block[64 - 1 - i] = (uint8_t)(longLen >> (i * 8));
-	whirlpool_compress(hash, block);
-}
-
 void whirlpoolx_hash(void *state, const void *input)
 {
-	//sph_whirlpool1_context ctx;
+	sph_whirlpool1_context ctx;
    
-	//sph_whirlpool1_init(&ctx);
+	sph_whirlpool1_init(&ctx);

    uint8_t digest[64];  

-	//sph_whirlpool(&ctx, input, 80);
-	//sph_whirlpool_close(&ctx, digest);
-	
-	whirlpool_hash((uint8_t *)input, 80, digest);
+	sph_whirlpool(&ctx, input, 80);
+	sph_whirlpool_close(&ctx, digest);
 	
 	uint8_t digest_xored[32]; 

--- a/algorithm/whirlpoolx.h
+++ b/algorithm/whirlpoolx.h
@ -1,58 +1,10 @@
				@@ -1,58 +1,10 @@
 #ifndef WHIRLPOOLX_H
 #define WHIRLPOOLX_H

-#include <stdint.h>
 #include "miner.h"

-// The combined effect of gamma (SubBytes) and theta (MixRows)
-static uint64_t MAGIC_TABLE[256] = {
-	UINT64_C(0xD83078C018601818), UINT64_C(0x2646AF05238C2323), UINT64_C(0xB891F97EC63FC6C6), UINT64_C(0xFBCD6F13E887E8E8), UINT64_C(0xCB13A14C87268787), UINT64_C(0x116D62A9B8DAB8B8), UINT64_C(0x0902050801040101), UINT64_C(0x0D9E6E424F214F4F),
-	UINT64_C(0x9B6CEEAD36D83636), UINT64_C(0xFF510459A6A2A6A6), UINT64_C(0x0CB9BDDED26FD2D2), UINT64_C(0x0EF706FBF5F3F5F5), UINT64_C(0x96F280EF79F97979), UINT64_C(0x30DECE5F6FA16F6F), UINT64_C(0x6D3FEFFC917E9191), UINT64_C(0xF8A407AA52555252),
-	UINT64_C(0x47C0FD27609D6060), UINT64_C(0x35657689BCCABCBC), UINT64_C(0x372BCDAC9B569B9B), UINT64_C(0x8A018C048E028E8E), UINT64_C(0xD25B1571A3B6A3A3), UINT64_C(0x6C183C600C300C0C), UINT64_C(0x84F68AFF7BF17B7B), UINT64_C(0x806AE1B535D43535),
-	UINT64_C(0xF53A69E81D741D1D), UINT64_C(0xB3DD4753E0A7E0E0), UINT64_C(0x21B3ACF6D77BD7D7), UINT64_C(0x9C99ED5EC22FC2C2), UINT64_C(0x435C966D2EB82E2E), UINT64_C(0x29967A624B314B4B), UINT64_C(0x5DE121A3FEDFFEFE), UINT64_C(0xD5AE168257415757),
-	UINT64_C(0xBD2A41A815541515), UINT64_C(0xE8EEB69F77C17777), UINT64_C(0x926EEBA537DC3737), UINT64_C(0x9ED7567BE5B3E5E5), UINT64_C(0x1323D98C9F469F9F), UINT64_C(0x23FD17D3F0E7F0F0), UINT64_C(0x20947F6A4A354A4A), UINT64_C(0x44A9959EDA4FDADA),
-	UINT64_C(0xA2B025FA587D5858), UINT64_C(0xCF8FCA06C903C9C9), UINT64_C(0x7C528D5529A42929), UINT64_C(0x5A1422500A280A0A), UINT64_C(0x507F4FE1B1FEB1B1), UINT64_C(0xC95D1A69A0BAA0A0), UINT64_C(0x14D6DA7F6BB16B6B), UINT64_C(0xD917AB5C852E8585),
-	UINT64_C(0x3C677381BDCEBDBD), UINT64_C(0x8FBA34D25D695D5D), UINT64_C(0x9020508010401010), UINT64_C(0x07F503F3F4F7F4F4), UINT64_C(0xDD8BC016CB0BCBCB), UINT64_C(0xD37CC6ED3EF83E3E), UINT64_C(0x2D0A112805140505), UINT64_C(0x78CEE61F67816767),
-	UINT64_C(0x97D55373E4B7E4E4), UINT64_C(0x024EBB25279C2727), UINT64_C(0x7382583241194141), UINT64_C(0xA70B9D2C8B168B8B), UINT64_C(0xF6530151A7A6A7A7), UINT64_C(0xB2FA94CF7DE97D7D), UINT64_C(0x4937FBDC956E9595), UINT64_C(0x56AD9F8ED847D8D8),
-	UINT64_C(0x70EB308BFBCBFBFB), UINT64_C(0xCDC17123EE9FEEEE), UINT64_C(0xBBF891C77CED7C7C), UINT64_C(0x71CCE31766856666), UINT64_C(0x7BA78EA6DD53DDDD), UINT64_C(0xAF2E4BB8175C1717), UINT64_C(0x458E460247014747), UINT64_C(0x1A21DC849E429E9E),
-	UINT64_C(0xD489C51ECA0FCACA), UINT64_C(0x585A99752DB42D2D), UINT64_C(0x2E637991BFC6BFBF), UINT64_C(0x3F0E1B38071C0707), UINT64_C(0xAC472301AD8EADAD), UINT64_C(0xB0B42FEA5A755A5A), UINT64_C(0xEF1BB56C83368383), UINT64_C(0xB666FF8533CC3333),
-	UINT64_C(0x5CC6F23F63916363), UINT64_C(0x12040A1002080202), UINT64_C(0x93493839AA92AAAA), UINT64_C(0xDEE2A8AF71D97171), UINT64_C(0xC68DCF0EC807C8C8), UINT64_C(0xD1327DC819641919), UINT64_C(0x3B92707249394949), UINT64_C(0x5FAF9A86D943D9D9),
-	UINT64_C(0x31F91DC3F2EFF2F2), UINT64_C(0xA8DB484BE3ABE3E3), UINT64_C(0xB9B62AE25B715B5B), UINT64_C(0xBC0D9234881A8888), UINT64_C(0x3E29C8A49A529A9A), UINT64_C(0x0B4CBE2D26982626), UINT64_C(0xBF64FA8D32C83232), UINT64_C(0x597D4AE9B0FAB0B0),
-	UINT64_C(0xF2CF6A1BE983E9E9), UINT64_C(0x771E33780F3C0F0F), UINT64_C(0x33B7A6E6D573D5D5), UINT64_C(0xF41DBA74803A8080), UINT64_C(0x27617C99BEC2BEBE), UINT64_C(0xEB87DE26CD13CDCD), UINT64_C(0x8968E4BD34D03434), UINT64_C(0x3290757A483D4848),
-	UINT64_C(0x54E324ABFFDBFFFF), UINT64_C(0x8DF48FF77AF57A7A), UINT64_C(0x643DEAF4907A9090), UINT64_C(0x9DBE3EC25F615F5F), UINT64_C(0x3D40A01D20802020), UINT64_C(0x0FD0D56768BD6868), UINT64_C(0xCA3472D01A681A1A), UINT64_C(0xB7412C19AE82AEAE),
-	UINT64_C(0x7D755EC9B4EAB4B4), UINT64_C(0xCEA8199A544D5454), UINT64_C(0x7F3BE5EC93769393), UINT64_C(0x2F44AA0D22882222), UINT64_C(0x63C8E907648D6464), UINT64_C(0x2AFF12DBF1E3F1F1), UINT64_C(0xCCE6A2BF73D17373), UINT64_C(0x82245A9012481212),
-	UINT64_C(0x7A805D3A401D4040), UINT64_C(0x4810284008200808), UINT64_C(0x959BE856C32BC3C3), UINT64_C(0xDFC57B33EC97ECEC), UINT64_C(0x4DAB9096DB4BDBDB), UINT64_C(0xC05F1F61A1BEA1A1), UINT64_C(0x9107831C8D0E8D8D), UINT64_C(0xC87AC9F53DF43D3D),
-	UINT64_C(0x5B33F1CC97669797), UINT64_C(0x0000000000000000), UINT64_C(0xF983D436CF1BCFCF), UINT64_C(0x6E5687452BAC2B2B), UINT64_C(0xE1ECB39776C57676), UINT64_C(0xE619B06482328282), UINT64_C(0x28B1A9FED67FD6D6), UINT64_C(0xC33677D81B6C1B1B),
-	UINT64_C(0x74775BC1B5EEB5B5), UINT64_C(0xBE432911AF86AFAF), UINT64_C(0x1DD4DF776AB56A6A), UINT64_C(0xEAA00DBA505D5050), UINT64_C(0x578A4C1245094545), UINT64_C(0x38FB18CBF3EBF3F3), UINT64_C(0xAD60F09D30C03030), UINT64_C(0xC4C3742BEF9BEFEF),
-	UINT64_C(0xDA7EC3E53FFC3F3F), UINT64_C(0xC7AA1C9255495555), UINT64_C(0xDB591079A2B2A2A2), UINT64_C(0xE9C96503EA8FEAEA), UINT64_C(0x6ACAEC0F65896565), UINT64_C(0x036968B9BAD2BABA), UINT64_C(0x4A5E93652FBC2F2F), UINT64_C(0x8E9DE74EC027C0C0),
-	UINT64_C(0x60A181BEDE5FDEDE), UINT64_C(0xFC386CE01C701C1C), UINT64_C(0x46E72EBBFDD3FDFD), UINT64_C(0x1F9A64524D294D4D), UINT64_C(0x7639E0E492729292), UINT64_C(0xFAEABC8F75C97575), UINT64_C(0x360C1E3006180606), UINT64_C(0xAE0998248A128A8A),
-	UINT64_C(0x4B7940F9B2F2B2B2), UINT64_C(0x85D15963E6BFE6E6), UINT64_C(0x7E1C36700E380E0E), UINT64_C(0xE73E63F81F7C1F1F), UINT64_C(0x55C4F73762956262), UINT64_C(0x3AB5A3EED477D4D4), UINT64_C(0x814D3229A89AA8A8), UINT64_C(0x5231F4C496629696),
-	UINT64_C(0x62EF3A9BF9C3F9F9), UINT64_C(0xA397F666C533C5C5), UINT64_C(0x104AB13525942525), UINT64_C(0xABB220F259795959), UINT64_C(0xD015AE54842A8484), UINT64_C(0xC5E4A7B772D57272), UINT64_C(0xEC72DDD539E43939), UINT64_C(0x1698615A4C2D4C4C),
-	UINT64_C(0x94BC3BCA5E655E5E), UINT64_C(0x9FF085E778FD7878), UINT64_C(0xE570D8DD38E03838), UINT64_C(0x980586148C0A8C8C), UINT64_C(0x17BFB2C6D163D1D1), UINT64_C(0xE4570B41A5AEA5A5), UINT64_C(0xA1D94D43E2AFE2E2), UINT64_C(0x4EC2F82F61996161),
-	UINT64_C(0x427B45F1B3F6B3B3), UINT64_C(0x3442A51521842121), UINT64_C(0x0825D6949C4A9C9C), UINT64_C(0xEE3C66F01E781E1E), UINT64_C(0x6186522243114343), UINT64_C(0xB193FC76C73BC7C7), UINT64_C(0x4FE52BB3FCD7FCFC), UINT64_C(0x2408142004100404),
-	UINT64_C(0xE3A208B251595151), UINT64_C(0x252FC7BC995E9999), UINT64_C(0x22DAC44F6DA96D6D), UINT64_C(0x651A39680D340D0D), UINT64_C(0x79E93583FACFFAFA), UINT64_C(0x69A384B6DF5BDFDF), UINT64_C(0xA9FC9BD77EE57E7E), UINT64_C(0x1948B43D24902424),
-	UINT64_C(0xFE76D7C53BEC3B3B), UINT64_C(0x9A4B3D31AB96ABAB), UINT64_C(0xF081D13ECE1FCECE), UINT64_C(0x9922558811441111), UINT64_C(0x8303890C8F068F8F), UINT64_C(0x049C6B4A4E254E4E), UINT64_C(0x667351D1B7E6B7B7), UINT64_C(0xE0CB600BEB8BEBEB),
-	UINT64_C(0xC178CCFD3CF03C3C), UINT64_C(0xFD1FBF7C813E8181), UINT64_C(0x4035FED4946A9494), UINT64_C(0x1CF30CEBF7FBF7F7), UINT64_C(0x186F67A1B9DEB9B9), UINT64_C(0x8B265F98134C1313), UINT64_C(0x51589C7D2CB02C2C), UINT64_C(0x05BBB8D6D36BD3D3),
-	UINT64_C(0x8CD35C6BE7BBE7E7), UINT64_C(0x39DCCB576EA56E6E), UINT64_C(0xAA95F36EC437C4C4), UINT64_C(0x1B060F18030C0303), UINT64_C(0xDCAC138A56455656), UINT64_C(0x5E88491A440D4444), UINT64_C(0xA0FE9EDF7FE17F7F), UINT64_C(0x884F3721A99EA9A9),
-	UINT64_C(0x6754824D2AA82A2A), UINT64_C(0x0A6B6DB1BBD6BBBB), UINT64_C(0x879FE246C123C1C1), UINT64_C(0xF1A602A253515353), UINT64_C(0x72A58BAEDC57DCDC), UINT64_C(0x531627580B2C0B0B), UINT64_C(0x0127D39C9D4E9D9D), UINT64_C(0x2BD8C1476CAD6C6C),
-	UINT64_C(0xA462F59531C43131), UINT64_C(0xF3E8B98774CD7474), UINT64_C(0x15F109E3F6FFF6F6), UINT64_C(0x4C8C430A46054646), UINT64_C(0xA5452609AC8AACAC), UINT64_C(0xB50F973C891E8989), UINT64_C(0xB42844A014501414), UINT64_C(0xBADF425BE1A3E1E1),
-	UINT64_C(0xA62C4EB016581616), UINT64_C(0xF774D2CD3AE83A3A), UINT64_C(0x06D2D06F69B96969), UINT64_C(0x41122D4809240909), UINT64_C(0xD7E0ADA770DD7070), UINT64_C(0x6F7154D9B6E2B6B6), UINT64_C(0x1EBDB7CED067D0D0), UINT64_C(0xD6C77E3BED93EDED),
-	UINT64_C(0xE285DB2ECC17CCCC), UINT64_C(0x6884572A42154242), UINT64_C(0x2C2DC2B4985A9898), UINT64_C(0xED550E49A4AAA4A4), UINT64_C(0x7550885D28A02828), UINT64_C(0x86B831DA5C6D5C5C), UINT64_C(0x6BED3F93F8C7F8F8), UINT64_C(0xC211A44486228686),
-};
-
-static uint64_t WHIRLPOOL_ROUND_CONSTANTS[32] = {
-	UINT64_C(0x4F01B887E8C62318), UINT64_C(0x52916F79F5D2A636), UINT64_C(0x357B0CA38E9BBC60), UINT64_C(0x57FE4B2EC2D7E01D),
-	UINT64_C(0xDA4AF09FE5377715), UINT64_C(0x856BA0B10A29C958), UINT64_C(0x67053ECBF4105DBD), UINT64_C(0xD8957DA78B4127E4),
-	UINT64_C(0x9E4717DD667CEEFB), UINT64_C(0x33835AAD07BF2DCA), UINT64_C(0xD94919C871AA0263), UINT64_C(0xB032269A885BE3F2),
-	UINT64_C(0x4834CDBE80D50FE9), UINT64_C(0xAE1A68205F907AFF), UINT64_C(0x1273F164229354B4), UINT64_C(0x3D8DA1DBECC30840),
-	UINT64_C(0x1BD682762BCF0097), UINT64_C(0xEF30F345506AAFB5), UINT64_C(0xC02FBA65EAA2553F), UINT64_C(0x8A0675924DFD1CDE),
-	UINT64_C(0x96A8D4621F0EE6B2), UINT64_C(0x4C3972845925C5F9), UINT64_C(0x61E2A5D18C38785E), UINT64_C(0x04FCC7431E9C21B3),
-	UINT64_C(0x247EDFFA0D6D9951), UINT64_C(0xEBB74E8F11CEAB3B), UINT64_C(0xD32C13B9F794813C), UINT64_C(0xA97F445603C46EE7),
-	UINT64_C(0x6C9D0BDC53C1BB2A), UINT64_C(0xE11489AC46F67431), UINT64_C(0xEDD0B67009693A16), UINT64_C(0x86F85C28A49842CC),
-};

 extern int whirlpoolx_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
 extern void whirlpoolx_regenhash(struct work *work);
-extern void whirlpool_round(uint64_t block[8], const uint64_t key[8]);

 #endif /* WHIRLPOOLX_H */
--- a/algorithm/yescrypt-opt.c
+++ b/algorithm/yescrypt-opt.c
@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size)
				@@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size)
 	if (size + 63 < size) {
 		errno = ENOMEM;
 	}
-	else if ((base = malloc(size + 63)) != NULL) {
+	else if ((base = (uint8_t *)malloc(size + 63)) != NULL) {
 		aligned = base + 63;
 		aligned -= (uintptr_t)aligned & 63;
 	}
@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
				@@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
 	uint64_t * XY, uint64_t * S)
 {
 	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = shared->shared1.aligned;
+	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
 	uint32_t VROM_mask = shared->mask1;
 	size_t s = 16 * r;
 	uint64_t * X = V;
@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
				@@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
 	
 	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) =
 		(S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = shared->shared1.aligned;
+	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
 	uint32_t VROM_mask = shared->mask1 | 1;
 	size_t s = 16 * r;
 	yescrypt_flags_t rw = flags & YESCRYPT_RW;
@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
				@@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
 		uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;

 		if (Sp) 
-			smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);
+			smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);

 	

--- a/api.c
+++ b/api.c
@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
				@@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
  io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR);

  root = api_add_string(root, "Miner", PACKAGE " " VERSION, false);
-  root = api_add_string(root, "CGMiner", CGMINER_VERSION, false);
+  root = api_add_string(root, "SGMiner", VERSION, false);
  root = api_add_const(root, "API", APIVERSION, false);

  root = print_data(root, buf, isjson, false);
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1366,7 +1366,7 @@ static bool opencl_thread_init(struct thr_info *thr)

 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-  if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) {
+  if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REV2) {
    work->blk.work = work;
    precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data));
  }
--- a/kernel/lyra2rev2.cl
+++ b/kernel/lyra2rev2.cl
@ -31,8 +31,8 @@
				@@ -31,8 +31,8 @@
 // typedef unsigned int uint;
 #pragma OPENCL EXTENSION cl_amd_printf : enable

-#ifndef LYRA2RE_CL
-#define LYRA2RE_CL
+#ifndef LYRA2REV2_CL
+#define LYRA2REV2_CL

 #if __ENDIAN_LITTLE__
 #define SPH_LITTLE_ENDIAN 1
@ -90,7 +90,7 @@ static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
				@@ -90,7 +90,7 @@ static inline sph_u64 ror64(sph_u64 vw, unsigned a) {
 //#define SPH_ROTR64(l,n) ror64(l,n)
 #define memshift 3
 #include "blake256.cl"
-#include "lyra2v2.cl"
+#include "Lyra2v2.cl"
 #include "keccak1600.cl"
 #include "skein256.cl"
 #include "cubehash.cl"
@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong
				@@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong
 }


-#endif // LYRA2RE_CL
+#endif // LYRA2REV2_CL
--- a/kernel/neoscrypt.cl
+++ b/kernel/neoscrypt.cl
@ -1,5 +1,4 @@
				@@ -1,5 +1,4 @@
-/* NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20 */
-/* Adapted and improved for 14.x drivers by Wolf9466 (Wolf`) */
+// NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20

 // Stupid AMD compiler ignores the unroll pragma in these two
 #define SALSA_SMALL_UNROLL 3
@ -351,74 +350,71 @@ uint16 salsa_small_scalar_rnd(uint16 X)
				@@ -351,74 +350,71 @@ uint16 salsa_small_scalar_rnd(uint16 X)
 	return(X + st);
 }

-#define CHACHA_CORE_PARALLEL(state)	do { \
-	state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
-	state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(12U, 12U, 12U, 12U)); \
-	state[0] += state[1]; state[3] = rotate(state[3] ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
-	state[2] += state[3]; state[1] = rotate(state[1] ^ state[2], (uint4)(7U, 7U, 7U, 7U)); \
-	\
-	state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(16U, 16U, 16U, 16U)); \
-	state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(12U, 12U, 12U, 12U)); \
-	state[0] += state[1].yzwx; state[3].wxyz = rotate(state[3].wxyz ^ state[0], (uint4)(8U, 8U, 8U, 8U)); \
-	state[2].zwxy += state[3].wxyz; state[1].yzwx = rotate(state[1].yzwx ^ state[2].zwxy, (uint4)(7U, 7U, 7U, 7U)); \
+#define CHACHA_CORE(state)	do { \
+	state.s0 += state.s4; state.sc = as_uint(as_ushort2(state.sc ^ state.s0).s10); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 12U); state.s0 += state.s4; state.sc = rotate(state.sc ^ state.s0, 8U); state.s8 += state.sc; state.s4 = rotate(state.s4 ^ state.s8, 7U); \
+	state.s1 += state.s5; state.sd = as_uint(as_ushort2(state.sd ^ state.s1).s10); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 12U); state.s1 += state.s5; state.sd = rotate(state.sd ^ state.s1, 8U); state.s9 += state.sd; state.s5 = rotate(state.s5 ^ state.s9, 7U); \
+	state.s2 += state.s6; state.se = as_uint(as_ushort2(state.se ^ state.s2).s10); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 12U); state.s2 += state.s6; state.se = rotate(state.se ^ state.s2, 8U); state.sa += state.se; state.s6 = rotate(state.s6 ^ state.sa, 7U); \
+	state.s3 += state.s7; state.sf = as_uint(as_ushort2(state.sf ^ state.s3).s10); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 12U); state.s3 += state.s7; state.sf = rotate(state.sf ^ state.s3, 8U); state.sb += state.sf; state.s7 = rotate(state.s7 ^ state.sb, 7U); \
+	state.s0 += state.s5; state.sf = as_uint(as_ushort2(state.sf ^ state.s0).s10); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 12U); state.s0 += state.s5; state.sf = rotate(state.sf ^ state.s0, 8U); state.sa += state.sf; state.s5 = rotate(state.s5 ^ state.sa, 7U); \
+	state.s1 += state.s6; state.sc = as_uint(as_ushort2(state.sc ^ state.s1).s10); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 12U); state.s1 += state.s6; state.sc = rotate(state.sc ^ state.s1, 8U); state.sb += state.sc; state.s6 = rotate(state.s6 ^ state.sb, 7U); \
+	state.s2 += state.s7; state.sd = as_uint(as_ushort2(state.sd ^ state.s2).s10); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 12U); state.s2 += state.s7; state.sd = rotate(state.sd ^ state.s2, 8U); state.s8 += state.sd; state.s7 = rotate(state.s7 ^ state.s8, 7U); \
+	state.s3 += state.s4; state.se = as_uint(as_ushort2(state.se ^ state.s3).s10); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 12U); state.s3 += state.s4; state.se = rotate(state.se ^ state.s3, 8U); state.s9 += state.se; state.s4 = rotate(state.s4 ^ state.s9, 7U); \
 } while(0)

-uint16 chacha_small_parallel_rnd(uint16 X)
+uint16 chacha_small_scalar_rnd(uint16 X)
 {   
-	uint4 t, st[4];
-
-	((uint16 *)st)[0] = X;
+	uint16 st = X;
 	
 	#if CHACHA_SMALL_UNROLL == 1

 	for(int i = 0; i < 10; ++i)
 	{
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
 	}

 	#elif CHACHA_SMALL_UNROLL == 2

 	for(int i = 0; i < 5; ++i)
 	{
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
 	}

 	#elif CHACHA_SMALL_UNROLL == 3

 	for(int i = 0; i < 4; ++i)
 	{
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
 		if(i == 3) break;
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
 	}

 	#elif CHACHA_SMALL_UNROLL == 4

 	for(int i = 0; i < 3; ++i)
 	{
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
 		if(i == 2) break;
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
 	}

 	#else

 	for(int i = 0; i < 2; ++i)
 	{
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
-		CHACHA_CORE_PARALLEL(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
+		CHACHA_CORE(st);
 	}

 	#endif

-	return(X + ((uint16 *)st)[0]);
+	return(X + st);
 }

 void neoscrypt_blkmix(uint16 *XV, bool alg)
@ -443,10 +439,10 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
				@@ -443,10 +439,10 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
 	}
 	else
 	{
-		XV[0] = chacha_small_parallel_rnd(XV[0]); XV[1] ^= XV[0];
-		XV[1] = chacha_small_parallel_rnd(XV[1]); XV[2] ^= XV[1];
-		XV[2] = chacha_small_parallel_rnd(XV[2]); XV[3] ^= XV[2];
-		XV[3] = chacha_small_parallel_rnd(XV[3]);
+		XV[0] = chacha_small_scalar_rnd(XV[0]); XV[1] ^= XV[0];
+		XV[1] = chacha_small_scalar_rnd(XV[1]); XV[2] ^= XV[1];
+		XV[2] = chacha_small_scalar_rnd(XV[2]); XV[3] ^= XV[2];
+		XV[3] = chacha_small_scalar_rnd(XV[3]);
 	}

 	XV[1] ^= XV[2];
@ -454,7 +450,7 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
				@@ -454,7 +450,7 @@ void neoscrypt_blkmix(uint16 *XV, bool alg)
 	XV[1] ^= XV[2];
 }

-void ScratchpadStore(__global void *V, void *X, uchar idx)
+void ScratchpadStore(__global void *V, const void *X, uchar idx)
 {
 	((__global ulong16 *)V)[idx << 1] = ((ulong16 *)X)[0];
 	((__global ulong16 *)V)[(idx << 1) + 1] = ((ulong16 *)X)[1];
@ -466,20 +462,34 @@ void ScratchpadMix(void *X, const __global void *V, uchar idx)
				@@ -466,20 +462,34 @@ void ScratchpadMix(void *X, const __global void *V, uchar idx)
 	((ulong16 *)X)[1] ^= ((__global ulong16 *)V)[(idx << 1) + 1];
 }

+void ScratchpadLoad(void *X, const __global void *V, uchar idx)	// X[0..1] = V[2*idx .. 2*idx + 1], both viewed as ulong16 arrays
+{
+	const __global ulong16 *src = (const __global ulong16 *)V + (idx << 1);	// first of the two entries for slot idx
+	((ulong16 *)X)[0] = src[0]; ((ulong16 *)X)[1] = src[1];
+}
+
 void SMix(uint16 *X, __global uint16 *V, bool flag)
 {	
 	#pragma unroll 1
-	for(int i = 0; i < 128; ++i)
+	for(int i = 0; i < 64; ++i)
 	{
 		ScratchpadStore(V, X, i);
 		neoscrypt_blkmix(X, flag);
+		neoscrypt_blkmix(X, flag);
 	}

 	#pragma unroll 1
 	for(int i = 0; i < 128; ++i)
 	{
+		uint16 tmp[4];
 		const uint idx = convert_uchar(((uint *)X)[48] & 0x7F);
-		ScratchpadMix(X, V, idx);
+
+		ScratchpadLoad(tmp, V, idx >> 1);
+
+		if(idx & 1) neoscrypt_blkmix(tmp, flag);
+
+		((ulong16 *)X)[0] ^= ((ulong16 *)tmp)[0];
+		((ulong16 *)X)[1] ^= ((ulong16 *)tmp)[1];
 		neoscrypt_blkmix(X, flag);
 	}
 }
@ -492,7 +502,8 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri
				@@ -492,7 +502,8 @@ __kernel void search(__global const uchar* restrict input, __global uint* restri
 	// X = CONSTANT_r * 2 * BLOCK_SIZE(64); Z is a copy of X for ChaCha
 	uint16 X[4], Z[4];
 	/* V = CONSTANT_N * CONSTANT_r * 2 * BLOCK_SIZE */
-	__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
+	//__global ulong16 *V = (__global ulong16 *)(padcache + (0x8000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
+	__global ulong16 *V = (__global ulong16 *)(padcache + (0x4000 * (get_global_id(0) % MAX_GLOBAL_THREADS)));
 	uchar outbuf[32];
 	uchar data[PASSWORD_LEN];

--- a/kernel/whirlpoolx.cl
+++ b/kernel/whirlpoolx.cl
--- a/miner.h
+++ b/miner.h
@ -3,17 +3,6 @@
				@@ -3,17 +3,6 @@

 #include "config.h"

-#if defined(USE_GIT_VERSION) && defined(GIT_VERSION)
-#undef VERSION
-#define VERSION GIT_VERSION
-#endif
-
-#ifdef BUILD_NUMBER
-#define CGMINER_VERSION VERSION "-" BUILD_NUMBER
-#else
-#define CGMINER_VERSION VERSION
-#endif
-
 #include "algorithm.h"

 #include <stdbool.h>
@ -1045,6 +1034,7 @@ extern bool opt_protocol;
				@@ -1045,6 +1034,7 @@ extern bool opt_protocol;
 extern bool have_longpoll;
 extern char *opt_kernel_path;
 extern char *opt_socks_proxy;
+extern bool opt_lyra;

 #if defined(unix) || defined(__APPLE__)
    extern char *opt_stderr_cmd;
@ -1165,8 +1155,8 @@ extern struct pool *add_pool(void);
				@@ -1165,8 +1155,8 @@ extern struct pool *add_pool(void);
 extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass, char *name, char *desc, char *profile, char *algo);

 #define MAX_GPUDEVICES 16
-#define MAX_DEVICES 4096
-
+//#define MAX_DEVICES 4096
+#define MAX_DEVICES 8192
 #define MIN_INTENSITY 4
 #define MIN_INTENSITY_STR "4"
 #define MAX_INTENSITY 31
--- a/ocl.c
+++ b/ocl.c
@ -36,8 +36,8 @@
				@@ -36,8 +36,8 @@
 #include "ocl/binary_kernel.h"
 #include "algorithm/neoscrypt.h"
 #include "algorithm/pluck.h"
-#include "algorithm/yescrypt.h"
-#include "algorithm/lyra2re.h"
+//#include "algorithm/yescrypt.h"
+#include "algorithm/lyra2rev2.h"

 /* FIXME: only here for global config vars, replace with configuration.h
 * or similar as soon as config is in a struct instead of littered all
@ -500,6 +500,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -500,6 +500,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
  }

+#if 0
  // Yescrypt TC
  else if ((cgpu->algorithm.type == ALGO_YESCRYPT ||
            algorithm->type == ALGO_YESCRYPT_MULTI) && !cgpu->opt_tc) {
@ -584,9 +585,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -584,9 +585,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg

    applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
  }
+#endif

-  // Lyra2re v2 TC
-  else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) {
+  // Lyra2REv2 TC
+  else if (cgpu->algorithm.type == ALGO_LYRA2REV2 /*&& !cgpu->opt_tc*/) {
    size_t glob_thread_count;
    long max_int;
    unsigned char type = 0;
@ -784,6 +786,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -784,6 +786,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      applog(LOG_DEBUG, "pluck buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
      // scrypt/n-scrypt
    }
+#if 0
    else if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
      /* The scratch/pad-buffer needs 32kBytes memory per thread. */
      bufsize = YESCRYPT_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
@ -797,7 +800,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -797,7 +800,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
      // scrypt/n-scrypt
    }
-    else if (algorithm->type == ALGO_LYRA2REv2) {
+#endif
+    else if (algorithm->type == ALGO_LYRA2REV2) {
      /* The scratch/pad-buffer needs 32kBytes memory per thread. */
      bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
      buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
@ -835,6 +839,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -835,6 +839,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      applog(LOG_WARNING, "Your settings come to %lu", (unsigned long)bufsize);
    }

+#if 0
    if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_MULTI) {
      // need additionnal buffers
      clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
@ -855,7 +860,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -855,7 +860,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
        return NULL;
      }
    }
-    else if (algorithm->type == ALGO_LYRA2REv2) {
+    else
+#endif
+    if (algorithm->type == ALGO_LYRA2REV2) {
      // need additionnal buffers
      clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
      if (status != CL_SUCCESS && !clState->buffer1) {
--- a/ocl.h
+++ b/ocl.h
@ -10,7 +10,8 @@
				@@ -10,7 +10,8 @@
 #include <CL/cl.h>
 #endif

-#include "algorithm.h"
+//#include "algorithm.h"
+#include "miner.h"

 typedef struct __clState {
  cl_context context;
--- a/ocl/binary_kernel.c
+++ b/ocl/binary_kernel.c
@ -1,7 +1,5 @@
				@@ -1,7 +1,5 @@
 #include "binary_kernel.h"
-#include "miner.h"
 #include <sys/stat.h>
-#include <stdio.h>

 cl_program load_opencl_binary_kernel(build_kernel_data *data)
 {
--- a/ocl/build_kernel.c
+++ b/ocl/build_kernel.c
@ -1,6 +1,4 @@
				@@ -1,6 +1,4 @@
-#include <stdio.h>
 #include "build_kernel.h"
-#include "miner.h"

 static char *file_contents(const char *filename, int *length)
 {
--- a/ocl/build_kernel.h
+++ b/ocl/build_kernel.h
@ -1,6 +1,7 @@
				@@ -1,6 +1,7 @@
 #ifndef BUILD_KERNEL_H
 #define BUILD_KERNEL_H

+#include "ocl.h"
 #include <stdbool.h>
 #include "logging.h"

--- a/sgminer.c
+++ b/sgminer.c
@ -68,6 +68,10 @@ char *curly = ":D";
				@@ -68,6 +68,10 @@ char *curly = ":D";
  #include <sys/wait.h>
 #endif

+#if defined(USE_GIT_VERSION) && defined(GIT_VERSION)
+#undef VERSION
+#define VERSION GIT_VERSION
+#endif

 static char packagename[256];

@ -2148,7 +2152,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work)
				@@ -2148,7 +2152,7 @@ static void gen_gbt_work(struct pool *pool, struct work *work)
  }

  // Neoscrypt doesn't calc_midstate()
-  if (pool->algorithm.type == ALGO_NEOSCRYPT) {
+  if (pool->algorithm.type != ALGO_NEOSCRYPT) {
    calc_midstate(work);
  }
  local_work++;
@ -2567,7 +2571,7 @@ static void curses_print_status(void)
				@@ -2567,7 +2571,7 @@ static void curses_print_status(void)
  unsigned short int line = 0;

  wattron(statuswin, A_BOLD);
-  cg_mvwprintw(statuswin, line, 0, PACKAGE " " CGMINER_VERSION " - Started: %s", datestamp);
+  cg_mvwprintw(statuswin, line, 0, PACKAGE " " VERSION " - Started: %s", datestamp);
  curses_print_uptime(&launch_time);
  wattroff(statuswin, A_BOLD);

@ -5574,7 +5578,7 @@ static void *stratum_sthread(void *userdata)
				@@ -5574,7 +5578,7 @@ static void *stratum_sthread(void *userdata)
    applog(LOG_DEBUG, "stratum_sthread() algorithm = %s", pool->algorithm.name);

    // Neoscrypt is little endian
-    if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
+    if (pool->algorithm.type == ALGO_NEOSCRYPT) {
      nonce = htobe32(*((uint32_t *)(work->data + 76)));
      //*((uint32_t *)nonce2) = htole32(work->nonce2);
    }
@ -6078,7 +6082,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
				@@ -6078,7 +6082,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
  applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name);

  // Different for Neoscrypt because of Little Endian
-  if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
+  if (pool->algorithm.type == ALGO_NEOSCRYPT) {
    /* Incoming data is in little endian. */
    memcpy(merkle_root, merkle_sha, 32);

@ -6140,7 +6144,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
				@@ -6140,7 +6144,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
  }

  // For Neoscrypt use set_target_neoscrypt() function
-  if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
+  if (pool->algorithm.type == ALGO_NEOSCRYPT) {
    set_target_neoscrypt(work->target, work->sdiff, work->thr_id);
  } else {
    calc_midstate(work);
@ -6238,7 +6242,7 @@ static void apply_initial_gpu_settings(struct pool *pool)
				@@ -6238,7 +6242,7 @@ static void apply_initial_gpu_settings(struct pool *pool)

  //thread-concurrency
  // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings
-  if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
+  if (pool->algorithm.type == ALGO_NEOSCRYPT) {
    opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency));
  }
  // otherwise use pool/profile setting or default to default profile setting
@ -6562,7 +6566,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool)
				@@ -6562,7 +6566,7 @@ static void apply_switcher_options(unsigned long options, struct pool *pool)
  if(opt_isset(options, SWITCHER_APPLY_TC))
  {
    // neoscrypt - if not specified set TC to 0 so that TC will be calculated by intensity settings
-    if (!pool->algorithm.type == ALGO_NEOSCRYPT) {
+    if (pool->algorithm.type == ALGO_NEOSCRYPT) {
      opt = ((empty_string(pool->thread_concurrency))?"0":get_pool_setting(pool->thread_concurrency, default_profile.thread_concurrency));
    }
    // otherwise use pool/profile setting or default to default profile setting
@ -8700,7 +8704,7 @@ int main(int argc, char *argv[])
				@@ -8700,7 +8704,7 @@ int main(int argc, char *argv[])
  /* We use the getq mutex as the staged lock */
  stgd_lock = &getq->mutex;

-  snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, CGMINER_VERSION);
+  snprintf(packagename, sizeof(packagename), "%s %s", PACKAGE, VERSION);

 #ifndef WIN32
  signal(SIGPIPE, SIG_IGN);
@ -8734,7 +8738,7 @@ int main(int argc, char *argv[])
				@@ -8734,7 +8738,7 @@ int main(int argc, char *argv[])
 #endif

  /* Default algorithm specified in algorithm.c ATM */
-  set_algorithm(&default_profile.algorithm, "scrypt");
+  set_algorithm(&default_profile.algorithm, "x11");

  devcursor = 8;
  logstart = devcursor + 1;
--- a/util.c
+++ b/util.c
@ -1791,7 +1791,7 @@ static bool send_version(struct pool *pool, json_t *val)
				@@ -1791,7 +1791,7 @@ static bool send_version(struct pool *pool, json_t *val)
  if (!id)
    return false;

-  sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"CGMINER_VERSION"\", \"error\": null}", id);
+  sprintf(s, "{\"id\": %d, \"result\": \""PACKAGE"/"VERSION"\", \"error\": null}", id);
  if (!stratum_send(pool, s, strlen(s)))
    return false;

@ -2480,9 +2480,9 @@ resend:
				@@ -2480,9 +2480,9 @@ resend:
    sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++);
  } else {
    if (pool->sessionid)
-      sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\", \"%s\"]}", swork_id++, pool->sessionid);
+      sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\", \"%s\"]}", swork_id++, pool->sessionid);
    else
-      sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"CGMINER_VERSION"\"]}", swork_id++);
+      sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\"]}", swork_id++);
  }

  if (__stratum_send(pool, s, strlen(s)) != SEND_OK) {
--- a/winbuild/dist/include/config.h
+++ b/winbuild/dist/include/config.h
@ -67,11 +67,11 @@
				@@ -67,11 +67,11 @@

 #endif

-#define VERSION "v5.2.0"
+#define VERSION "5.2.1"
 #define PACKAGE_NAME "sgminer"
 #define PACKAGE_TARNAME "sgminer"
-#define PACKAGE_VERSION "5.2.0"
-#define PACKAGE_STRING "sgminer 5.2.0"
+#define PACKAGE_VERSION "5.2.1"
+#define PACKAGE_STRING "sgminer 5.2.1"
 #define PACKAGE "sgminer"

 #define SGMINER_PREFIX ""
--- a/winbuild/sgminer.vcxproj
+++ b/winbuild/sgminer.vcxproj
@ -115,16 +115,16 @@
				@@ -115,16 +115,16 @@
    </Link>
    <PostBuildEvent>
      <Command>
-        xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
+REM        xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
      </Command>
    </PostBuildEvent>
    <PreBuildEvent>
      <Command>
        del /f "$(OutDir)*.exe"
-        del /f "$(OutDir)*.dll"
+REM        del /f "$(OutDir)*.dll"

-        echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
-        FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
+REM        echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
+REM        FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
        exit 0
      </Command>
    </PreBuildEvent>
@ -200,16 +200,16 @@
				@@ -200,16 +200,16 @@
    </Link>
    <PostBuildEvent>
      <Command>
-        xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
+REM        xcopy /Y /E /I "$(ProjectDir)..\kernel" "$(OutDir)\kernel"
      </Command>
    </PostBuildEvent>
    <PreBuildEvent>
      <Command>
        del /f "$(OutDir)*.exe"
-        del /f "$(OutDir)*.dll"
+REM        del /f "$(OutDir)*.dll"

-        echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
-        FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
+REM        echo #define USE_GIT_VERSION 1 &gt; "$(ProjectDir)dist\include\gitversion.h"
+REM        FOR /F "tokens=*" %%i IN ('call git describe "--abbrev=4" --dirty') DO echo #define GIT_VERSION "%%i" &gt;&gt; "$(ProjectDir)dist\include\gitversion.h"
        exit 0
      </Command>
    </PreBuildEvent>
@ -263,11 +263,15 @@
				@@ -263,11 +263,15 @@
    <ClCompile Include="..\algorithm.c" />
    <ClCompile Include="..\algorithm\animecoin.c" />
    <ClCompile Include="..\algorithm\bitblock.c" />
+    <ClCompile Include="..\algorithm\credits.c" />
    <ClCompile Include="..\algorithm\lyra2.c" />
    <ClCompile Include="..\algorithm\lyra2re.c" />
+    <ClCompile Include="..\algorithm\lyra2rev2.c" />
+    <ClCompile Include="..\algorithm\lyra2v2.c" />
    <ClCompile Include="..\algorithm\neoscrypt.c" />
    <ClCompile Include="..\algorithm\pluck.c" />
    <ClCompile Include="..\algorithm\sponge.c" />
+    <ClCompile Include="..\algorithm\spongev2.c" />
    <ClCompile Include="..\algorithm\talkcoin.c" />
    <ClCompile Include="..\algorithm\whirlpoolx.c" />
    <ClCompile Include="..\algorithm\x14.c" />
@ -328,11 +332,16 @@
				@@ -328,11 +332,16 @@
    <ClInclude Include="..\algorithm.h" />
    <ClInclude Include="..\algorithm\animecoin.h" />
    <ClInclude Include="..\algorithm\bitblock.h" />
+    <ClInclude Include="..\algorithm\credits.h" />
    <ClInclude Include="..\algorithm\lyra2.h" />
    <ClInclude Include="..\algorithm\lyra2re.h" />
+    <ClInclude Include="..\algorithm\lyra2rev2.h" />
+    <ClInclude Include="..\algorithm\lyra2v2.h" />
    <ClInclude Include="..\algorithm\neoscrypt.h" />
    <ClInclude Include="..\algorithm\pluck.h" />
    <ClInclude Include="..\algorithm\sponge.h" />
+    <ClInclude Include="..\algorithm\spongev2.h" />
+    <ClInclude Include="..\algorithm\sysendian.h" />
    <ClInclude Include="..\algorithm\talkcoin.h" />
    <ClInclude Include="..\algorithm\whirlpoolx.h" />
    <ClInclude Include="..\algorithm\x14.h" />
@ -365,6 +374,7 @@
				@@ -365,6 +374,7 @@
    <ClInclude Include="..\algorithm\qubitcoin.h" />
    <ClInclude Include="..\algorithm\scrypt.h" />
    <ClInclude Include="..\algorithm\sifcoin.h" />
+    <ClInclude Include="..\sph\sha256_Y.h" />
    <ClInclude Include="..\sph\sph_blake.h" />
    <ClInclude Include="..\sph\sph_bmw.h" />
    <ClInclude Include="..\sph\sph_cubehash.h" />
--- a/winbuild/sgminer.vcxproj.filters
+++ b/winbuild/sgminer.vcxproj.filters
@ -218,6 +218,18 @@
				@@ -218,6 +218,18 @@
    <ClCompile Include="..\algorithm\pluck.c">
      <Filter>Source Files\algorithm</Filter>
    </ClCompile>
+    <ClCompile Include="..\algorithm\lyra2v2.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\lyra2rev2.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\spongev2.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\credits.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\adl.h">
@ -415,6 +427,24 @@
				@@ -415,6 +427,24 @@
    <ClInclude Include="..\algorithm\pluck.h">
      <Filter>Header Files\algorithm</Filter>
    </ClInclude>
+    <ClInclude Include="..\algorithm\lyra2v2.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\lyra2rev2.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\spongev2.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\credits.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\sph\sha256_Y.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\sysendian.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="README.txt" />