Added Lyra2RE algo

2025-03-13 06:01:03 +00:00 · 2015-03-10 09:47:46 +01:00 · 2015-03-10 09:47:46 +01:00 · 8d68e96934
commit 8d68e96934
parent f366619269
24 changed files with 3278 additions and 3658 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -69,6 +69,7 @@ sgminer_SOURCES += algorithm/fresh.c algorithm/fresh.h
 sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
 sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
 sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
+sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h

 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl

--- a/algorithm.c
+++ b/algorithm.c
@ -32,6 +32,7 @@
 #include "algorithm/whirlcoin.h"
 #include "algorithm/neoscrypt.h"
 #include "algorithm/whirlpoolx.h"
+#include "algorithm/lyra2re.h"

 #include "compat.h"

@ -54,7 +55,8 @@ const char *algorithm_type_str[] = {
  "Fresh",
  "Whirlcoin",
  "Neoscrypt",
-  "WhirlpoolX"
+  "WhirlpoolX",
+  "Lyra2RE"
 };

 void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
@ -653,6 +655,50 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk
  return status;
 }

+static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+	cl_kernel *kernel;
+	unsigned int num;
+	cl_int status = 0;
+	cl_ulong le_target;
+
+	le_target = *(cl_ulong *)(blk->work->device_target + 24);
+	flip80(clState->cldata, blk->work->data);
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
+
+	// blake - search
+	kernel = &clState->kernel;
+	num = 0;
+
+	CL_SET_ARG(clState->padbuffer8);
+	CL_SET_ARG(blk->work->blk.ctx_a);
+	CL_SET_ARG(blk->work->blk.ctx_b);
+	CL_SET_ARG(blk->work->blk.ctx_c);
+	CL_SET_ARG(blk->work->blk.ctx_d);
+	CL_SET_ARG(blk->work->blk.ctx_e);
+	CL_SET_ARG(blk->work->blk.ctx_f);
+	CL_SET_ARG(blk->work->blk.ctx_g);
+	CL_SET_ARG(blk->work->blk.ctx_h);
+	CL_SET_ARG(blk->work->blk.cty_a);
+	CL_SET_ARG(blk->work->blk.cty_b);
+	CL_SET_ARG(blk->work->blk.cty_c);
+
+	// bmw - search1
+	kernel = clState->extra_kernels;
+	CL_SET_ARG_0(clState->padbuffer8);
+	// groestl - search2
+	CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
+	// skein - search3
+	CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
+	// jh - search4
+	num = 0;
+	CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
+	CL_SET_ARG(clState->outputBuffer);
+	CL_SET_ARG(le_target);
+
+	return status;
+}
+
 typedef struct _algorithm_settings_t {
  const char *name; /* Human-readable identifier */
  algorithm_type_t type; //common algorithm type
@ -728,6 +774,8 @@ static algorithm_settings_t algos[] = {

  { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL},

+  { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304 , 0, lyra2re_regenhash, queue_lyra2RE_kernel, gen_hash, NULL},
+
  // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b, c) \
    { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL}
@ -737,7 +785,6 @@ static algorithm_settings_t algos[] = {
 #undef A_FUGUE

  { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL},
-
  { "whirlpoolx", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, 0, whirlpoolx_regenhash, queue_sph_kernel, gen_hash, NULL },

  // Terminator (do not remove)
@ -812,6 +859,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
  ALGO_ALIAS("keccak", "maxcoin");
  ALGO_ALIAS("whirlpool", "whirlcoin");
  ALGO_ALIAS("whirlpoolx", "whirlpoolx");
+  ALGO_ALIAS("Lyra2RE", "lyra2re");
+  ALGO_ALIAS("lyra2", "lyra2re");

  #undef ALGO_ALIAS
  #undef ALGO_ALIAS_NF
--- a/algorithm.h
+++ b/algorithm.h
@ -25,7 +25,8 @@ typedef enum {
  ALGO_NIST,
  ALGO_FRESH,
  ALGO_WHIRL,
-  ALGO_NEOSCRYPT
+  ALGO_NEOSCRYPT,
+  ALGO_LYRA2RE
 } algorithm_type_t;

 extern const char *algorithm_type_str[];
--- a/algorithm/lyra2.c
+++ b/algorithm/lyra2.c
@ -0,0 +1,208 @@
+/**
+ * Implementation of the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "lyra2.h"
+#include "sponge.h"
+
+/**
+ * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
+ * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
+ * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
+ * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
+ * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
+ *
+ * @param K The derived key to be output by the algorithm
+ * @param kLen Desired key length
+ * @param pwd User password
+ * @param pwdlen Password length
+ * @param salt Salt
+ * @param saltlen Salt length
+ * @param timeCost Parameter to determine the processing time (T)
+ * @param nRows Number or rows of the memory matrix (R)
+ * @param nCols Number of columns of the memory matrix (C)
+ *
+ * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
+ */
+int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
+
+    //============================= Basic variables ============================//
+    int64_t row = 2; //index of row to be processed
+    int64_t prev = 1; //index of prev (last row ever computed/modified)
+    int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
+    int64_t tau; //Time Loop iterator
+    int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
+    int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
+    int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
+    int64_t i; //auxiliary iteration counter
+    //==========================================================================/
+
+    //========== Initializing the Memory Matrix and pointers to it =============//
+    //Tries to allocate enough space for the whole memory matrix
+    i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
+	uint64_t *wholeMatrix = (uint64_t*)malloc(i);
+    if (wholeMatrix == NULL) {
+      return -1;
+    }
+	memset(wholeMatrix, 0, i);
+
+    //Allocates pointers to each row of the matrix
+	uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*));
+    if (memMatrix == NULL) {
+      return -1;
+    }
+    //Places the pointers in the correct positions
+    uint64_t *ptrWord = wholeMatrix;
+    for (i = 0; i < nRows; i++) {
+      memMatrix[i] = ptrWord;
+      ptrWord += ROW_LEN_INT64;
+    }
+    //==========================================================================/
+
+    //============= Getting the password + salt + basil padded with 10*1 ===============//
+    //OBS.:The memory matrix will temporarily hold the password: not for saving memory,
+    //but this ensures that the password copied locally will be overwritten as soon as possible
+
+    //First, we clean enough blocks for the password, salt, basil and padding
+    uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
+    byte *ptrByte = (byte*) wholeMatrix;
+    memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
+
+    //Prepends the password
+    memcpy(ptrByte, pwd, pwdlen);
+    ptrByte += pwdlen;
+
+    //Concatenates the salt
+    memcpy(ptrByte, salt, saltlen);
+    ptrByte += saltlen;
+
+    //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
+    memcpy(ptrByte, &kLen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &saltlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &timeCost, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nRows, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nCols, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+
+    //Now comes the padding
+    *ptrByte = 0x80; //first byte of padding: right after the password
+    ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
+    ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
+    *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
+    //==========================================================================/
+
+    //======================= Initializing the Sponge State ====================//
+    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
+	uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t));
+    if (state == NULL) {
+      return -1;
+    }
+    initState(state);
+    //==========================================================================/
+
+    //================================ Setup Phase =============================//
+    //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
+    ptrWord = wholeMatrix;
+    for (i = 0; i < nBlocksInput; i++) {
+      absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
+      ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil)
+    }
+
+    //Initializes M[0] and M[1]
+    reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
+    reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);
+
+    do {
+      //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
+      reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
+
+
+      //updates the value of row* (deterministically picked during Setup))
+      rowa = (rowa + step) & (window - 1);
+      //update prev: it now points to the last row ever computed
+      prev = row;
+      //updates row: goes to the next row to be computed
+      row++;
+
+      //Checks if all rows in the window where visited.
+      if (rowa == 0) {
+      step = window + gap; //changes the step: approximately doubles its value
+      window *= 2; //doubles the size of the re-visitation window
+      gap = -gap; //inverts the modifier to the step
+    }
+
+    } while (row < nRows);
+    //==========================================================================/
+
+    //============================ Wandering Phase =============================//
+    row = 0; //Resets the visitation to the first row of the memory matrix
+    for (tau = 1; tau <= timeCost; tau++) {
+    	//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
+    	step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
+    	do {
+  	    //Selects a pseudorandom index row*
+  	    //------------------------------------------------------------------------------------------
+  	    //rowa = ((unsigned int)state[0]) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+  	    rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+  	    //------------------------------------------------------------------------------------------
+
+  	    //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
+  	    reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
+
+  	    //update prev: it now points to the last row ever computed
+  	    prev = row;
+
+  	    //updates row: goes to the next row to be computed
+  	    //------------------------------------------------------------------------------------------
+  	    //row = (row + step) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+  	    row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+  	    //------------------------------------------------------------------------------------------
+
+      } while (row != 0);
+    }
+    //==========================================================================/
+
+    //============================ Wrap-up Phase ===============================//
+    //Absorbs the last block of the memory matrix
+    absorbBlock(state, memMatrix[rowa]);
+
+    //Squeezes the key
+    squeeze(state, (unsigned char*)K, kLen);
+    //==========================================================================/
+
+    //========================= Freeing the memory =============================//
+    free(memMatrix);
+    free(wholeMatrix);
+
+    //Wiping out the sponge's internal state before freeing it
+    memset(state, 0, 16 * sizeof (uint64_t));
+    free(state);
+    //==========================================================================/
+
+    return 0;
+}
--- a/algorithm/lyra2.h
+++ b/algorithm/lyra2.h
@ -0,0 +1,50 @@
+/**
+ * Header file for the Lyra2 Password Hashing Scheme (PHS).
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LYRA2_H_
+#define LYRA2_H_
+
+#include <stdint.h>
+
+typedef unsigned char byte;
+
+//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
+#define BLOCK_LEN_BLAKE2_SAFE_INT64 8                                   //512 bits (=64 bytes, =8 uint64_t)
+#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8)   //same as above, in bytes
+
+
+#ifdef BLOCK_LEN_BITS
+        #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64)      //Block length: 768 bits (=96 bytes, =12 uint64_t)
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8)       //Block length, in bytes
+#else   //default block lenght: 768 bits
+        #define BLOCK_LEN_INT64 12                       //Block length: 768 bits (=96 bytes, =12 uint64_t)
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8)    //Block length, in bytes
+#endif
+
+#ifndef N_COLS
+        #define N_COLS 8                                //Number of columns in the memory matrix: fixed to 64 by default
+#endif
+
+#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row: N_COLS blocks
+#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8)        //Number of bytes per row
+
+
+int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
+
+#endif /* LYRA2_H_ */
--- a/algorithm/lyra2re.c
+++ b/algorithm/lyra2re.c
@ -0,0 +1,169 @@
+/*-
+ * Copyright 2014 James Lovejoy
+ * Copyright 2014 phm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "sph/sph_blake.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_keccak.h" 
+#include "lyra2.h"
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+ */
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+	uint32_t i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = htobe32(src[i]);
+}
+
+
+inline void lyra2rehash(void *state, const void *input)
+{
+    sph_blake256_context     ctx_blake;
+    sph_groestl256_context   ctx_groestl;
+    sph_keccak256_context    ctx_keccak;
+    sph_skein256_context     ctx_skein;
+
+    uint32_t hashA[8], hashB[8];
+
+    sph_blake256_init(&ctx_blake);
+    sph_blake256 (&ctx_blake, input, 80);
+    sph_blake256_close (&ctx_blake, hashA);
+
+
+
+
+    sph_keccak256_init(&ctx_keccak);
+    sph_keccak256 (&ctx_keccak,hashA, 32);
+    sph_keccak256_close(&ctx_keccak, hashB);
+
+	LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
+
+
+	sph_skein256_init(&ctx_skein);
+    sph_skein256 (&ctx_skein, hashA, 32);
+    sph_skein256_close(&ctx_skein, hashB);
+
+
+    sph_groestl256_init(&ctx_groestl);
+    sph_groestl256 (&ctx_groestl, hashB, 32);
+    sph_groestl256_close(&ctx_groestl, hashA);
+
+//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);
+
+	memcpy(state, hashA, 32);
+}
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+
+/* Used externally as confirmation of correct OCL code */
+int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[20], ohash[8];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = htobe32(nonce);
+	lyra2rehash(ohash, data);
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+				(long unsigned int)Htarg,
+				(long unsigned int)diff1targ,
+				(long unsigned int)tmp_hash7);
+	if (tmp_hash7 > diff1targ)
+		return -1;
+	if (tmp_hash7 > Htarg)
+		return 0;
+	return 1;
+}
+
+void lyra2re_regenhash(struct work *work)
+{
+        uint32_t data[20];
+        uint32_t *nonce = (uint32_t *)(work->data + 76);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        be32enc_vect(data, (const uint32_t *)work->data, 19);
+        data[19] = htobe32(*nonce);
+        lyra2rehash(ohash, data);
+}
+
+bool scanhash_lyra2re(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	while(1) {
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[19] = (n);
+		lyra2rehash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx",
+					(long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[19] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+
+
--- a/algorithm/lyra2re.h
+++ b/algorithm/lyra2re.h
@ -0,0 +1,10 @@
+#ifndef LYRA2RE_H
+#define LYRA2RE_H
+
+#include "miner.h"
+
+extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
+			uint32_t nonce);
+extern void lyra2re_regenhash(struct work *work);
+
+#endif /* LYRA2RE_H */
--- a/algorithm/sponge.c
+++ b/algorithm/sponge.c
@ -0,0 +1,742 @@
+/**
+ * A simple implementation of Blake2b's internal permutation 
+ * in the form of a sponge.
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <time.h>
+#include "sponge.h"
+#include "lyra2.h"
+
+
+
+/**
+ * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder 
+ * receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
+ * typically have their internal state initialized with zeros, Blake2b's G function
+ * has a fixed point: if the internal state and message are both filled with zeros. the 
+ * resulting permutation will always be a block filled with zeros; this happens because 
+ * Blake2b does not use the constants originally employed in Blake2 inside its G function, 
+ * relying on the IV for avoiding possible fixed points.
+ * 
+ * @param state         The 1024-bit array to be initialized
+ */
+void initState(uint64_t state[/*16*/]) {
+    //First 512 bis are zeros
+    memset(state, 0, 64); 
+    //Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
+    state[8] = blake2b_IV[0];
+    state[9] = blake2b_IV[1];
+    state[10] = blake2b_IV[2];
+    state[11] = blake2b_IV[3];
+    state[12] = blake2b_IV[4];
+    state[13] = blake2b_IV[5];
+    state[14] = blake2b_IV[6];
+    state[15] = blake2b_IV[7];
+}
+
+/**
+ * Execute Blake2b's G function, with all 12 rounds.
+ * 
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+static void blake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+    ROUND_LYRA(1);
+    ROUND_LYRA(2);
+    ROUND_LYRA(3);
+    ROUND_LYRA(4);
+    ROUND_LYRA(5);
+    ROUND_LYRA(6);
+    ROUND_LYRA(7);
+    ROUND_LYRA(8);
+    ROUND_LYRA(9);
+    ROUND_LYRA(10);
+    ROUND_LYRA(11);
+}
+
+/**
+ * Executes a reduced version of Blake2b's G function with only one round
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+static void reducedBlake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+}
+
+/**
+ * Performs a squeeze operation, using Blake2b's G function as the 
+ * internal permutation
+ * 
+ * @param state      The current state of the sponge 
+ * @param out        Array that will receive the data squeezed
+ * @param len        The number of bytes to be squeezed into the "out" array
+ */
+void squeeze(uint64_t *state, byte *out, unsigned int len) {
+    int fullBlocks = len / BLOCK_LEN_BYTES;
+    byte *ptr = out;
+    int i;
+    //Squeezes full blocks
+    for (i = 0; i < fullBlocks; i++) {
+	memcpy(ptr, state, BLOCK_LEN_BYTES);
+	blake2bLyra(state);
+	ptr += BLOCK_LEN_BYTES;
+    }
+
+    //Squeezes remaining bytes
+    memcpy(ptr, state, (len % BLOCK_LEN_BYTES));
+}
+
+/**
+ * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
+ * of type uint64_t), using Blake2b's G function as the internal permutation
+ * 
+ * @param state The current state of the sponge 
+ * @param in    The block to be absorbed (BLOCK_LEN_INT64 words)
+ */
+void absorbBlock(uint64_t *state, const uint64_t *in) {
+    //XORs the first BLOCK_LEN_INT64 words of "in" with the current state
+    state[0] ^= in[0];
+    state[1] ^= in[1];
+    state[2] ^= in[2];
+    state[3] ^= in[3];
+    state[4] ^= in[4];
+    state[5] ^= in[5];
+    state[6] ^= in[6];
+    state[7] ^= in[7];
+    state[8] ^= in[8];
+    state[9] ^= in[9];
+    state[10] ^= in[10];
+    state[11] ^= in[11];
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 
+ * words of type uint64_t), using Blake2b's G function as the internal permutation
+ * 
+ * @param state The current state of the sponge 
+ * @param in    The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
+ */
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
+    //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
+    state[0] ^= in[0];
+    state[1] ^= in[1];
+    state[2] ^= in[2];
+    state[3] ^= in[3];
+    state[4] ^= in[4];
+    state[5] ^= in[5];
+    state[6] ^= in[6];
+    state[7] ^= in[7];
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/** 
+ * Performs a reduced squeeze operation for a single row, from the highest to 
+ * the lowest index, using the reduced-round Blake2b's G function as the 
+ * internal permutation
+ * 
+ * @param state     The current state of the sponge 
+ * @param rowOut    Row to receive the data squeezed
+ */
+void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
+    uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
+    int i;
+    //M[row][C-1-col] = H.reduced_squeeze()    
+    for (i = 0; i < N_COLS; i++) {
+	ptrWord[0] = state[0];
+	ptrWord[1] = state[1];
+	ptrWord[2] = state[2];
+	ptrWord[3] = state[3];
+	ptrWord[4] = state[4];
+	ptrWord[5] = state[5];
+	ptrWord[6] = state[6];
+	ptrWord[7] = state[7];
+	ptrWord[8] = state[8];
+	ptrWord[9] = state[9];
+	ptrWord[10] = state[10];
+	ptrWord[11] = state[11];
+
+	//Goes to next block (column) that will receive the squeezed data
+	ptrWord -= BLOCK_LEN_INT64;
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+    }
+}
+
+/** 
+ * Performs a reduced duplex operation for a single row, from the highest to 
+ * the lowest index, using the reduced-round Blake2b's G function as the 
+ * internal permutation
+ * 
+ * @param state		The current state of the sponge 
+ * @param rowIn		Row to feed the sponge
+ * @param rowOut	Row to receive the sponge's output
+ */
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    int i;
+
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[prev][col]"
+	state[0]  ^= (ptrWordIn[0]);
+	state[1]  ^= (ptrWordIn[1]);
+	state[2]  ^= (ptrWordIn[2]);
+	state[3]  ^= (ptrWordIn[3]);
+	state[4]  ^= (ptrWordIn[4]);
+	state[5]  ^= (ptrWordIn[5]);
+	state[6]  ^= (ptrWordIn[6]);
+	state[7]  ^= (ptrWordIn[7]);
+	state[8]  ^= (ptrWordIn[8]);
+	state[9]  ^= (ptrWordIn[9]);
+	state[10] ^= (ptrWordIn[10]);
+	state[11] ^= (ptrWordIn[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][C-1-col] = M[prev][col] XOR rand
+	ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+	ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+	ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+	ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+	ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+	ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+	ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+	ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+	ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+	ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+	ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+	ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+	
+	
+	//Input: next column (i.e., next block in sequence)
+	ptrWordIn += BLOCK_LEN_INT64;
+	//Output: goes to previous column
+	ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., 
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make 
+ * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and 
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left and N_COLS is a system parameter.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut;				//In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    int i;
+
+    for (i = 0; i < N_COLS; i++) {
+	//Absorbing "M[prev] [+] M[row*]"
+	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+	state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+	state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+	state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+	state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+	state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+	state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+	state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+	state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+	state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+	state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+	state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][col] = M[prev][col] XOR rand
+	ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+	ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+	ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+	ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+	ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+	ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+	ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+	ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+	ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+	ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+	ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+	ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+	
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0]  ^= state[11];
+	ptrWordInOut[1]  ^= state[0];
+	ptrWordInOut[2]  ^= state[1];
+	ptrWordInOut[3]  ^= state[2];
+	ptrWordInOut[4]  ^= state[3];
+	ptrWordInOut[5]  ^= state[4];
+	ptrWordInOut[6]  ^= state[5];
+	ptrWordInOut[7]  ^= state[6];
+	ptrWordInOut[8]  ^= state[7];
+	ptrWordInOut[9]  ^= state[8];
+	ptrWordInOut[10] ^= state[9];
+	ptrWordInOut[11] ^= state[10];
+
+	//Inputs: next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	//Output: goes to previous column
+	ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., 
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make 
+ * "M[rowOut][col] = M[rowOut][col] XOR rand" and 
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[prev] [+] M[row*]"
+	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+	state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+	state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+	state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+	state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+	state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+	state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+	state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+	state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+	state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+	state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+	state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[rowOut][col] = M[rowOut][col] XOR rand
+	ptrWordOut[0] ^= state[0];
+	ptrWordOut[1] ^= state[1];
+	ptrWordOut[2] ^= state[2];
+	ptrWordOut[3] ^= state[3];
+	ptrWordOut[4] ^= state[4];
+	ptrWordOut[5] ^= state[5];
+	ptrWordOut[6] ^= state[6];
+	ptrWordOut[7] ^= state[7];
+	ptrWordOut[8] ^= state[8];
+	ptrWordOut[9] ^= state[9];
+	ptrWordOut[10] ^= state[10];
+	ptrWordOut[11] ^= state[11];
+
+	//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[11];
+	ptrWordInOut[1] ^= state[0];
+	ptrWordInOut[2] ^= state[1];
+	ptrWordInOut[3] ^= state[2];
+	ptrWordInOut[4] ^= state[3];
+	ptrWordInOut[5] ^= state[4];
+	ptrWordInOut[6] ^= state[5];
+	ptrWordInOut[7] ^= state[6];
+	ptrWordInOut[8] ^= state[7];
+	ptrWordInOut[9] ^= state[8];
+	ptrWordInOut[10] ^= state[9];
+	ptrWordInOut[11] ^= state[10];
+
+	//Goes to next block
+	ptrWordOut += BLOCK_LEN_INT64;
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0];
+	ptrWordOut[1] = state[1];
+	ptrWordOut[2] = state[2];
+	ptrWordOut[3] = state[3];
+	ptrWordOut[4] = state[4];
+	ptrWordOut[5] = state[5];
+	ptrWordOut[6] = state[6];
+	ptrWordOut[7] = state[7];
+	ptrWordOut[8] = state[8];
+	ptrWordOut[9] = state[9];
+	ptrWordOut[10] = state[10];
+	ptrWordOut[11] = state[11];
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut;
+    int i;
+
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+
+    ptrWordOut =  rowOut + BLOCK_LEN_INT64;
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand"
+ * to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)", 
+ * where rotW is a 64-bit rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[rowOut][col] = M[rowOut][col] XOR rand
+	ptrWordOut[0] ^= state[0];
+	ptrWordOut[1] ^= state[1];
+	ptrWordOut[2] ^= state[2];
+	ptrWordOut[3] ^= state[3];
+	ptrWordOut[4] ^= state[4];
+	ptrWordOut[5] ^= state[5];
+	ptrWordOut[6] ^= state[6];
+	ptrWordOut[7] ^= state[7];
+	ptrWordOut[8] ^= state[8];
+	ptrWordOut[9] ^= state[9];
+	ptrWordOut[10] ^= state[10];
+	ptrWordOut[11] ^= state[11];
+
+	//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+	
+
+	//Goes to next block
+	ptrWordOut += BLOCK_LEN_INT64;
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ Prints an array of unsigned chars
+ */
+void printArray(unsigned char *array, unsigned int size, char *name) {
+    int i;
+    printf("%s: ", name);
+    for (i = 0; i < size; i++) {
+	printf("%2x|", array[i]);
+    }
+    printf("\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
--- a/algorithm/sponge.h
+++ b/algorithm/sponge.h
@ -0,0 +1,108 @@
+/**
+ * Header file for Blake2b's internal permutation in the form of a sponge. 
+ * This code is based on the original Blake2b's implementation provided by 
+ * Samuel Neves (https://blake2.net/)
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPONGE_H_
+#define SPONGE_H_
+
+#include <stdint.h>
+
+#if defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(32)))
+#elif defined(_MSC_VER)
+#define ALIGN __declspec(align(32))
+#else
+#define ALIGN
+#endif
+
+
+/*Blake2b IV Array*/
+static const uint64_t blake2b_IV[8] =
+{
+  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+/*Blake2b's rotation*/
+static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
+    return ( w >> c ) | ( w << ( 64 - c ) );
+}
+
+/*Blake2b's G function*/
+#define G(r,i,a,b,c,d) \
+  do { \
+    a = a + b; \
+    d = rotr64(d ^ a, 32); \
+    c = c + d; \
+    b = rotr64(b ^ c, 24); \
+    a = a + b; \
+    d = rotr64(d ^ a, 16); \
+    c = c + d; \
+    b = rotr64(b ^ c, 63); \
+  } while(0)
+
+
+/*One Round of the Blake2b's compression function*/
+#define ROUND_LYRA(r)  \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
+
+
+//---- Housekeeping
+void initState(uint64_t state[/*16*/]);
+
+//---- Squeezes
+void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
+void reducedSqueezeRow0(uint64_t* state, uint64_t* row);
+
+//---- Absorbs
+void absorbBlock(uint64_t *state, const uint64_t *in);
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
+
+//---- Duplexes
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+
+//---- Misc
+void printArray(unsigned char *array, unsigned int size, char *name);
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////TESTS////
+//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
+//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+/////////////
+
+
+#endif /* SPONGE_H_ */
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -284,7 +284,7 @@ char *set_gpu_engine(const char *_arg)
  char *arg = (char *)alloca(strlen(_arg) + 1);
  strcpy(arg, _arg);

-  if(!(nextptr = strtok(arg, ",")))
+  if (!(nextptr = strtok(arg, ",")))
    return "Invalid parameters for set gpu engine";

  do {
@ -408,12 +408,12 @@ char *set_gpu_memdiff(char *arg)

    gpus[device++].gpu_memdiff = val;
  }
-    if (device == 1) {
-      for (i = device; i < MAX_GPUDEVICES; i++)
-        gpus[i].gpu_memdiff = gpus[0].gpu_memdiff;
-    }
+  if (device == 1) {
+    for (i = device; i < MAX_GPUDEVICES; i++)
+      gpus[i].gpu_memdiff = gpus[0].gpu_memdiff;
+  }

-      return NULL;
+  return NULL;
 }

 char *set_gpu_powertune(char *arg)
@ -630,12 +630,12 @@ char *set_xintensity(const char *_arg)
    device++;
  }
  if (device == 1)
-    for (i = device; i < MAX_GPUDEVICES; i++) {
-      gpus[i].dynamic = gpus[0].dynamic;
-      gpus[i].intensity = gpus[0].intensity;
-      gpus[i].rawintensity = gpus[0].rawintensity;
-      gpus[i].xintensity = gpus[0].xintensity;
-    }
+  for (i = device; i < MAX_GPUDEVICES; i++) {
+    gpus[i].dynamic = gpus[0].dynamic;
+    gpus[i].intensity = gpus[0].intensity;
+    gpus[i].rawintensity = gpus[0].rawintensity;
+    gpus[i].xintensity = gpus[0].xintensity;
+  }

  return NULL;
 }
@ -673,12 +673,12 @@ char *set_rawintensity(const char *_arg)
    device++;
  }
  if (device == 1)
-    for (i = device; i < MAX_GPUDEVICES; i++) {
-      gpus[i].dynamic = gpus[0].dynamic;
-      gpus[i].intensity = gpus[0].intensity;
-      gpus[i].rawintensity = gpus[0].rawintensity;
-      gpus[i].xintensity = gpus[0].xintensity;
-    }
+  for (i = device; i < MAX_GPUDEVICES; i++) {
+    gpus[i].dynamic = gpus[0].dynamic;
+    gpus[i].intensity = gpus[0].intensity;
+    gpus[i].rawintensity = gpus[0].rawintensity;
+    gpus[i].xintensity = gpus[0].xintensity;
+  }

  return NULL;
 }
@ -799,22 +799,22 @@ retry: // TODO: refactor
      displayed_rolling = thr->rolling;
      if (!mhash_base)
        displayed_rolling *= 1000;
-      wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K" , cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
+      wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K", cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
      switch (cgpu->status) {
-        default:
-        case LIFE_WELL:
-          wlog("ALIVE");
-          break;
-        case LIFE_SICK:
-          wlog("SICK reported in %s", checkin);
-          break;
-        case LIFE_DEAD:
-          wlog("DEAD reported in %s", checkin);
-          break;
-        case LIFE_INIT:
-        case LIFE_NOSTART:
-          wlog("Never started");
-          break;
+      default:
+      case LIFE_WELL:
+        wlog("ALIVE");
+        break;
+      case LIFE_SICK:
+        wlog("SICK reported in %s", checkin);
+        break;
+      case LIFE_DEAD:
+        wlog("DEAD reported in %s", checkin);
+        break;
+      case LIFE_INIT:
+      case LIFE_NOSTART:
+        wlog("Never started");
+        break;
      }
      if (thr->pause)
        wlog(" paused");
@ -825,7 +825,7 @@ retry: // TODO: refactor
    wlog("\n");
  }

-  wlogprint("[E]nable  [D]isable  [R]estart GPU  %s\n",adl_active ? "[C]hange settings" : "");
+  wlogprint("[E]nable  [D]isable  [R]estart GPU  %s\n", adl_active ? "[C]hange settings" : "");
  wlogprint("[I]ntensity  E[x]perimental intensity  R[a]w Intensity\n");

  wlogprint("Or press any other key to continue\n");
@ -868,7 +868,8 @@ retry: // TODO: refactor
    }
    rd_unlock(&mining_thr_lock);
    goto retry;
-  } else if (!strncasecmp(&input, "d", 1)) {
+  }
+  else if (!strncasecmp(&input, "d", 1)) {
    if (selected)
      selected = curses_int("Select GPU to disable");
    if (selected < 0 || selected >= nDevs) {
@ -881,7 +882,8 @@ retry: // TODO: refactor
    }
    gpus[selected].deven = DEV_DISABLED;
    goto retry;
-  } else if (!strncasecmp(&input, "i", 1)) {
+  }
+  else if (!strncasecmp(&input, "i", 1)) {
    int intensity;
    char *intvar;

@ -893,8 +895,8 @@ retry: // TODO: refactor
    }

    intvar = curses_input("Set GPU scan intensity (d or "
-                MIN_INTENSITY_STR " -> "
-                MAX_INTENSITY_STR ")");
+      MIN_INTENSITY_STR " -> "
+      MAX_INTENSITY_STR ")");
    if (!intvar) {
      wlogprint("Invalid input\n");
      goto retry;
@ -927,7 +929,8 @@ retry: // TODO: refactor

    pause_dynamic_threads(selected);
    goto retry;
-  } else if (!strncasecmp(&input, "x", 1)) {
+  }
+  else if (!strncasecmp(&input, "x", 1)) {
    int xintensity;
    char *intvar;

@ -960,7 +963,8 @@ retry: // TODO: refactor

    pause_dynamic_threads(selected);
    goto retry;
-  } else if (!strncasecmp(&input, "a", 1)) {
+  }
+  else if (!strncasecmp(&input, "a", 1)) {
    int rawintensity;
    char *intvar;

@ -993,7 +997,8 @@ retry: // TODO: refactor

    pause_dynamic_threads(selected);
    goto retry;
-  } else if (!strncasecmp(&input, "r", 1)) {
+  }
+  else if (!strncasecmp(&input, "r", 1)) {
    if (selected)
      selected = curses_int("Select GPU to attempt to restart");
    if (selected < 0 || selected >= nDevs) {
@ -1003,7 +1008,8 @@ retry: // TODO: refactor
    wlogprint("Attempting to restart threads of GPU %d\n", selected);
    reinit_device(&gpus[selected]);
    goto retry;
-  } else if (adl_active && (!strncasecmp(&input, "c", 1))) {
+  }
+  else if (adl_active && (!strncasecmp(&input, "c", 1))) {
    if (selected)
      selected = curses_int("Select GPU to change settings on");
    if (selected < 0 || selected >= nDevs) {
@ -1012,7 +1018,8 @@ retry: // TODO: refactor
    }
    change_gpusettings(selected);
    goto retry;
-  } else
+  }
+  else
    clear_logwin();

  immedok(logwin, false);
@ -1027,8 +1034,8 @@ void manage_gpu(void)
 static _clState *clStates[MAX_GPUDEVICES];

 static void set_threads_hashes(unsigned int vectors, unsigned int compute_shaders, int64_t *hashes, size_t *globalThreads,
-             unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity,
-             __maybe_unused int *rawintensity, algorithm_t *algorithm)
+  unsigned int minthreads, __maybe_unused int *intensity, __maybe_unused int *xintensity,
+  __maybe_unused int *rawintensity, algorithm_t *algorithm)
 {
  unsigned int threads = 0;
  while (threads < minthreads) {
@ -1037,7 +1044,7 @@ static void set_threads_hashes(unsigned int vectors, unsigned int compute_shader
      threads = *rawintensity;
    }
    else if (*xintensity > 0) {
-      threads = compute_shaders * ((algorithm->xintensity_shift)?(1 << (algorithm->xintensity_shift + *xintensity)):*xintensity);
+      threads = compute_shaders * ((algorithm->xintensity_shift) ? (1 << (algorithm->xintensity_shift + *xintensity)) : *xintensity);
    }
    else {
      threads = 1 << (algorithm->intensity_shift + *intensity);
@ -1102,7 +1109,8 @@ select_cgpu:
      applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
      cg_completion_timeout(&thr_info_cancel_join, thr, 5000);
      thr->cgpu->drv->thread_shutdown(thr);
-    } else
+    }
+    else
      applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
  }
  rd_unlock(&mining_thr_lock);
@ -1234,7 +1242,8 @@ static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_inf
    else
      tailsprintf(buf, bufsiz, "        ");
    tailsprintf(buf, bufsiz, "| ");
-  } else
+  }
+  else
    gpu->drv->get_statline_before = &blank_get_statline_before;
 }
 #endif
@ -1250,7 +1259,7 @@ static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu)
 }

 struct opencl_thread_data {
-  cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
+  cl_int(*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
  uint32_t *res;
 };

@ -1340,7 +1349,7 @@ static bool opencl_thread_init(struct thr_info *thr)
  }

  status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-               buffersize, blank_res, 0, NULL, NULL);
+    buffersize, blank_res, 0, NULL, NULL);
  if (unlikely(status != CL_SUCCESS)) {
    free(thrdata->res);
    free(thrdata);
@ -1357,7 +1366,13 @@ static bool opencl_thread_init(struct thr_info *thr)

 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-  work->blk.work = work;
+  if (!safe_cmp(work->pool->algorithm.name, "Lyra2RE")) {
+    work->blk.work = work;
+    precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data));
+  }
+  else {
+    work->blk.work = work;
+  }
  thr->pool_no = work->pool->pool_no;
  return true;
 }
@ -1365,7 +1380,7 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work
 extern int opt_dynamic_interval;

 static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
-        int64_t __maybe_unused max_nonce)
+  int64_t __maybe_unused max_nonce)
 {
  const int thr_id = thr->id;
  struct opencl_thread_data *thrdata = (struct opencl_thread_data *)thr->cgpu_data;
@ -1376,11 +1391,11 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
  cl_int status;
  size_t globalThreads[1];
  size_t localThreads[1] = { clState->wsize };
-    size_t *p_global_work_offset = NULL;
+  size_t *p_global_work_offset = NULL;
  int64_t hashes;
  int found = gpu->algorithm.found_idx;
  int buffersize = BUFFERSIZE;
-    unsigned int i;
+  unsigned int i;

  /* Windows' timer resolution is only 15ms so oversample 5x */
  if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
@ -1392,7 +1407,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
    if (gpu_us > dynamic_us) {
      if (gpu->intensity > MIN_INTENSITY)
        --gpu->intensity;
-    } else if (gpu_us < dynamic_us / 2) {
+    }
+    else if (gpu_us < dynamic_us / 2) {
      if (gpu->intensity < MAX_INTENSITY)
        ++gpu->intensity;
    }
@ -1401,7 +1417,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
  }

  set_threads_hashes(clState->vwidth, clState->compute_shaders, &hashes, globalThreads, localThreads[0],
-         &gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm);
+    &gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm);
  if (hashes > gpu->max_hashes)
    gpu->max_hashes = hashes;

@ -1411,27 +1427,27 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
    return -1;
  }

-    if (clState->goffset)
-        p_global_work_offset = (size_t *)&work->blk.nonce;
+  if (clState->goffset)
+    p_global_work_offset = (size_t *)&work->blk.nonce;

-    status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
-                    globalThreads, localThreads, 0,  NULL, NULL);
+  status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, p_global_work_offset,
+    globalThreads, localThreads, 0, NULL, NULL);
  if (unlikely(status != CL_SUCCESS)) {
    applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
    return -1;
  }

  for (i = 0; i < clState->n_extra_kernels; i++) {
-      status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
-                      globalThreads, localThreads, 0,  NULL, NULL);
-      if (unlikely(status != CL_SUCCESS)) {
-          applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
-          return -1;
-      }
+    status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
+      globalThreads, localThreads, 0, NULL, NULL);
+    if (unlikely(status != CL_SUCCESS)) {
+      applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
+      return -1;
+    }
  }

  status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-             buffersize, thrdata->res, 0, NULL, NULL);
+    buffersize, thrdata->res, 0, NULL, NULL);
  if (unlikely(status != CL_SUCCESS)) {
    applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
    return -1;
@ -1449,7 +1465,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
  if (thrdata->res[found]) {
    /* Clear the buffer again */
    status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-                buffersize, blank_res, 0, NULL, NULL);
+      buffersize, blank_res, 0, NULL, NULL);
    if (unlikely(status != CL_SUCCESS)) {
      applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
      return -1;
@ -1471,7 +1487,7 @@ static void opencl_thread_shutdown(struct thr_info *thr)
  const int thr_id = thr->id;
  _clState *clState = clStates[thr_id];
  clStates[thr_id] = NULL;
-    unsigned int i;
+  unsigned int i;

  if (clState) {
    clFinish(clState->commandQueue);
@ -1480,8 +1496,8 @@ static void opencl_thread_shutdown(struct thr_info *thr)
    if (clState->padbuffer8)
      clReleaseMemObject(clState->padbuffer8);
    clReleaseKernel(clState->kernel);
-        for (i = 0; i < clState->n_extra_kernels; i++)
-            clReleaseKernel(clState->extra_kernels[i]);
+    for (i = 0; i < clState->n_extra_kernels; i++)
+      clReleaseKernel(clState->extra_kernels[i]);
    clReleaseProgram(clState->program);
    clReleaseCommandQueue(clState->commandQueue);
    clReleaseContext(clState->context);
@ -1503,7 +1519,7 @@ struct device_drv opencl_drv = {
 #ifdef HAVE_ADL
  /*.get_statline_before = */ get_opencl_statline_before,
 #else
-          NULL,
+  NULL,
 #endif
  /*.get_statline = */    get_opencl_statline,
  /*.api_data = */    NULL,
@ -1524,7 +1540,7 @@ struct device_drv opencl_drv = {
  /*.hw_error = */    NULL,
  /*.thread_shutdown = */   opencl_thread_shutdown,
  /*.thread_enable =*/    NULL,
-          false,
-          0,
-          0
+  false,
+  0,
+  0
 };
--- a/findnonce.c
+++ b/findnonce.c
@ -92,7 +92,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
  blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
  blk->PreVal4_2 = blk->PreVal4 + blk->T1;
  blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
-  blk->PreW31 = 0x00000280 + (rotr(blk->W16,  7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
+  blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
  blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
  blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
  blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
@ -191,7 +191,7 @@ static void *postcalc_hash(void *userdata)
   * end of the res[] array */
  if (unlikely(pcd->res[found] & ~found)) {
    applog(LOG_WARNING, "%s%d: invalid nonce count - HW error",
-        thr->cgpu->drv->name, thr->cgpu->device_id);
+      thr->cgpu->drv->name, thr->cgpu->device_id);
    hw_errors++;
    thr->cgpu->hw_errors++;
    pcd->res[found] &= found;
@ -200,7 +200,7 @@ static void *postcalc_hash(void *userdata)
  for (entry = 0; entry < pcd->res[found]; entry++) {
    uint32_t nonce = pcd->res[entry];
    if (found == 0x0F)
-        nonce = swab32(nonce);
+      nonce = swab32(nonce);

    applog(LOG_DEBUG, "[THR%d] OCL NONCE %08x (%lu) found in slot %d (found = %d)", thr->id, nonce, nonce, entry, found);
    submit_nonce(thr, pcd->work, nonce);
@ -234,3 +234,136 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
    free(pcd);
  }
 }
+
+// BLAKE 256 14 rounds (standard)
+
+typedef struct
+{
+  uint32_t h[8];
+  uint32_t t;
+} blake_state256;
+
+#define NB_ROUNDS32 14
+
+const uint8_t blake_sigma[][16] =
+{
+  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+  { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+  { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+  { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+  { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
+};
+
+const uint32_t blake_u256[16] =
+{
+  0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
+  0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
+  0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
+  0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917
+};
+
+#define ROT32(x,n) (((x)<<(32-n))|( (x)>>(n)))
+//#define ROT32(x,n)   (rotate((uint)x, (uint)32-n))
+#define ADD32(x,y)   ((uint32_t)((x) + (y)))
+#define XOR32(x,y)   ((uint32_t)((x) ^ (y)))
+
+#define G(a,b,c,d,i) \
+do { \
+  v[a] += XOR32(m[blake_sigma[r][i]], blake_u256[blake_sigma[r][i + 1]]) + v[b]; \
+  v[d] = ROT32(XOR32(v[d], v[a]), 16); \
+  v[c] += v[d]; \
+  v[b] = ROT32(XOR32(v[b], v[c]), 12); \
+  v[a] += XOR32(m[blake_sigma[r][i + 1]], blake_u256[blake_sigma[r][i]]) + v[b]; \
+  v[d] = ROT32(XOR32(v[d], v[a]), 8); \
+  v[c] += v[d]; \
+  v[b] = ROT32(XOR32(v[b], v[c]), 7); \
+} while (0)
+
+// compress a block
+void blake256_compress_block(blake_state256 *S, uint32_t *m)
+{
+  uint32_t v[16];
+  int i, r;
+  for (i = 0; i < 8; ++i)  v[i] = S->h[i];
+
+  v[8] = blake_u256[0];
+  v[9] = blake_u256[1];
+  v[10] = blake_u256[2];
+  v[11] = blake_u256[3];
+  v[12] = blake_u256[4];
+  v[13] = blake_u256[5];
+  v[14] = blake_u256[6];
+  v[15] = blake_u256[7];
+
+  v[12] ^= S->t;
+  v[13] ^= S->t;
+
+  for (r = 0; r < NB_ROUNDS32; ++r)
+  {
+    /* column step */
+    G(0, 4, 8, 12, 0);
+    G(1, 5, 9, 13, 2);
+    G(2, 6, 10, 14, 4);
+    G(3, 7, 11, 15, 6);
+    /* diagonal step */
+    G(0, 5, 10, 15, 8);
+    G(1, 6, 11, 12, 10);
+    G(2, 7, 8, 13, 12);
+    G(3, 4, 9, 14, 14);
+  }
+
+  for (i = 0; i < 16; ++i)  S->h[i & 7] ^= v[i];
+}
+
+void blake256_init(blake_state256 *S)
+{
+  S->h[0] = 0x6a09e667;
+  S->h[1] = 0xbb67ae85;
+  S->h[2] = 0x3c6ef372;
+  S->h[3] = 0xa54ff53a;
+  S->h[4] = 0x510e527f;
+  S->h[5] = 0x9b05688c;
+  S->h[6] = 0x1f83d9ab;
+  S->h[7] = 0x5be0cd19;
+  S->t = 0;
+}
+
+void blake256_update(blake_state256 *S, const uint32_t *in)
+{
+  uint32_t m[16];
+  int i;
+  S->t = 512;
+  for (i = 0; i < 16; ++i)  m[i] = in[i];
+  blake256_compress_block(S, m);
+}
+
+void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
+{
+  blake_state256 S;
+  blake256_init(&S);
+  blake256_update(&S, data);
+
+  blk->ctx_a = S.h[0];
+  blk->ctx_b = S.h[1];
+  blk->ctx_c = S.h[2];
+  blk->ctx_d = S.h[3];
+  blk->ctx_e = S.h[4];
+  blk->ctx_f = S.h[5];
+  blk->ctx_g = S.h[6];
+  blk->ctx_h = S.h[7];
+
+  blk->cty_a = data[16];
+  blk->cty_b = data[17];
+  blk->cty_c = data[18];
+}
--- a/findnonce.h
+++ b/findnonce.h
@ -10,5 +10,6 @@

 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
 extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
+extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);

 #endif /*FINDNONCE_H*/
--- a/kernel/Lyra2.cl
+++ b/kernel/Lyra2.cl
@ -0,0 +1,145 @@
+/*
+* Lyra2 kernel implementation.
+*
+* ==========================(LICENSE BEGIN)============================
+* Copyright (c) 2014 djm34
+* 
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+* ===========================(LICENSE END)=============================
+*
+* @author   djm34
+*/
+
+/*Blake2b IV Array*/
+__constant static const sph_u64 blake2b_IV[8] =
+{
+  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+/*Blake2b's rotation*/
+static inline sph_u64 rotr64( const sph_u64 w, const unsigned c ){
+	return rotate(w, (ulong)(64-c));
+}
+
+/*Blake2b's G function*/
+#define G(a,b,c,d) \
+  do { \
+a += b; d ^= a; d = SPH_ROTR64(d, 32); \
+c += d; b ^= c; b = SPH_ROTR64(b, 24); \
+a += b; d ^= a; d = SPH_ROTR64(d, 16); \
+c += d; b ^= c; b = SPH_ROTR64(b, 63); \
+  } while(0)
+
+
+/*One Round of the Blake2b's compression function*/
+#define round_lyra(v)  \
+ do { \
+    G(v[ 0],v[ 4],v[ 8],v[12]); \
+    G(v[ 1],v[ 5],v[ 9],v[13]); \
+    G(v[ 2],v[ 6],v[10],v[14]); \
+    G(v[ 3],v[ 7],v[11],v[15]); \
+    G(v[ 0],v[ 5],v[10],v[15]); \
+    G(v[ 1],v[ 6],v[11],v[12]); \
+    G(v[ 2],v[ 7],v[ 8],v[13]); \
+    G(v[ 3],v[ 4],v[ 9],v[14]); \
+ } while(0)
+
+
+#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \
+   { \
+	for (int i = 0; i < 8; i++) \
+				{ \
+\
+		for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \
+		round_lyra(state); \
+		for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \
+\
+		Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+				} \
+ \
+   } 
+
+#define reduceDuplexRow(rowIn, rowInOut, rowOut) \
+  { \
+	 for (int i = 0; i < 8; i++) \
+	 	 	 	 	 { \
+		 for (int j = 0; j < 12; j++) \
+			 state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \
+ \
+		 round_lyra(state); \
+		 for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \
+\
+		 Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		 Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		 Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		 Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		 Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		 Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		 Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		 Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		 Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		 Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		 Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		 Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+	 	 	 	 	 } \
+ \
+  } 
+#define absorbblock(in)  { \
+	state[0] ^= Matrix[0][in]; \
+	state[1] ^= Matrix[1][in]; \
+	state[2] ^= Matrix[2][in]; \
+	state[3] ^= Matrix[3][in]; \
+	state[4] ^= Matrix[4][in]; \
+	state[5] ^= Matrix[5][in]; \
+	state[6] ^= Matrix[6][in]; \
+	state[7] ^= Matrix[7][in]; \
+	state[8] ^= Matrix[8][in]; \
+	state[9] ^= Matrix[9][in]; \
+	state[10] ^= Matrix[10][in]; \
+	state[11] ^= Matrix[11][in]; \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+	round_lyra(state); \
+  } 
--- a/kernel/Lyra2RE.cl
+++ b/kernel/Lyra2RE.cl
@ -0,0 +1,392 @@
+/*
+ * Lyra2RE kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ * Copyright (c) 2014 djm34
+ * Copyright (c) 2014 James Lovejoy
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   djm34
+ */
+
+#pragma OPENCL EXTENSION cl_amd_printf : enable
+
+#ifndef LYRA2RE_CL
+#define LYRA2RE_CL
+
+#if __ENDIAN_LITTLE__
+#define SPH_LITTLE_ENDIAN 1
+#else
+#define SPH_BIG_ENDIAN 1
+#endif
+
+#define SPH_UPTR sph_u64
+
+typedef unsigned int sph_u32;
+typedef int sph_s32;
+#ifndef __OPENCL_VERSION__
+typedef unsigned long long sph_u64;
+typedef long long sph_s64;
+#else
+typedef unsigned long sph_u64;
+typedef long sph_s64;
+#endif
+
+
+#define SPH_64 1
+#define SPH_64_TRUE 1
+
+#define SPH_C32(x)    ((sph_u32)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+#define SPH_C64(x)    ((sph_u64)(x ## UL))
+#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+
+//#define SPH_ROTL32(x, n)   (((x) << (n)) | ((x) >> (32 - (n))))
+//#define SPH_ROTR32(x, n)   (((x) >> (n)) | ((x) << (32 - (n))))
+//#define SPH_ROTL64(x, n)   (((x) << (n)) | ((x) >> (64 - (n))))
+//#define SPH_ROTR64(x, n)   (((x) >> (n)) | ((x) << (64 - (n))))
+
+#define SPH_ROTL32(x,n) rotate(x,(uint)n)     //faster with driver 14.6
+#define SPH_ROTR32(x,n) rotate(x,(uint)(32-n))
+#define SPH_ROTL64(x,n) rotate(x,(ulong)n)
+#define SPH_ROTR64(x,n) rotate(x,(ulong)(64-n))
+
+
+#include "blake256.cl"
+#include "groestl256.cl"
+#include "Lyra2.cl"
+#include "keccak1600.cl"
+#include "skein256.cl"
+
+#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
+#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
+
+#if SPH_BIG_ENDIAN
+  #define DEC64E(x) (x)
+  #define DEC64BE(x) (*(const __global sph_u64 *) (x));
+  #define DEC64LE(x) SWAP8(*(const __global sph_u64 *) (x));
+  #define DEC32LE(x) (*(const __global sph_u32 *) (x));
+#else
+  #define DEC64E(x) SWAP8(x)
+  #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
+  #define DEC64LE(x) (*(const __global sph_u64 *) (x));
+#define DEC32LE(x) SWAP4(*(const __global sph_u32 *) (x));
+#endif
+
+typedef union {
+  unsigned char h1[64];
+  uint h4[16];
+  ulong h8[8];
+} hash_t;
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	 __global hash_t* hashes,
+	// precalc hash from fisrt part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+ uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+    sph_u32 h[8];
+	sph_u32 m[16];
+	sph_u32 v[16];
+
+
+h[0]=h0;
+h[1]=h1;
+h[2]=h2;
+h[3]=h3;
+h[4]=h4;
+h[5]=h5;
+h[6]=h6;
+h[7]=h7;
+// compress 2nd round
+ m[0] = in16;
+ m[1] = in17;
+ m[2] = in18;
+ m[3] = SWAP4(gid);
+
+	for (int i = 4; i < 16; i++) {m[i] = c_Padding[i];}
+
+	for (int i = 0; i < 8; i++) {v[i] = h[i];}
+
+	v[8] =  c_u256[0];
+	v[9] =  c_u256[1];
+	v[10] = c_u256[2];
+	v[11] = c_u256[3];
+	v[12] = c_u256[4] ^ 640;
+	v[13] = c_u256[5] ^ 640;
+	v[14] = c_u256[6];
+	v[15] = c_u256[7];
+
+	for (int r = 0; r < 14; r++) {	
+		GS(0, 4, 0x8, 0xC, 0x0);
+		GS(1, 5, 0x9, 0xD, 0x2);
+		GS(2, 6, 0xA, 0xE, 0x4);
+		GS(3, 7, 0xB, 0xF, 0x6);
+		GS(0, 5, 0xA, 0xF, 0x8);
+		GS(1, 6, 0xB, 0xC, 0xA);
+		GS(2, 7, 0x8, 0xD, 0xC);
+		GS(3, 4, 0x9, 0xE, 0xE);
+	}
+
+	for (int i = 0; i < 16; i++) {
+		 int j = i & 7;
+		h[j] ^= v[i];}
+
+for (int i=0;i<8;i++) {hash->h4[i]=SWAP4(h[i]);}
+
+ barrier(CLK_GLOBAL_MEM_FENCE);
+
+}
+
+// keccak256
+
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search1(__global hash_t* hashes)
+{
+  uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+ 		sph_u64 keccak_gpu_state[25];
+
+		for (int i = 0; i<25; i++) {
+			if (i<4) { keccak_gpu_state[i] = hash->h8[i]; }
+			else    { keccak_gpu_state[i] = 0; }
+		}
+		keccak_gpu_state[4] = 0x0000000000000001;
+		keccak_gpu_state[16] = 0x8000000000000000;
+
+		keccak_block(keccak_gpu_state);
+		for (int i = 0; i<4; i++) { hash->h8[i] = keccak_gpu_state[i]; }
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+
+}
+
+/// lyra2 algo 
+
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search2(__global hash_t* hashes)
+{
+ uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+
+
+  sph_u64 state[16];
+
+  for (int i = 0; i<4; i++) { state[i] = hash->h8[i];} //password
+  for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt 
+
+  for (int i = 0; i<8; i++) { state[i + 8] = blake2b_IV[i]; }
+
+  //     blake2blyra x2 
+
+  for (int i = 0; i<24; i++) { round_lyra(state); } //because 12 is not enough
+
+  sph_u64 Matrix[96][8]; // very uncool
+  /// reducedSqueezeRow0
+
+  for (int i = 0; i < 8; i++)
+  {
+	  for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][0] = state[j]; }
+	  round_lyra(state);
+  }
+
+  /// reducedSqueezeRow1
+
+  for (int i = 0; i < 8; i++)
+  {
+	  for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + 12 * i][0]; }
+	  round_lyra(state);
+	  for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][1] = Matrix[j + 12 * i][0] ^ state[j]; }
+  }
+
+ 
+  reduceDuplexRowSetup(1, 0, 2);
+  reduceDuplexRowSetup(2, 1, 3);
+  reduceDuplexRowSetup(3, 0, 4);
+  reduceDuplexRowSetup(4, 3, 5);
+  reduceDuplexRowSetup(5, 2, 6);
+  reduceDuplexRowSetup(6, 1, 7);
+
+  sph_u64 rowa;
+  rowa = state[0] & 7;
+
+  reduceDuplexRow(7, rowa, 0);
+  rowa = state[0] & 7;
+  reduceDuplexRow(0, rowa, 3);
+  rowa = state[0] & 7;
+  reduceDuplexRow(3, rowa, 6);
+  rowa = state[0] & 7;
+  reduceDuplexRow(6, rowa, 1);
+  rowa = state[0] & 7;
+  reduceDuplexRow(1, rowa, 4);
+  rowa = state[0] & 7;
+  reduceDuplexRow(4, rowa, 7);
+  rowa = state[0] & 7;
+  reduceDuplexRow(7, rowa, 2);
+  rowa = state[0] & 7;
+  reduceDuplexRow(2, rowa, 5);
+
+  absorbblock(rowa);
+
+  for (int i = 0; i<4; i++) {hash->h8[i] = state[i];} 
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+}
+
+//skein256
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search3(__global hash_t* hashes)
+{
+ uint gid = get_global_id(0);
+  __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
+
+
+		sph_u64 h[9];
+		sph_u64 t[3];
+        sph_u64 dt0,dt1,dt2,dt3;
+		sph_u64 p0, p1, p2, p3, p4, p5, p6, p7;
+        h[8] = skein_ks_parity;
+
+		for (int i = 0; i<8; i++) {
+			h[i] = SKEIN_IV512_256[i];
+			h[8] ^= h[i];}
+		    
+			t[0]=t12[0];
+			t[1]=t12[1];
+			t[2]=t12[2];
+
+        dt0=hash->h8[0];
+        dt1=hash->h8[1];
+        dt2=hash->h8[2];
+        dt3=hash->h8[3];
+
+		p0 = h[0] + dt0;
+		p1 = h[1] + dt1;
+		p2 = h[2] + dt2;
+		p3 = h[3] + dt3;
+		p4 = h[4];
+		p5 = h[5] + t[0];
+		p6 = h[6] + t[1];
+		p7 = h[7];
+
+        #pragma unroll 
+		for (int i = 1; i<19; i+=2) {Round_8_512(p0,p1,p2,p3,p4,p5,p6,p7,i);}
+        p0 ^= dt0;
+        p1 ^= dt1;
+        p2 ^= dt2;
+        p3 ^= dt3;
+
+		h[0] = p0;
+		h[1] = p1;
+		h[2] = p2;
+		h[3] = p3;
+		h[4] = p4;
+		h[5] = p5;
+		h[6] = p6;
+		h[7] = p7;
+		h[8] = skein_ks_parity;
+        
+		for (int i = 0; i<8; i++) { h[8] ^= h[i]; }
+		
+		t[0] = t12[3];
+		t[1] = t12[4];
+		t[2] = t12[5];
+		p5 += t[0];  //p5 already equal h[5] 
+		p6 += t[1];
+       
+        #pragma unroll
+		for (int i = 1; i<19; i+=2) { Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, i); }
+
+		hash->h8[0]      = p0;
+		hash->h8[1]      = p1;
+		hash->h8[2]      = p2;
+		hash->h8[3]      = p3;
+	
+
+
+  barrier(CLK_GLOBAL_MEM_FENCE);
+
+}
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search4(__global hash_t* hashes, __global uint* output, const uint target)
+{
+	uint gid = get_global_id(0);
+	__global hash_t *hash = &(hashes[gid - get_global_offset(0)]);
+
+	sph_u64 message[8], state[8];
+	sph_u64 t[8];
+	for (int k = 0; k<4; k++) { message[k] = hash->h8[k]; }
+
+	message[4] = 0x80UL;
+	message[5] = 0UL;
+	message[6] = 0UL; 
+	message[7] = 0x0100000000000000UL;
+
+	for (int u = 0; u<8; u++) { state[u] = message[u]; }
+	state[7] ^= 0x0001000000000000UL;
+
+	PERM_SMALL_P(state);
+	state[7] ^= 0x0001000000000000UL;
+	PERM_SMALL_Q(message);
+
+	for (int u = 0; u<8; u++) { state[u] ^= message[u]; }
+	message[7] = state[7];
+
+	PERM_SMALL_Pf(state);
+
+	state[7] ^= message[7];
+	barrier(CLK_GLOBAL_MEM_FENCE);
+
+	bool result = ( as_uint2(state[7]).y <= target);
+	if (result) {
+		output[atomic_inc(output + 0xFF)] = SWAP4(gid);
+	}
+
+}
+
+
+#endif // LYRA2RE_CL
--- a/kernel/arebyp.cl
+++ b/kernel/arebyp.cl
@ -904,7 +904,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
  //------------------------------------------------------------------------------------------------------------
  uint cotmp = x * zSIZE;
  progress = 0;
-  for (i = 0; i < N[NFACTOR] + 512 + 42; i++)
+  for (i = 0; i < N[NFACTOR] + (N[NFACTOR] / LOOKUP_GAP) + 42; i++)
    {
        //if (progress < 2 * N[NFACTOR])
 		{
--- a/kernel/blake256.cl
+++ b/kernel/blake256.cl
@ -0,0 +1,96 @@
+/*
+* blake256 kernel implementation.
+*
+* ==========================(LICENSE BEGIN)============================
+* Copyright (c) 2014 djm34
+* Copyright (c) 2014 tpruvot
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+* ===========================(LICENSE END)=============================
+*
+* @author   djm34
+*/
+__constant static const int sigma[16][16] = {
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+		{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+		{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+		{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+		{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+		{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+		{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+		{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+		{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+		{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+		{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
+};
+
+
+__constant static const sph_u32  c_IV256[8] = {
+	0x6A09E667, 0xBB67AE85,
+	0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C,
+	0x1F83D9AB, 0x5BE0CD19
+};
+
+/* Second part (64-80) msg never change, store it */
+__constant static const sph_u32  c_Padding[16] = {
+	0, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 1, 0, 640,
+};
+__constant static const sph_u32  c_u256[16] = {
+	0x243F6A88, 0x85A308D3,
+	0x13198A2E, 0x03707344,
+	0xA4093822, 0x299F31D0,
+	0x082EFA98, 0xEC4E6C89,
+	0x452821E6, 0x38D01377,
+	0xBE5466CF, 0x34E90C6C,
+	0xC0AC29B7, 0xC97C50DD,
+	0x3F84D5B5, 0xB5470917
+};
+
+#define GS(a,b,c,d,x) { \
+	const sph_u32 idx1 = sigma[r][x]; \
+	const sph_u32 idx2 = sigma[r][x+1]; \
+	v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \
+	v[d] ^= v[a]; \
+    v[d] = SPH_ROTR32(v[d], 16); \
+	v[c] += v[d]; \
+    v[b] ^= v[c]; \
+	v[b] = SPH_ROTR32(v[b], 12); \
+\
+	v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \
+    v[d] ^= v[a]; \
+	v[d] = SPH_ROTR32(v[d], 8); \
+	v[c] += v[d]; \
+    v[b] ^= v[c]; \
+	v[b] = SPH_ROTR32(v[b], 7); \
+}
+
+
+
+
+
--- a/kernel/diamond.cl
+++ b/kernel/diamond.cl
--- a/kernel/groestl256.cl
+++ b/kernel/groestl256.cl
@ -0,0 +1,415 @@
+/* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */
+/*
+ * Groestl256
+ *
+ * ==========================(LICENSE BEGIN)============================
+ * Copyright (c) 2014 djm34
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+/*
+ * Apparently, the 32-bit-only version is not faster than the 64-bit
+ * version unless using the "small footprint" code on a 32-bit machine.
+ */
+
+#define C64e(x)     ((SPH_C64(x) >> 56) \
+                    | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
+                    | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
+                    | ((SPH_C64(x) >>  8) & SPH_C64(0x00000000FF000000)) \
+                    | ((SPH_C64(x) <<  8) & SPH_C64(0x000000FF00000000)) \
+                    | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
+                    | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
+                    | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
+#define dec64e_aligned   sph_dec64le_aligned
+#define enc64e           sph_enc64le
+#define B64_0(x)    ((x) & 0xFF)
+#define B64_1(x)    (((x) >> 8) & 0xFF)
+#define B64_2(x)    (((x) >> 16) & 0xFF)
+#define B64_3(x)    (((x) >> 24) & 0xFF)
+#define B64_4(x)    (((x) >> 32) & 0xFF)
+#define B64_5(x)    (((x) >> 40) & 0xFF)
+#define B64_6(x)    (((x) >> 48) & 0xFF)
+#define B64_7(x)    ((x) >> 56)
+#define R64         SPH_ROTL64
+#define PC64(j, r)  ((sph_u64)((j) + (r)))
+#define QC64(j, r)  (((sph_u64)(r) << 56) ^ SPH_T64(~((sph_u64)(j) << 56)))
+
+__constant static const sph_u64 T0[] = {
+	C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8),
+	C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6),
+	C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6),
+	C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491),
+	C64e(0x6090f050f0c05060), C64e(0x0207050305040302),
+	C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56),
+	C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5),
+	C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec),
+	C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f),
+	C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa),
+	C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2),
+	C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb),
+	C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3),
+	C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45),
+	C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753),
+	C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b),
+	C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1),
+	C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c),
+	C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e),
+	C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83),
+	C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451),
+	C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9),
+	C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab),
+	C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a),
+	C64e(0x081c140c14100c08), C64e(0x9563f652f6315295),
+	C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d),
+	C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137),
+	C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f),
+	C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624),
+	C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf),
+	C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e),
+	C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea),
+	C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d),
+	C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34),
+	C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc),
+	C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b),
+	C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76),
+	C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d),
+	C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd),
+	C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713),
+	C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9),
+	C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1),
+	C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3),
+	C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6),
+	C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d),
+	C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72),
+	C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498),
+	C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85),
+	C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5),
+	C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed),
+	C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a),
+	C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411),
+	C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9),
+	C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe),
+	C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478),
+	C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b),
+	C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d),
+	C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05),
+	C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21),
+	C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1),
+	C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177),
+	C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342),
+	C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5),
+	C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf),
+	C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418),
+	C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3),
+	C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235),
+	C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e),
+	C64e(0x936af957f93d5793), C64e(0x55580df20daaf255),
+	C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a),
+	C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba),
+	C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6),
+	C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819),
+	C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3),
+	C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54),
+	C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b),
+	C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7),
+	C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28),
+	C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc),
+	C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad),
+	C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664),
+	C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14),
+	C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c),
+	C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8),
+	C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd),
+	C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4),
+	C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431),
+	C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2),
+	C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b),
+	C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da),
+	C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1),
+	C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049),
+	C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac),
+	C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf),
+	C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4),
+	C64e(0x476720e9208ee947), C64e(0x1038281828201810),
+	C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0),
+	C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c),
+	C64e(0x38546c246c702438), C64e(0x575f08f108aef157),
+	C64e(0x732152c752e6c773), C64e(0x9764f351f3355197),
+	C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1),
+	C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e),
+	C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61),
+	C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f),
+	C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c),
+	C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc),
+	C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506),
+	C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c),
+	C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a),
+	C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069),
+	C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899),
+	C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927),
+	C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb),
+	C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322),
+	C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9),
+	C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733),
+	C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c),
+	C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9),
+	C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa),
+	C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5),
+	C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859),
+	C64e(0x09929b809b128009), C64e(0x1a2339173934171a),
+	C64e(0x651075da75cada65), C64e(0xd784533153b531d7),
+	C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0),
+	C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029),
+	C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e),
+	C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8),
+	C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c)
+};
+
+__constant static const sph_u64 T4[] = {
+	C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784),
+	C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d),
+	C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd),
+	C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54),
+	C64e(0xf0c050606090f050), C64e(0x0504030202070503),
+	C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d),
+	C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662),
+	C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a),
+	C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d),
+	C64e(0xc00940898949c040), C64e(0x92ef87fafa689287),
+	C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb),
+	C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b),
+	C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967),
+	C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea),
+	C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7),
+	C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b),
+	C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c),
+	C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a),
+	C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341),
+	C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f),
+	C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4),
+	C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808),
+	C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573),
+	C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f),
+	C64e(0x14100c08081c140c), C64e(0xf63152959563f652),
+	C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e),
+	C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1),
+	C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5),
+	C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36),
+	C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d),
+	C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69),
+	C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f),
+	C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e),
+	C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e),
+	C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2),
+	C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb),
+	C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d),
+	C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce),
+	C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e),
+	C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297),
+	C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868),
+	C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c),
+	C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f),
+	C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced),
+	C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46),
+	C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b),
+	C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4),
+	C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a),
+	C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a),
+	C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16),
+	C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7),
+	C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794),
+	C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010),
+	C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881),
+	C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44),
+	C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3),
+	C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe),
+	C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a),
+	C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc),
+	C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04),
+	C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1),
+	C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563),
+	C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a),
+	C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d),
+	C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14),
+	C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f),
+	C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2),
+	C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39),
+	C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2),
+	C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947),
+	C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7),
+	C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495),
+	C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398),
+	C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f),
+	C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e),
+	C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83),
+	C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29),
+	C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c),
+	C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2),
+	C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76),
+	C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56),
+	C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e),
+	C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a),
+	C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4),
+	C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e),
+	C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6),
+	C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4),
+	C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b),
+	C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543),
+	C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7),
+	C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64),
+	C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0),
+	C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa),
+	C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25),
+	C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e),
+	C64e(0x208ee947476720e9), C64e(0x2820181010382818),
+	C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388),
+	C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672),
+	C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1),
+	C64e(0x52e6c773732152c7), C64e(0xf33551979764f351),
+	C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c),
+	C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321),
+	C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc),
+	C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485),
+	C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642),
+	C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa),
+	C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05),
+	C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612),
+	C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f),
+	C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0),
+	C64e(0xa82e911717bfa891), C64e(0xe82958999971e858),
+	C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9),
+	C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513),
+	C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533),
+	C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070),
+	C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7),
+	C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622),
+	C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020),
+	C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff),
+	C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a),
+	C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8),
+	C64e(0x9b12800909929b80), C64e(0x3934171a1a233917),
+	C64e(0x75cada65651075da), C64e(0x53b531d7d7845331),
+	C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8),
+	C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0),
+	C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311),
+	C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc),
+	C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a)
+};
+
+#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7)   do { \
+		t[d] = T0[B64_0(a[b0])] \
+			^ R64(T0[B64_1(a[b1])],  8) \
+			^ R64(T0[B64_2(a[b2])], 16) \
+			^ R64(T0[B64_3(a[b3])], 24) \
+			^ T4[B64_4(a[b4])] \
+			^ R64(T4[B64_5(a[b5])],  8) \
+			^ R64(T4[B64_6(a[b6])], 16) \
+			^ R64(T4[B64_7(a[b7])], 24); \
+		} while (0)
+
+#define ROUND_SMALL_P(a, r)   do { \
+		a[0] ^= PC64(0x00, r); \
+		a[1] ^= PC64(0x10, r); \
+		a[2] ^= PC64(0x20, r); \
+		a[3] ^= PC64(0x30, r); \
+		a[4] ^= PC64(0x40, r); \
+		a[5] ^= PC64(0x50, r); \
+		a[6] ^= PC64(0x60, r); \
+		a[7] ^= PC64(0x70, r); \
+		RSTT(0, a, 0, 1, 2, 3, 4, 5, 6, 7); \
+		RSTT(1, a, 1, 2, 3, 4, 5, 6, 7, 0); \
+		RSTT(2, a, 2, 3, 4, 5, 6, 7, 0, 1); \
+		RSTT(3, a, 3, 4, 5, 6, 7, 0, 1, 2); \
+		RSTT(4, a, 4, 5, 6, 7, 0, 1, 2, 3); \
+		RSTT(5, a, 5, 6, 7, 0, 1, 2, 3, 4); \
+		RSTT(6, a, 6, 7, 0, 1, 2, 3, 4, 5); \
+		RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \
+		a[0] = t[0]; \
+		a[1] = t[1]; \
+		a[2] = t[2]; \
+		a[3] = t[3]; \
+		a[4] = t[4]; \
+		a[5] = t[5]; \
+		a[6] = t[6]; \
+		a[7] = t[7]; \
+		} while (0)
+
+#define ROUND_SMALL_Pf(a, r)   do { \
+		a[0] ^= PC64(0x00, r); \
+		a[1] ^= PC64(0x10, r); \
+		a[2] ^= PC64(0x20, r); \
+		a[3] ^= PC64(0x30, r); \
+		a[4] ^= PC64(0x40, r); \
+		a[5] ^= PC64(0x50, r); \
+		a[6] ^= PC64(0x60, r); \
+		a[7] ^= PC64(0x70, r); \
+		RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \
+		a[7] = t[7]; \
+			} while (0)
+
+#define ROUND_SMALL_Q(a, r)   do { \
+		a[0] ^= QC64(0x00, r); \
+		a[1] ^= QC64(0x10, r); \
+		a[2] ^= QC64(0x20, r); \
+		a[3] ^= QC64(0x30, r); \
+		a[4] ^= QC64(0x40, r); \
+		a[5] ^= QC64(0x50, r); \
+		a[6] ^= QC64(0x60, r); \
+		a[7] ^= QC64(0x70, r); \
+		RSTT(0, a, 1, 3, 5, 7, 0, 2, 4, 6); \
+		RSTT(1, a, 2, 4, 6, 0, 1, 3, 5, 7); \
+		RSTT(2, a, 3, 5, 7, 1, 2, 4, 6, 0); \
+		RSTT(3, a, 4, 6, 0, 2, 3, 5, 7, 1); \
+		RSTT(4, a, 5, 7, 1, 3, 4, 6, 0, 2); \
+		RSTT(5, a, 6, 0, 2, 4, 5, 7, 1, 3); \
+		RSTT(6, a, 7, 1, 3, 5, 6, 0, 2, 4); \
+		RSTT(7, a, 0, 2, 4, 6, 7, 1, 3, 5); \
+		a[0] = t[0]; \
+		a[1] = t[1]; \
+		a[2] = t[2]; \
+		a[3] = t[3]; \
+		a[4] = t[4]; \
+		a[5] = t[5]; \
+		a[6] = t[6]; \
+		a[7] = t[7]; \
+		} while (0)
+
+#define PERM_SMALL_P(a)   do { \
+		for (int r = 0; r < 10; r ++) \
+			ROUND_SMALL_P(a, r); \
+		} while (0)
+
+#define PERM_SMALL_Pf(a)   do { \
+		for (int r = 0; r < 9; r ++) { \
+			ROUND_SMALL_P(a, r);} \
+            ROUND_SMALL_Pf(a,9); \
+			} while (0)
+
+#define PERM_SMALL_Q(a)   do { \
+		for (int r = 0; r < 10; r ++) \
+			ROUND_SMALL_Q(a, r); \
+		} while (0)
+
--- a/kernel/groestlcoin.cl
+++ b/kernel/groestlcoin.cl
--- a/kernel/keccak1600.cl
+++ b/kernel/keccak1600.cl
@ -0,0 +1,84 @@
+/*
+ * keccak_1600 function
+ * C. Buchner 2014 
+ *
+ */
+
+__constant static const sph_u64 RC[] = {
+  SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
+  SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
+  SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
+  SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
+  SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
+  SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
+  SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
+  SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
+  SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
+  SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
+  SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
+  SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
+};
+
+
+inline void keccak_block(ulong *s) {
+	size_t i;
+	ulong t[5], u[5], v, w;
+
+	for (i = 0; i < 24; i++) {
+		/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+		t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+		t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+		t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+		t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+		t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+		/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+		u[0] = t[4] ^ SPH_ROTL64(t[1], 1);
+		u[1] = t[0] ^ SPH_ROTL64(t[2], 1);
+		u[2] = t[1] ^ SPH_ROTL64(t[3], 1);
+		u[3] = t[2] ^ SPH_ROTL64(t[4], 1);
+		u[4] = t[3] ^ SPH_ROTL64(t[0], 1);
+
+		/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+		s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+		s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+		s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+		s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+		s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+		/* rho pi: b[..] = rotl(a[..], ..) */
+		v = s[1];
+		s[1] = SPH_ROTL64(s[6], 44);
+		s[6] = SPH_ROTL64(s[9], 20);
+		s[9] = SPH_ROTL64(s[22], 61);
+		s[22] = SPH_ROTL64(s[14], 39);
+		s[14] = SPH_ROTL64(s[20], 18);
+		s[20] = SPH_ROTL64(s[2], 62);
+		s[2] = SPH_ROTL64(s[12], 43);
+		s[12] = SPH_ROTL64(s[13], 25);
+		s[13] = SPH_ROTL64(s[19], 8);
+		s[19] = SPH_ROTL64(s[23], 56);
+		s[23] = SPH_ROTL64(s[15], 41);
+		s[15] = SPH_ROTL64(s[4], 27);
+		s[4] = SPH_ROTL64(s[24], 14);
+		s[24] = SPH_ROTL64(s[21], 2);
+		s[21] = SPH_ROTL64(s[8], 55);
+		s[8] = SPH_ROTL64(s[16], 45);
+		s[16] = SPH_ROTL64(s[5], 36);
+		s[5] = SPH_ROTL64(s[3], 28);
+		s[3] = SPH_ROTL64(s[18], 21);
+		s[18] = SPH_ROTL64(s[17], 15);
+		s[17] = SPH_ROTL64(s[11], 10);
+		s[11] = SPH_ROTL64(s[7], 6);
+		s[7] = SPH_ROTL64(s[10], 3);
+		s[10] = SPH_ROTL64(v, 1);
+
+		v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w;
+		v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w;
+		v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
+		v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
+		v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
+
+		s[0] ^= RC[i];
+	}
+};
--- a/kernel/skein256.cl
+++ b/kernel/skein256.cl
@ -0,0 +1,107 @@
+
+/*
+* skein256 kernel implementation.
+*
+* ==========================(LICENSE BEGIN)============================
+* Copyright (c) 2014 djm34
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+* ===========================(LICENSE END)=============================
+*
+* @author   djm34
+*/
+
+
+__constant static const sph_u64 SKEIN_IV512[] = {
+  SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
+  SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
+  SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
+  SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
+};
+
+__constant static const sph_u64 SKEIN_IV512_256[8] = {
+	0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL,
+	0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL,
+	0xEC06025E74DD7683UL, 0xE7A436CDC4746251UL,
+	0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL
+};
+
+
+
+__constant static const int ROT256[8][4] =
+{
+	46, 36, 19, 37,
+	33, 27, 14, 42,
+	17, 49, 36, 39,
+	44, 9, 54, 56,
+	39, 30, 34, 24,
+	13, 50, 10, 17,
+	25, 29, 39, 43,
+	8, 35, 56, 22,
+};
+
+__constant static const sph_u64 skein_ks_parity = 0x1BD11BDAA9FC1A22;
+
+__constant static const sph_u64 t12[6] =
+{ 0x20UL,
+0xf000000000000000UL,
+0xf000000000000020UL,
+0x08UL,
+0xff00000000000000UL,
+0xff00000000000008UL
+};
+static inline ulong ROTL64(const ulong v, const ulong n){
+	return rotate(v,n); 
+}
+
+#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT)  { \
+p0 += p1; p1 = SPH_ROTL64(p1, ROT256[ROT][0]);  p1 ^= p0; \
+p2 += p3; p3 = SPH_ROTL64(p3, ROT256[ROT][1]);  p3 ^= p2; \
+p4 += p5; p5 = SPH_ROTL64(p5, ROT256[ROT][2]);  p5 ^= p4; \
+p6 += p7; p7 = SPH_ROTL64(p7, ROT256[ROT][3]);  p7 ^= p6; \
+} 
+
+#define Round_8_512(p0, p1, p2, p3, p4, p5, p6, p7, R) { \
+	    Round512(p0, p1, p2, p3, p4, p5, p6, p7, 0); \
+	    Round512(p2, p1, p4, p7, p6, p5, p0, p3, 1); \
+	    Round512(p4, p1, p6, p3, p0, p5, p2, p7, 2); \
+	    Round512(p6, p1, p0, p7, p2, p5, p4, p3, 3); \
+	    p0 += h[((R)+0) % 9];  \ 
+        p1 += h[((R)+1) % 9]; \
+		p2 += h[((R)+2) % 9]; \
+		p3 += h[((R)+3) % 9]; \
+		p4 += h[((R)+4) % 9]; \
+		p5 += h[((R)+5) % 9] + t[((R)+0) % 3]; \
+		p6 += h[((R)+6) % 9] + t[((R)+1) % 3]; \
+		p7 += h[((R)+7) % 9] + R; \
+		Round512(p0, p1, p2, p3, p4, p5, p6, p7, 4); \
+		Round512(p2, p1, p4, p7, p6, p5, p0, p3, 5); \
+		Round512(p4, p1, p6, p3, p0, p5, p2, p7, 6); \
+		Round512(p6, p1, p0, p7, p2, p5, p4, p3, 7); \
+		p0 += h[((R)+1) % 9];   \ 
+		p1 += h[((R)+2) % 9]; \
+		p2 += h[((R)+3) % 9]; \
+		p3 += h[((R)+4) % 9]; \
+		p4 += h[((R)+5) % 9]; \
+		p5 += h[((R)+6) % 9] + t[((R)+1) % 3]; \
+		p6 += h[((R)+7) % 9] + t[((R)+2) % 3]; \
+		p7 += h[((R)+8) % 9] + (R+1); \
+}
--- a/sgminer.c
+++ b/sgminer.c
@ -48,6 +48,7 @@ char *curly = ":D";
 #endif
 #include <libgen.h>
 #include "sph/sph_sha2.h"
+#include "sph/sph_blake.h"

 #include "compat.h"
 #include "miner.h"
--- a/winbuild/sgminer.vcxproj
+++ b/winbuild/sgminer.vcxproj
@ -263,7 +263,10 @@
    <ClCompile Include="..\algorithm.c" />
    <ClCompile Include="..\algorithm\animecoin.c" />
    <ClCompile Include="..\algorithm\bitblock.c" />
+    <ClCompile Include="..\algorithm\lyra2.c" />
+    <ClCompile Include="..\algorithm\lyra2re.c" />
    <ClCompile Include="..\algorithm\neoscrypt.c" />
+    <ClCompile Include="..\algorithm\sponge.c" />
    <ClCompile Include="..\algorithm\talkcoin.c" />
    <ClCompile Include="..\algorithm\whirlpoolx.c" />
    <ClCompile Include="..\algorithm\x14.c" />
@ -325,7 +328,10 @@
    <ClInclude Include="..\algorithm.h" />
    <ClInclude Include="..\algorithm\animecoin.h" />
    <ClInclude Include="..\algorithm\bitblock.h" />
+    <ClInclude Include="..\algorithm\lyra2.h" />
+    <ClInclude Include="..\algorithm\lyra2re.h" />
    <ClInclude Include="..\algorithm\neoscrypt.h" />
+    <ClInclude Include="..\algorithm\sponge.h" />
    <ClInclude Include="..\algorithm\talkcoin.h" />
    <ClInclude Include="..\algorithm\whirlpoolx.h" />
    <ClInclude Include="..\algorithm\x14.h" />
--- a/winbuild/sgminer.vcxproj.filters
+++ b/winbuild/sgminer.vcxproj.filters
@ -209,6 +209,15 @@
    <ClCompile Include="..\algorithm\whirlpoolx.c">
      <Filter>Source Files\algorithm</Filter>
    </ClCompile>
+    <ClCompile Include="..\algorithm\lyra2.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\lyra2re.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\sponge.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\adl.h">
@ -397,6 +406,15 @@
    <ClInclude Include="..\algorithm\whirlpoolx.h">
      <Filter>Header Files\algorithm</Filter>
    </ClInclude>
+    <ClInclude Include="..\algorithm\lyra2.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\lyra2re.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\sponge.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="README.txt" />