Merge pull request #5 from nicehash/develop

Blake algo + fixes
9 years ago · fb9b8a339c
30 changed files with 2085 additions and 306 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -73,10 +73,12 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
				@@ -73,10 +73,12 @@ sgminer_SOURCES += algorithm/whirlcoin.c algorithm/whirlcoin.h
 sgminer_SOURCES += algorithm/neoscrypt.c algorithm/neoscrypt.h
 sgminer_SOURCES += algorithm/whirlpoolx.c algorithm/whirlpoolx.h
 sgminer_SOURCES += algorithm/lyra2re.c algorithm/lyra2re.h algorithm/lyra2.c algorithm/lyra2.h algorithm/sponge.c algorithm/sponge.h
-sgminer_SOURCES += algorithm/lyra2re_old.c algorithm/lyra2re_old.h
+sgminer_SOURCES += algorithm/lyra2rev2.c algorithm/lyra2rev2.h
 sgminer_SOURCES += algorithm/pluck.c algorithm/pluck.h
 sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
 sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h 
+sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h
+sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h

 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl

--- a/algorithm.c
+++ b/algorithm.c
@ -33,10 +33,12 @@
				@@ -33,10 +33,12 @@
 #include "algorithm/neoscrypt.h"
 #include "algorithm/whirlpoolx.h"
 #include "algorithm/lyra2re.h"
-#include "algorithm/lyra2re_old.h"
+#include "algorithm/lyra2rev2.h"
 #include "algorithm/pluck.h"
 #include "algorithm/yescrypt.h"
 #include "algorithm/credits.h"
+#include "algorithm/blake256.h"
+#include "algorithm/blakecoin.h"

 #include "compat.h"

@ -62,10 +64,13 @@ const char *algorithm_type_str[] = {
				@@ -62,10 +64,13 @@ const char *algorithm_type_str[] = {
  "Neoscrypt",
  "WhirlpoolX",
  "Lyra2RE",
-  "Lyra2REv2"
+  "Lyra2REV2"
  "Pluck"
  "Yescrypt",
-  "Yescrypt-multi"
+  "Yescrypt-multi",
+  "Blakecoin",
+  "Blake",
+  "Vanilla"
 };

 void sha256(const unsigned char *message, unsigned int len, unsigned char *digest)
@ -798,7 +803,7 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk
				@@ -798,7 +803,7 @@ static cl_int queue_whirlpoolx_kernel(struct __clState *clState, struct _dev_blk
  return status;
 }

-static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+static cl_int queue_lyra2re_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel;
  unsigned int num;
@ -842,7 +847,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
				@@ -842,7 +847,7 @@ static cl_int queue_lyra2RE_kernel(struct __clState *clState, struct _dev_blk_ct
  return status;
 }

-static cl_int queue_lyra2REv2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
  cl_kernel *kernel;
  unsigned int num;
@ -915,10 +920,38 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un
				@@ -915,10 +920,38 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un
  return status;
 }

+static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+	cl_kernel *kernel = &clState->kernel;
+	unsigned int num = 0;
+	cl_int status = 0;
+	cl_ulong le_target;
+
+	le_target = *(cl_ulong *)(blk->work->device_target + 24);
+	flip80(clState->cldata, blk->work->data);
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
+
+	CL_SET_ARG(clState->outputBuffer);
+	CL_SET_ARG(blk->work->blk.ctx_a);
+	CL_SET_ARG(blk->work->blk.ctx_b);
+	CL_SET_ARG(blk->work->blk.ctx_c);
+	CL_SET_ARG(blk->work->blk.ctx_d);
+	CL_SET_ARG(blk->work->blk.ctx_e);
+	CL_SET_ARG(blk->work->blk.ctx_f);
+	CL_SET_ARG(blk->work->blk.ctx_g);
+	CL_SET_ARG(blk->work->blk.ctx_h);
+
+	CL_SET_ARG(blk->work->blk.cty_a);
+	CL_SET_ARG(blk->work->blk.cty_b);
+	CL_SET_ARG(blk->work->blk.cty_c);
+
+	return status;
+}
+
 static algorithm_settings_t algos[] = {
  // kernels starting from this will have difficulty calculated by using litecoin algorithm
 #define A_SCRYPT(a) \
-  { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options }
+  { a, ALGO_SCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFFFFFFULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, scrypt_regenhash, NULL, queue_scrypt_kernel, gen_hash, append_scrypt_compiler_options }
  A_SCRYPT("ckolivas"),
  A_SCRYPT("alexkarnew"),
  A_SCRYPT("alexkarnold"),
@ -929,36 +962,33 @@ static algorithm_settings_t algos[] = {
				@@ -929,36 +962,33 @@ static algorithm_settings_t algos[] = {
 #undef A_SCRYPT

 #define A_NEOSCRYPT(a) \
-  { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options }
+  { a, ALGO_NEOSCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, neoscrypt_regenhash, NULL, queue_neoscrypt_kernel, gen_hash, append_neoscrypt_compiler_options }
  A_NEOSCRYPT("neoscrypt"),
 #undef A_NEOSCRYPT

 #define A_PLUCK(a) \
-  { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options }
+  { a, ALGO_PLUCK, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pluck_regenhash, NULL, queue_pluck_kernel, gen_hash, append_neoscrypt_compiler_options }
  A_PLUCK("pluck"),
 #undef A_PLUCK

 #define A_CREDITS(a) \
-  { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, queue_credits_kernel, gen_hash, NULL}
+  { a, ALGO_CRE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, credits_regenhash, NULL, queue_credits_kernel, gen_hash, NULL}
  A_CREDITS("credits"),
 #undef A_CREDITS

-
-
 #define A_YESCRYPT(a) \
-  { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
+  { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
  A_YESCRYPT("yescrypt"),
 #undef A_YESCRYPT

 #define A_YESCRYPT_MULTI(a) \
-  { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options}
+  { a, ALGO_YESCRYPT_MULTI, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 6,-1,CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE , yescrypt_regenhash, NULL, queue_yescrypt_multikernel, gen_hash, append_neoscrypt_compiler_options}
  A_YESCRYPT_MULTI("yescrypt-multi"),
 #undef A_YESCRYPT_MULTI

-
  // kernels starting from this will have difficulty calculated by using quarkcoin algorithm
 #define A_QUARK(a, b) \
-  { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options }
+  { a, ALGO_QUARK, "", 256, 256, 256, 0, 0, 0xFF, 0xFFFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }
  A_QUARK("quarkcoin", quarkcoin_regenhash),
  A_QUARK("qubitcoin", qubitcoin_regenhash),
  A_QUARK("animecoin", animecoin_regenhash),
@ -967,46 +997,48 @@ static algorithm_settings_t algos[] = {
				@@ -967,46 +997,48 @@ static algorithm_settings_t algos[] = {

  // kernels starting from this will have difficulty calculated by using bitcoin algorithm
 #define A_DARK(a, b) \
-  { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, gen_hash, append_x11_compiler_options }
+  { a, ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }
  A_DARK("darkcoin", darkcoin_regenhash),
  A_DARK("inkcoin", inkcoin_regenhash),
  A_DARK("myriadcoin-groestl", myriadcoin_groestl_regenhash),
 #undef A_DARK

-  { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, queue_sph_kernel, sha256, NULL },
-  { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, queue_maxcoin_kernel, sha256, NULL },
-
-  { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options },
+  { "twecoin", ALGO_TWE, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, twecoin_regenhash, NULL, queue_sph_kernel, sha256, NULL },
+  { "maxcoin", ALGO_KECCAK, "", 1, 256, 1, 4, 15, 0x0F, 0xFFFFULL, 0x000000ffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, maxcoin_regenhash, NULL, queue_maxcoin_kernel, sha256, NULL },

-  { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, queue_sph_kernel, gen_hash, append_x13_compiler_options },
-  { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options },
-  { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options },
+  { "darkcoin-mod", ALGO_X11, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, darkcoin_regenhash, NULL, queue_darkcoin_mod_kernel, gen_hash, append_x11_compiler_options },

-  { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_kernel, gen_hash, append_x13_compiler_options },
-  { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, queue_x14_old_kernel, gen_hash, append_x13_compiler_options },
+  { "marucoin", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, marucoin_regenhash, NULL, queue_sph_kernel, gen_hash, append_x13_compiler_options },
+  { "marucoin-mod", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 12, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_kernel, gen_hash, append_x13_compiler_options },
+  { "marucoin-modold", ALGO_X13, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, marucoin_regenhash, NULL, queue_marucoin_mod_old_kernel, gen_hash, append_x13_compiler_options },

-  { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblock_kernel, gen_hash, append_x13_compiler_options },
-  { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options },
+  { "x14", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 13, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_kernel, gen_hash, append_x13_compiler_options },
+  { "x14old", ALGO_X14, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 8 * 16 * 4194304, 0, x14_regenhash, NULL, queue_x14_old_kernel, gen_hash, append_x13_compiler_options },

-  { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options },
+  { "bitblock", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 14, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblock_kernel, gen_hash, append_x13_compiler_options },
+  { "bitblockold", ALGO_X15, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 10, 4 * 16 * 4194304, 0, bitblock_regenhash, NULL, queue_bitblockold_kernel, gen_hash, append_x13_compiler_options },

-  { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, queue_fresh_kernel, gen_hash, NULL },
+  { "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, NULL, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options },

-  { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2reold_regenhash, queue_lyra2RE_kernel, gen_hash, NULL },
-
-  { "lyra2rev2", ALGO_LYRA2REv2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2re_regenhash, queue_lyra2REv2_kernel, gen_hash, append_neoscrypt_compiler_options },
+  { "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, queue_fresh_kernel, gen_hash, NULL },

+  { "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, precalc_hash_blake256, queue_lyra2re_kernel, gen_hash, NULL },
+  { "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, precalc_hash_blake256, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options },

  // kernels starting from this will have difficulty calculated by using fuguecoin algorithm
 #define A_FUGUE(a, b, c) \
-  { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, queue_sph_kernel, c, NULL }
+  { a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, queue_sph_kernel, c, NULL }
  A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256),
  A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256),
  A_FUGUE("diamond", groestlcoin_regenhash, gen_hash),
 #undef A_FUGUE

-  { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, queue_whirlcoin_kernel, sha256, NULL },
-  { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFFU, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, queue_whirlpoolx_kernel, gen_hash, NULL },
+  { "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, NULL, queue_whirlcoin_kernel, sha256, NULL },
+  { "whirlpoolx", ALGO_WHIRLPOOLX, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000FFFFUL, 0, 0, 0, whirlpoolx_regenhash, NULL, queue_whirlpoolx_kernel, gen_hash, NULL },
+
+  { "blake256r8",  ALGO_BLAKECOIN, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, sha256,   NULL },
+  { "blake256r14", ALGO_BLAKE,     "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x00000000UL, 0, 128, 0, blake256_regenhash, precalc_hash_blake256, queue_blake_kernel, gen_hash, NULL },
+  { "vanilla",     ALGO_VANILLA,   "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x000000ffUL, 0, 128, 0, blakecoin_regenhash, precalc_hash_blakecoin, queue_blake_kernel, gen_hash, NULL },

  // Terminator (do not remove)
  { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL }
@ -1019,7 +1051,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo)
				@@ -1019,7 +1051,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo)
  // Find algorithm settings and copy
  for (src = algos; src->name; src++)
  {
-    if (strcmp(src->name, algo) == 0)
+    if (strcasecmp(src->name, algo) == 0)
    {
      strcpy(dest->name, src->name);
      dest->kernelfile = src->kernelfile;
@ -1037,6 +1069,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo)
				@@ -1037,6 +1069,7 @@ void copy_algorithm_settings(algorithm_t* dest, const char* algo)
      dest->rw_buffer_size = src->rw_buffer_size;
      dest->cq_properties = src->cq_properties;
      dest->regenhash = src->regenhash;
+      dest->precalc_hash = src->precalc_hash;
      dest->queue_kernel = src->queue_kernel;
      dest->gen_hash = src->gen_hash;
      dest->set_compile_options = src->set_compile_options;
@ -1081,6 +1114,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
				@@ -1081,6 +1114,8 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
  ALGO_ALIAS("whirlpool", "whirlcoin");
  ALGO_ALIAS("lyra2", "lyra2re");
  ALGO_ALIAS("lyra2v2", "lyra2rev2");
+  ALGO_ALIAS("blakecoin", "blake256r8");
+  ALGO_ALIAS("blake", "blake256r14");

 #undef ALGO_ALIAS
 #undef ALGO_ALIAS_NF
--- a/algorithm.h
+++ b/algorithm.h
@ -30,10 +30,13 @@ typedef enum {
				@@ -30,10 +30,13 @@ typedef enum {
  ALGO_NEOSCRYPT,
  ALGO_WHIRLPOOLX,
  ALGO_LYRA2RE,
-  ALGO_LYRA2REv2,
+  ALGO_LYRA2REV2,
  ALGO_PLUCK,
  ALGO_YESCRYPT,
  ALGO_YESCRYPT_MULTI,
+  ALGO_BLAKECOIN,
+  ALGO_BLAKE,
+  ALGO_VANILLA
 } algorithm_type_t;

 extern const char *algorithm_type_str[];
@ -67,6 +70,7 @@ typedef struct _algorithm_t {
				@@ -67,6 +70,7 @@ typedef struct _algorithm_t {
  long rw_buffer_size;
  cl_command_queue_properties cq_properties;
  void(*regenhash)(struct work *);
+  void(*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *);
  cl_int(*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
  void(*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
  void(*set_compile_options)(struct _build_kernel_data *, struct cgpu_info *, struct _algorithm_t *);
@ -89,6 +93,7 @@ typedef struct _algorithm_settings_t
				@@ -89,6 +93,7 @@ typedef struct _algorithm_settings_t
 	long rw_buffer_size;
 	cl_command_queue_properties cq_properties;
 	void     (*regenhash)(struct work *);
+	void     (*precalc_hash)(struct _dev_blk_ctx *, uint32_t *, uint32_t *);
 	cl_int   (*queue_kernel)(struct __clState *, struct _dev_blk_ctx *, cl_uint);
 	void     (*gen_hash)(const unsigned char *, unsigned int, unsigned char *);
 	void     (*set_compile_options)(build_kernel_data *, struct cgpu_info *, algorithm_t *);
--- a/algorithm/blake256.c
+++ b/algorithm/blake256.c
@ -0,0 +1,163 @@
				@@ -0,0 +1,163 @@
+/*
+ * BLAKE implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ *
+ * Modified for more speed by BlueDragon747 for the Blakecoin project
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "sph/sph_blake.h"
+#include "algorithm/blake256.h"
+
+/*
+* Encode a length len/4 vector of (uint32_t) into a length len vector of
+* (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+*/
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+  uint32_t i;
+
+  for (i = 0; i < len; i++)
+    dst[i] = htobe32(src[i]);
+}
+
+static const uint32_t diff1targ_blake256 = 0x000000ff;
+
+inline void blake256hash(void *state, const void *input)
+{
+  sph_blake256_context ctx_blake;
+  sph_blake256_init(&ctx_blake);
+  sph_blake256(&ctx_blake, input, 80);
+  sph_blake256_close(&ctx_blake, state);
+}
+
+void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata)
+{
+	sph_blake256_context ctx_blake;
+	uint32_t data[16];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 16);
+
+	sph_blake256_init(&ctx_blake);
+	sph_blake256(&ctx_blake, data, 64);
+
+	blk->ctx_a = ctx_blake.H[0];
+	blk->ctx_b = ctx_blake.H[1];
+	blk->ctx_c = ctx_blake.H[2];
+	blk->ctx_d = ctx_blake.H[3];
+	blk->ctx_e = ctx_blake.H[4];
+	blk->ctx_f = ctx_blake.H[5];
+	blk->ctx_g = ctx_blake.H[6];
+	blk->ctx_h = ctx_blake.H[7];
+
+	blk->cty_a = pdata[16];
+	blk->cty_b = pdata[17];
+	blk->cty_c = pdata[18];
+}
+
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+/* Used externally as confirmation of correct OCL code */
+int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[20], ohash[8];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = htobe32(nonce);
+	blake256hash(ohash, data);
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+				(long unsigned int)Htarg,
+				(long unsigned int)diff1targ,
+				(long unsigned int)tmp_hash7);
+	if (tmp_hash7 > diff1targ)
+		return -1;
+	if (tmp_hash7 > Htarg)
+		return 0;
+	return 1;
+}
+
+void blake256_regenhash(struct work *work)
+{
+        uint32_t data[20];
+        uint32_t *nonce = (uint32_t *)(work->data + 76);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        be32enc_vect(data, (const uint32_t *)work->data, 19);
+        data[19] = htobe32(*nonce);
+        blake256hash(ohash, data);
+}
+
+bool scanhash_blake256(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	while(1) {
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[19] = (n);
+		blake256hash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx",
+					(long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[19] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
--- a/algorithm/blake256.h
+++ b/algorithm/blake256.h
@ -0,0 +1,10 @@
				@@ -0,0 +1,10 @@
+#ifndef BLAKE256_H
+#define BLAKE256_H
+
+#include "miner.h"
+
+extern int blake256_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
+extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata);
+extern void blake256_regenhash(struct work *work);
+
+#endif /* BLAKE256_H */
--- a/algorithm/blakecoin.c
+++ b/algorithm/blakecoin.c
@ -0,0 +1,163 @@
				@@ -0,0 +1,163 @@
+/*
+ * BLAKE implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ *
+ * Modified for more speed by BlueDragon747 for the Blakecoin project
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "sph/sph_blake.h"
+#include "algorithm/blakecoin.h"
+
+/*
+* Encode a length len/4 vector of (uint32_t) into a length len vector of
+* (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+*/
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+  uint32_t i;
+
+  for (i = 0; i < len; i++)
+    dst[i] = htobe32(src[i]);
+}
+
+static const uint32_t diff1targ_blake256 = 0x000000ff;
+
+inline void blakecoinhash(void *state, const void *input)
+{
+  sph_blake256_context     ctx_blake;
+  sph_blake256_init(&ctx_blake);
+  sph_blake256r8(&ctx_blake, input, 80);
+  sph_blake256r8_close(&ctx_blake, state);
+}
+
+void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata)
+{
+	sph_blake256_context ctx_blake;
+	uint32_t data[16];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 16);
+
+	sph_blake256_init(&ctx_blake);
+	sph_blake256r8(&ctx_blake, data, 64);
+
+	blk->ctx_a = ctx_blake.H[0];
+	blk->ctx_b = ctx_blake.H[1];
+	blk->ctx_c = ctx_blake.H[2];
+	blk->ctx_d = ctx_blake.H[3];
+	blk->ctx_e = ctx_blake.H[4];
+	blk->ctx_f = ctx_blake.H[5];
+	blk->ctx_g = ctx_blake.H[6];
+	blk->ctx_h = ctx_blake.H[7];
+
+	blk->cty_a = pdata[16];
+	blk->cty_b = pdata[17];
+	blk->cty_c = pdata[18];
+}
+
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+/* Used externally as confirmation of correct OCL code */
+int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[20], ohash[8];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = htobe32(nonce);
+	blakecoinhash(ohash, data);
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+				(long unsigned int)Htarg,
+				(long unsigned int)diff1targ,
+				(long unsigned int)tmp_hash7);
+	if (tmp_hash7 > diff1targ)
+		return -1;
+	if (tmp_hash7 > Htarg)
+		return 0;
+	return 1;
+}
+
+void blakecoin_regenhash(struct work *work)
+{
+        uint32_t data[20];
+        uint32_t *nonce = (uint32_t *)(work->data + 76);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        be32enc_vect(data, (const uint32_t *)work->data, 19);
+        data[19] = htobe32(*nonce);
+        blakecoinhash(ohash, data);
+}
+
+bool scanhash_blakecoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	while(1) {
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[19] = (n);
+		blakecoinhash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx",
+					(long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[19] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
--- a/algorithm/blakecoin.h
+++ b/algorithm/blakecoin.h
@ -0,0 +1,10 @@
				@@ -0,0 +1,10 @@
+#ifndef BLAKECOIN_H
+#define BLAKECOIN_H
+
+#include "miner.h"
+
+extern int blakecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
+extern void precalc_hash_blakecoin(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
+extern void blakecoin_regenhash(struct work *work);
+
+#endif /* BLAKECOIN_H */
--- a/algorithm/lyra2.c
+++ b/algorithm/lyra2.c
@ -61,16 +61,18 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -61,16 +61,18 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *

    const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
    const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+    // for Lyra2REv2, nCols = 4, v1 was using 8
+    const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 : BLOCK_LEN_BLAKE2_SAFE_BYTES;

    i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
-    uint64_t *wholeMatrix = malloc(i);
+	uint64_t *wholeMatrix = (uint64_t*)malloc(i);
    if (wholeMatrix == NULL) {
      return -1;
    }
 	memset(wholeMatrix, 0, i);

    //Allocates pointers to each row of the matrix
-    uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
+	uint64_t **memMatrix = (uint64_t**)malloc(nRows * sizeof (uint64_t*));
    if (memMatrix == NULL) {
      return -1;
    }
@ -122,7 +124,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -122,7 +124,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *

    //======================= Initializing the Sponge State ====================//
    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
-    uint64_t *state = malloc(16 * sizeof (uint64_t));
+	uint64_t *state = (uint64_t*)malloc(16 * sizeof (uint64_t));
    if (state == NULL) {
      return -1;
    }
@ -134,7 +136,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -134,7 +136,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
    ptrWord = wholeMatrix;
    for (i = 0; i < nBlocksInput; i++) {
      absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
-      ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
+      ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
    }

    //Initializes M[0] and M[1]
@ -196,7 +198,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
				@@ -196,7 +198,7 @@ int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *
    absorbBlock(state, memMatrix[rowa]);

    //Squeezes the key
-    squeeze(state, K, kLen);
+    squeeze(state, (unsigned char*)K, kLen);
    //==========================================================================/

    //========================= Freeing the memory =============================//
--- a/algorithm/lyra2re.c
+++ b/algorithm/lyra2re.c
@ -36,8 +36,6 @@
				@@ -36,8 +36,6 @@
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_keccak.h" 
-#include "sph/sph_bmw.h"
-#include "sph/sph_cubehash.h"
 #include "lyra2.h"

 /*
@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
				@@ -57,10 +55,9 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 inline void lyra2rehash(void *state, const void *input)
 {
    sph_blake256_context     ctx_blake;
-    sph_bmw256_context       ctx_bmw;
+    sph_groestl256_context   ctx_groestl;
    sph_keccak256_context    ctx_keccak;
    sph_skein256_context     ctx_skein;
-    sph_cubehash256_context  ctx_cube;

    uint32_t hashA[8], hashB[8];

@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input)
				@@ -72,23 +69,17 @@ inline void lyra2rehash(void *state, const void *input)
    sph_keccak256 (&ctx_keccak,hashA, 32);
    sph_keccak256_close(&ctx_keccak, hashB);

-    sph_cubehash256_init(&ctx_cube);
-    sph_cubehash256(&ctx_cube, hashB, 32);
-    sph_cubehash256_close(&ctx_cube, hashA);
+	LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);

-    LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);

    sph_skein256_init(&ctx_skein);
-    sph_skein256 (&ctx_skein, hashB, 32);
-    sph_skein256_close(&ctx_skein, hashA);
+    sph_skein256 (&ctx_skein, hashA, 32);
+    sph_skein256_close(&ctx_skein, hashB);

-    sph_cubehash256_init(&ctx_cube);
-    sph_cubehash256(&ctx_cube, hashA, 32);
-    sph_cubehash256_close(&ctx_cube, hashB);

-    sph_bmw256_init(&ctx_bmw);
-    sph_bmw256 (&ctx_bmw, hashB, 32);
-    sph_bmw256_close(&ctx_bmw, hashA);
+    sph_groestl256_init(&ctx_groestl);
+    sph_groestl256 (&ctx_groestl, hashB, 32);
+    sph_groestl256_close(&ctx_groestl, hashA);

    memcpy(state, hashA, 32);
 }
--- a/algorithm/lyra2re.h
+++ b/algorithm/lyra2re.h
@ -2,8 +2,6 @@
				@@ -2,8 +2,6 @@
 #define LYRA2RE_H

 #include "miner.h"
-#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent
-#define LYRA_SECBUF_SIZE (4) // (not used)

 extern int lyra2re_test(unsigned char *pdata, const unsigned char *ptarget,
 			uint32_t nonce);
--- a/algorithm/lyra2re_old.h
+++ b/algorithm/lyra2re_old.h
@ -1,10 +0,0 @@
				@@ -1,10 +0,0 @@
-#ifndef LYRA2REOLD_H
-#define LYRA2REOLD_H
-
-#include "miner.h"
-
-extern int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget,
-			uint32_t nonce);
-extern void lyra2reold_regenhash(struct work *work);
-
-#endif /* LYRA2RE_H */
--- a/algorithm/lyra2re_old.c
+++ b/algorithm/lyra2re_old.c
@ -36,6 +36,8 @@
				@@ -36,6 +36,8 @@
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_keccak.h" 
+#include "sph/sph_bmw.h"
+#include "sph/sph_cubehash.h"
 #include "lyra2.h"

 /*
@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
				@@ -52,13 +54,13 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 }


-inline void lyra2rehash_old(void *state, const void *input)
+inline void lyra2rev2hash(void *state, const void *input)
 {
    sph_blake256_context     ctx_blake;
-    sph_groestl256_context   ctx_groestl;
+    sph_bmw256_context       ctx_bmw;
    sph_keccak256_context    ctx_keccak;
    sph_skein256_context     ctx_skein;
-
+    sph_cubehash256_context  ctx_cube;
    uint32_t hashA[8], hashB[8];

    sph_blake256_init(&ctx_blake);
@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input)
				@@ -69,32 +71,41 @@ inline void lyra2rehash_old(void *state, const void *input)
    sph_keccak256 (&ctx_keccak,hashA, 32);
    sph_keccak256_close(&ctx_keccak, hashB);

-    LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
+	sph_cubehash256_init(&ctx_cube);
+	sph_cubehash256(&ctx_cube, hashB, 32);
+	sph_cubehash256_close(&ctx_cube, hashA);
+
+	LYRA2(hashB, 32, hashA, 32, hashA, 32, 1, 4, 4);
+
+	sph_skein256_init(&ctx_skein);
+    sph_skein256 (&ctx_skein, hashB, 32);
+    sph_skein256_close(&ctx_skein, hashA);

-    sph_skein256_init(&ctx_skein);
-    sph_skein256 (&ctx_skein, hashA, 32);
-    sph_skein256_close(&ctx_skein, hashB);
+	sph_cubehash256_init(&ctx_cube);
+	sph_cubehash256(&ctx_cube, hashA, 32);
+	sph_cubehash256_close(&ctx_cube, hashB);

+    sph_bmw256_init(&ctx_bmw);
+    sph_bmw256 (&ctx_bmw, hashB, 32);
+    sph_bmw256_close(&ctx_bmw, hashA);

-    sph_groestl256_init(&ctx_groestl);
-    sph_groestl256 (&ctx_groestl, hashB, 32);
-    sph_groestl256_close(&ctx_groestl, hashA);
+//printf("cpu hash %08x %08x %08x %08x\n",hashA[0],hashA[1],hashA[2],hashA[3]);

-    memcpy(state, hashA, 32);
+	memcpy(state, hashA, 32);
 }

 static const uint32_t diff1targ = 0x0000ffff;


 /* Used externally as confirmation of correct OCL code */
-int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
 {
 	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
 	uint32_t data[20], ohash[8];

 	be32enc_vect(data, (const uint32_t *)pdata, 19);
 	data[19] = htobe32(nonce);
-	lyra2rehash_old(ohash, data);
+	lyra2rev2hash(ohash, data);
 	tmp_hash7 = be32toh(ohash[7]);

 	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
				@@ -108,7 +119,7 @@ int lyra2reold_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t
 	return 1;
 }

-void lyra2reold_regenhash(struct work *work)
+void lyra2rev2_regenhash(struct work *work)
 {
        uint32_t data[20];
        uint32_t *nonce = (uint32_t *)(work->data + 76);
@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work)
				@@ -116,10 +127,10 @@ void lyra2reold_regenhash(struct work *work)

        be32enc_vect(data, (const uint32_t *)work->data, 19);
        data[19] = htobe32(*nonce);
-        lyra2rehash_old(ohash, data);
+        lyra2rev2hash(ohash, data);
 }

-bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+bool scanhash_lyra2rev2(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
 		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
 		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
 		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse
				@@ -137,7 +148,7 @@ bool scanhash_lyra2reold(struct thr_info *thr, const unsigned char __maybe_unuse

 		*nonce = ++n;
 		data[19] = (n);
-		lyra2rehash_old(ostate, data);
+		lyra2rev2hash(ostate, data);
 		tmp_hash7 = (ostate[7]);

 		applog(LOG_INFO, "data7 %08lx",
--- a/algorithm/lyra2rev2.h
+++ b/algorithm/lyra2rev2.h
@ -0,0 +1,11 @@
				@@ -0,0 +1,11 @@
+#ifndef LYRA2REV2_H
+#define LYRA2REV2_H
+
+#include "miner.h"
+#define LYRA_SCRATCHBUF_SIZE (1536) // matrix size [12][4][4] uint64_t or equivalent
+#define LYRA_SECBUF_SIZE (4) // (not used)
+extern int lyra2rev2_test(unsigned char *pdata, const unsigned char *ptarget,
+			uint32_t nonce);
+extern void lyra2rev2_regenhash(struct work *work);
+
+#endif /* LYRA2REV2_H */
--- a/algorithm/yescrypt-opt.c
+++ b/algorithm/yescrypt-opt.c
@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size)
				@@ -99,7 +99,7 @@ alloc_region(yescrypt_region_t * region, size_t size)
 	if (size + 63 < size) {
 		errno = ENOMEM;
 	}
-	else if ((base = malloc(size + 63)) != NULL) {
+	else if ((base = (uint8_t *)malloc(size + 63)) != NULL) {
 		aligned = base + 63;
 		aligned -= (uintptr_t)aligned & 63;
 	}
@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
				@@ -520,7 +520,7 @@ smix1(uint64_t * B, size_t r, uint64_t N, yescrypt_flags_t flags,
 	uint64_t * XY, uint64_t * S)
 {
 	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) = (S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = shared->shared1.aligned;
+	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
 	uint32_t VROM_mask = shared->mask1;
 	size_t s = 16 * r;
 	uint64_t * X = V;
@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
				@@ -671,7 +671,7 @@ smix2(uint64_t * B, size_t r, uint64_t N, uint64_t Nloop,
 	
 	void (*blockmix)(const uint64_t *, uint64_t *, uint64_t *, size_t) =
 		(S ? blockmix_pwxform : blockmix_salsa8);
-	const uint64_t * VROM = shared->shared1.aligned;
+	const uint64_t * VROM = (uint64_t *)shared->shared1.aligned;
 	uint32_t VROM_mask = shared->mask1 | 1;
 	size_t s = 16 * r;
 	yescrypt_flags_t rw = flags & YESCRYPT_RW;
@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
				@@ -835,7 +835,7 @@ smix(uint64_t * B, size_t r, uint64_t N, uint32_t p, uint32_t t,
 		uint64_t * Sp = S ? &S[i * S_SIZE_ALL] : S;

 		if (Sp) 
-			smix1(Bp, 1, S_SIZE_ALL / 16, flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);
+			smix1(Bp, 1, S_SIZE_ALL / 16, (yescrypt_flags_t)flags & ~YESCRYPT_PWXFORM,Sp, NROM, shared, XYp, NULL);

 	

--- a/api.c
+++ b/api.c
@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
				@@ -1334,7 +1334,7 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
  io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR);

  root = api_add_string(root, "Miner", PACKAGE " " VERSION, false);
-  root = api_add_string(root, "CGMiner", CGMINER_VERSION, false);
+  root = api_add_string(root, "SGMiner", CGMINER_VERSION, false);
  root = api_add_const(root, "API", APIVERSION, false);

  root = print_data(root, buf, isjson, false);
--- a/configure.ac
+++ b/configure.ac
@ -1,7 +1,7 @@
				@@ -1,7 +1,7 @@
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_maj], [5])
-m4_define([v_min], [2])
+m4_define([v_min], [3])
 m4_define([v_mic], [0])
 m4_define([v_rev], [nicehash])
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -1366,13 +1366,8 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1366,13 +1366,8 @@ static bool opencl_thread_init(struct thr_info *thr)

 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-  if (work->pool->algorithm.type == ALGO_LYRA2RE || work->pool->algorithm.type == ALGO_LYRA2REv2) {
-    work->blk.work = work;
-    precalc_hash_blake256(&work->blk, 0, (uint32_t *)(work->data));
-  }
-  else {
-    work->blk.work = work;
-  }
+  work->blk.work = work;
+  if (work->pool->algorithm.precalc_hash) work->pool->algorithm.precalc_hash(&work->blk, 0, (uint32_t *)(work->data));
  thr->pool_no = work->pool->pool_no;
  return true;
 }
--- a/findnonce.c
+++ b/findnonce.c
@ -234,136 +234,3 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
				@@ -234,136 +234,3 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
    free(pcd);
  }
 }
-
-// BLAKE 256 14 rounds (standard)
-
-typedef struct
-{
-  uint32_t h[8];
-  uint32_t t;
-} blake_state256;
-
-#define NB_ROUNDS32 14
-
-const uint8_t blake_sigma[][16] =
-{
-  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
-  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
-  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
-  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
-  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
-  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
-  { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
-  { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
-  { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
-  { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
-  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
-  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
-  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
-  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
-  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
-  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
-};
-
-const uint32_t blake_u256[16] =
-{
-  0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
-  0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
-  0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
-  0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917
-};
-
-#define ROT32(x,n) (((x)<<(32-n))|( (x)>>(n)))
-//#define ROT32(x,n)   (rotate((uint)x, (uint)32-n))
-#define ADD32(x,y)   ((uint32_t)((x) + (y)))
-#define XOR32(x,y)   ((uint32_t)((x) ^ (y)))
-
-#define G(a,b,c,d,i) \
-do { \
-  v[a] += XOR32(m[blake_sigma[r][i]], blake_u256[blake_sigma[r][i + 1]]) + v[b]; \
-  v[d] = ROT32(XOR32(v[d], v[a]), 16); \
-  v[c] += v[d]; \
-  v[b] = ROT32(XOR32(v[b], v[c]), 12); \
-  v[a] += XOR32(m[blake_sigma[r][i + 1]], blake_u256[blake_sigma[r][i]]) + v[b]; \
-  v[d] = ROT32(XOR32(v[d], v[a]), 8); \
-  v[c] += v[d]; \
-  v[b] = ROT32(XOR32(v[b], v[c]), 7); \
-} while (0)
-
-// compress a block
-void blake256_compress_block(blake_state256 *S, uint32_t *m)
-{
-  uint32_t v[16];
-  int i, r;
-  for (i = 0; i < 8; ++i)  v[i] = S->h[i];
-
-  v[8] = blake_u256[0];
-  v[9] = blake_u256[1];
-  v[10] = blake_u256[2];
-  v[11] = blake_u256[3];
-  v[12] = blake_u256[4];
-  v[13] = blake_u256[5];
-  v[14] = blake_u256[6];
-  v[15] = blake_u256[7];
-
-  v[12] ^= S->t;
-  v[13] ^= S->t;
-
-  for (r = 0; r < NB_ROUNDS32; ++r)
-  {
-    /* column step */
-    G(0, 4, 8, 12, 0);
-    G(1, 5, 9, 13, 2);
-    G(2, 6, 10, 14, 4);
-    G(3, 7, 11, 15, 6);
-    /* diagonal step */
-    G(0, 5, 10, 15, 8);
-    G(1, 6, 11, 12, 10);
-    G(2, 7, 8, 13, 12);
-    G(3, 4, 9, 14, 14);
-  }
-
-  for (i = 0; i < 16; ++i)  S->h[i & 7] ^= v[i];
-}
-
-void blake256_init(blake_state256 *S)
-{
-  S->h[0] = 0x6a09e667;
-  S->h[1] = 0xbb67ae85;
-  S->h[2] = 0x3c6ef372;
-  S->h[3] = 0xa54ff53a;
-  S->h[4] = 0x510e527f;
-  S->h[5] = 0x9b05688c;
-  S->h[6] = 0x1f83d9ab;
-  S->h[7] = 0x5be0cd19;
-  S->t = 0;
-}
-
-void blake256_update(blake_state256 *S, const uint32_t *in)
-{
-  uint32_t m[16];
-  int i;
-  S->t = 512;
-  for (i = 0; i < 16; ++i)  m[i] = in[i];
-  blake256_compress_block(S, m);
-}
-
-void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
-{
-  blake_state256 S;
-  blake256_init(&S);
-  blake256_update(&S, data);
-
-  blk->ctx_a = S.h[0];
-  blk->ctx_b = S.h[1];
-  blk->ctx_c = S.h[2];
-  blk->ctx_d = S.h[3];
-  blk->ctx_e = S.h[4];
-  blk->ctx_f = S.h[5];
-  blk->ctx_g = S.h[6];
-  blk->ctx_h = S.h[7];
-
-  blk->cty_a = data[16];
-  blk->cty_b = data[17];
-  blk->cty_c = data[18];
-}
--- a/findnonce.h
+++ b/findnonce.h
@ -10,6 +10,5 @@
				@@ -10,6 +10,5 @@

 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
 extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
-extern void precalc_hash_blake256(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);

 #endif /*FINDNONCE_H*/
--- a/kernel/blake256r14.cl
+++ b/kernel/blake256r14.cl
@ -0,0 +1,157 @@
				@@ -0,0 +1,157 @@
+// (c) 2013 originally written by smolen, modified by kr105
+
+#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n)))
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// precalc hash from fisrt part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+	uint M0, M1, M2, M3, M4, M5, M6, M7;
+	uint M8, M9, MA, MB, MC, MD, ME, MF;
+	uint V0, V1, V2, V3, V4, V5, V6, V7;
+	uint V8, V9, VA, VB, VC, VD, VE, VF;
+	uint pre7;
+	uint nonce = get_global_id(0);
+
+	V0 = h0;
+	V1 = h1;
+	V2 = h2;
+	V3 = h3;
+	V4 = h4;
+	V5 = h5;
+	V6 = h6;
+	pre7 = V7 = h7;
+	M0 = in16;
+	M1 = in17;
+	M2 = in18;
+	M3 = nonce;
+
+	V8 = 0x243F6A88UL;
+	V9 = 0x85A308D3UL;
+	VA = 0x13198A2EUL;
+	VB = 0x03707344UL;
+	VC = 640 ^ 0xA4093822UL;
+	VD = 640 ^ 0x299F31D0UL;
+	VE = 0x082EFA98UL;
+	VF = 0xEC4E6C89UL;
+
+	M4 = 0x80000000;
+	M5 = 0;
+	M6 = 0;
+	M7 = 0;
+	M8 = 0;
+	M9 = 0;
+	MA = 0;
+	MB = 0;
+	MC = 0;
+	MD = 1;
+	ME = 0;
+	MF = 640;
+
+	V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M9 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M0 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M5 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M7 ^ 0x299F31D0UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M2 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M4 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MA ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MF ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (ME ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M1 ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MB ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MC ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M3 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MD ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M2 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MC ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M6 ^ 0xBE5466CFUL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MA ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M0 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MB ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M8 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M3 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M4 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MD ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M7 ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M5 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MF ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (ME ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M1 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M9 ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MC ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M5 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M1 ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MF ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (ME ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MD ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M4 ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MA ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M0 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M7 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M9 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M2 ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M8 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MB ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MD ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MB ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M7 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (ME ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MC ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M1 ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M3 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M9 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M5 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M0 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MF ^ 0xA4093822UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M4 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M2 ^ 0xBE5466CFUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MA ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	// Constants
+	// 00 = 0x243F6A88UL 
+	// 01 = 0x85A308D3UL 
+	// 02 = 0x13198A2EUL 
+	// 03 = 0x03707344UL 
+	// 04 = 0xA4093822UL 
+	// 05 = 0x299F31D0UL 
+	// 06 = 0x082EFA98UL 
+	// 07 = 0xEC4E6C89UL 
+	// 08 = 0x452821E6UL 
+	// 09 = 0x38D01377UL 
+	// 10 = 0xBE5466CFUL 
+	// 11 = 0x34E90C6CUL 
+	// 12 = 0xC0AC29B7UL 
+	// 13 = 0xC97C50DDUL 
+	// 14 = 0x3F84D5B5UL
+	// 15 = 0xB5470917UL 
+	// A=10,B=11,C=12,D=13,E=14,F=15
+
+	// Round 9: 
+	// 6^15
+	V0 = (V0 + V4 + (M6 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MF ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; 
+	// 14^9
+	V1 = (V1 + V5 + (ME ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M9 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; 
+	// 11^3 
+	V2 = (V2 + V6 + (MB ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M3 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; 
+	// 0^8 
+	V3 = (V3 + V7 + (M0 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M8 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; 
+	// 12^2 
+	V0 = (V0 + V5 + (MC ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M2 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; 
+	// 13^7 
+	V1 = (V1 + V6 + (MD ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M7 ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; 
+	// 1^4 
+	V2 = (V2 + V7 + (M1 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M4 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; 
+	// 10^5
+	V3 = (V3 + V4 + (MA ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M5 ^ 0xBE5466CFUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	// Constants
+	// 00 = 0x243F6A88UL 
+	// 01 = 0x85A308D3UL 
+	// 02 = 0x13198A2EUL 
+	// 03 = 0x03707344UL 
+	// 04 = 0xA4093822UL 
+	// 05 = 0x299F31D0UL 
+	// 06 = 0x082EFA98UL 
+	// 07 = 0xEC4E6C89UL 
+	// 08 = 0x452821E6UL 
+	// 09 = 0x38D01377UL 
+	// 10 = 0xBE5466CFUL 
+	// 11 = 0x34E90C6CUL 
+	// 12 = 0xC0AC29B7UL 
+	// 13 = 0xC97C50DDUL 
+	// 14 = 0x3F84D5B5UL
+	// 15 = 0xB5470917UL 
+	// A=10,B=11,C=12,D=13,E=14,F=15
+
+	// Round 10
+	// 10^2
+	V0 = (V0 + V4 + (MA ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M2 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; 
+	// 8^4
+	V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; 
+	// 7^6
+	V2 = (V2 + V6 + (M7 ^ 0x082EFA98UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M6 ^ 0xEC4E6C89UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; 
+	// 1^5
+	V3 = (V3 + V7 + (M1 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M5 ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; 
+	// 15^11
+	V0 = (V0 + V5 + (MF ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MB ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; 
+	// 9^14
+	V1 = (V1 + V6 + (M9 ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (ME ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; 
+	// 3^12
+	V2 = (V2 + V7 + (M3 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MC ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; 
+	// 13^0
+	V3 = (V3 + V4 + (MD ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M0 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	// Round 11,12,13,14 repeated from beginning again
+	V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	if(pre7 ^ V7 ^ VF)
+		return;
+	output[output[0xFF]++] = nonce;
+}
--- a/kernel/blake256r8.cl
+++ b/kernel/blake256r8.cl
@ -0,0 +1,77 @@
				@@ -0,0 +1,77 @@
+// (c) 2013 originally written by smolen, modified by kr105
+
+#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n)))
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// precalc hash from fisrt part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+	uint M0, M1, M2, M3, M4, M5, M6, M7;
+	uint M8, M9, MA, MB, MC, MD, ME, MF;
+	uint V0, V1, V2, V3, V4, V5, V6, V7;
+	uint V8, V9, VA, VB, VC, VD, VE, VF;
+	uint pre7;
+	uint nonce = get_global_id(0);
+
+	V0 = h0;
+	V1 = h1;
+	V2 = h2;
+	V3 = h3;
+	V4 = h4;
+	V5 = h5;
+	V6 = h6;
+	pre7 = V7 = h7;
+	M0 = in16;
+	M1 = in17;
+	M2 = in18;
+	M3 = nonce;
+
+	V8 = 0x243F6A88UL;
+	V9 = 0x85A308D3UL;
+	VA = 0x13198A2EUL;
+	VB = 0x03707344UL;
+	VC = 640 ^ 0xA4093822UL;
+	VD = 640 ^ 0x299F31D0UL;
+	VE = 0x082EFA98UL;
+	VF = 0xEC4E6C89UL;
+
+	M4 = 0x80000000;
+	M5 = 0;
+	M6 = 0;
+	M7 = 0;
+	M8 = 0;
+	M9 = 0;
+	MA = 0;
+	MB = 0;
+	MC = 0;
+	MD = 1;
+	ME = 0;
+	MF = 640;
+
+	V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M9 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M0 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M5 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M7 ^ 0x299F31D0UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M2 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M4 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MA ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MF ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (ME ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M1 ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MB ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MC ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M3 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MD ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M2 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MC ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M6 ^ 0xBE5466CFUL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MA ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M0 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MB ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M8 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M3 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M4 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MD ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M7 ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M5 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MF ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (ME ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M1 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M9 ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MC ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M5 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M1 ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MF ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (ME ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MD ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M4 ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MA ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M0 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M7 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M9 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M2 ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M8 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MB ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MD ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MB ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M7 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (ME ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MC ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M1 ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M3 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M9 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M5 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M0 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MF ^ 0xA4093822UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M4 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M2 ^ 0xBE5466CFUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MA ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	if(pre7 ^ V7 ^ VF)
+		return;
+	output[output[0xFF]++] = nonce;
+}
--- a/kernel/lyra2rev2.cl
+++ b/kernel/lyra2rev2.cl
@ -31,8 +31,8 @@
				@@ -31,8 +31,8 @@
 // typedef unsigned int uint;
 #pragma OPENCL EXTENSION cl_amd_printf : enable

-#ifndef LYRA2RE_CL
-#define LYRA2RE_CL
+#ifndef LYRA2REV2_CL
+#define LYRA2REV2_CL

 #if __ENDIAN_LITTLE__
 #define SPH_LITTLE_ENDIAN 1
@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong
				@@ -522,4 +522,4 @@ __kernel void search6(__global uchar* hashes, __global uint* output, const ulong
 }


-#endif // LYRA2RE_CL
+#endif // LYRA2REV2_CL
--- a/kernel/neoscrypt.cl
+++ b/kernel/neoscrypt.cl
--- a/kernel/vanilla.cl
+++ b/kernel/vanilla.cl
@ -0,0 +1,77 @@
				@@ -0,0 +1,77 @@
+// (c) 2013 originally written by smolen, modified by kr105
+
+#define SPH_ROTR32(v,n) rotate((uint)(v),(uint)(32-(n)))
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// precalc hash from fisrt part of message
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+	// last 12 bytes of original message
+	const uint in16,
+	const uint in17,
+	const uint in18
+)
+{
+	uint M0, M1, M2, M3, M4, M5, M6, M7;
+	uint M8, M9, MA, MB, MC, MD, ME, MF;
+	uint V0, V1, V2, V3, V4, V5, V6, V7;
+	uint V8, V9, VA, VB, VC, VD, VE, VF;
+	uint pre7;
+	uint nonce = get_global_id(0);
+
+	V0 = h0;
+	V1 = h1;
+	V2 = h2;
+	V3 = h3;
+	V4 = h4;
+	V5 = h5;
+	V6 = h6;
+	pre7 = V7 = h7;
+	M0 = in16;
+	M1 = in17;
+	M2 = in18;
+	M3 = nonce;
+
+	V8 = 0x243F6A88UL;
+	V9 = 0x85A308D3UL;
+	VA = 0x13198A2EUL;
+	VB = 0x03707344UL;
+	VC = 640 ^ 0xA4093822UL;
+	VD = 640 ^ 0x299F31D0UL;
+	VE = 0x082EFA98UL;
+	VF = 0xEC4E6C89UL;
+
+	M4 = 0x80000000;
+	M5 = 0;
+	M6 = 0;
+	M7 = 0;
+	M8 = 0;
+	M9 = 0;
+	MA = 0;
+	MB = 0;
+	MC = 0;
+	MD = 1;
+	ME = 0;
+	MF = 640;
+
+	V0 = (V0 + V4 + (M0 ^ 0x85A308D3UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M1 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M2 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M3 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M4 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M5 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M6 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M7 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M8 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M9 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MA ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MB ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MC ^ 0xC97C50DDUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (MD ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (ME ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MF ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (ME ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MA ^ 0x3F84D5B5UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M4 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M8 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M9 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MF ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MD ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M6 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M1 ^ 0xC0AC29B7UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MC ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M0 ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M2 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MB ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M7 ^ 0x34E90C6CUL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M5 ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M3 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MB ^ 0x452821E6UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M8 ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (MC ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M0 ^ 0xC0AC29B7UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M5 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M2 ^ 0x299F31D0UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MF ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MD ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (MA ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (ME ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M7 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M1 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M9 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M4 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M7 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M9 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M3 ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M1 ^ 0x03707344UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MD ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MC ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MB ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (ME ^ 0x34E90C6CUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M2 ^ 0x082EFA98UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M6 ^ 0x13198A2EUL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M5 ^ 0xBE5466CFUL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MA ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M4 ^ 0x243F6A88UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M0 ^ 0xA4093822UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (MF ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M8 ^ 0xB5470917UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M9 ^ 0x243F6A88UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M0 ^ 0x38D01377UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M5 ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (M7 ^ 0x299F31D0UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M2 ^ 0xA4093822UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M4 ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (MA ^ 0xB5470917UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MF ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (ME ^ 0x85A308D3UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M1 ^ 0x3F84D5B5UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MB ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (MC ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M3 ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MD ^ 0x03707344UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (M2 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MC ^ 0x13198A2EUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M6 ^ 0xBE5466CFUL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MA ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (M0 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MB ^ 0x243F6A88UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M8 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M3 ^ 0x452821E6UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M4 ^ 0xC97C50DDUL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (MD ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M7 ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M5 ^ 0xEC4E6C89UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (MF ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (ME ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M1 ^ 0x38D01377UL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (M9 ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MC ^ 0x299F31D0UL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (M5 ^ 0xC0AC29B7UL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M1 ^ 0xB5470917UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (MF ^ 0x85A308D3UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (ME ^ 0xC97C50DDUL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (MD ^ 0x3F84D5B5UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M4 ^ 0xBE5466CFUL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (MA ^ 0xA4093822UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M0 ^ 0xEC4E6C89UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M7 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (M6 ^ 0x03707344UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M3 ^ 0x082EFA98UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M9 ^ 0x13198A2EUL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M2 ^ 0x38D01377UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M8 ^ 0x34E90C6CUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MB ^ 0x452821E6UL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+	V0 = (V0 + V4 + (MD ^ 0x34E90C6CUL)); VC = SPH_ROTR32(VC ^ V0, 16); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 12); V0 = (V0 + V4 + (MB ^ 0xC97C50DDUL)); VC = SPH_ROTR32(VC ^ V0, 8); V8 = (V8 + VC); V4 = SPH_ROTR32(V4 ^ V8, 7);; V1 = (V1 + V5 + (M7 ^ 0x3F84D5B5UL)); VD = SPH_ROTR32(VD ^ V1, 16); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 12); V1 = (V1 + V5 + (ME ^ 0xEC4E6C89UL)); VD = SPH_ROTR32(VD ^ V1, 8); V9 = (V9 + VD); V5 = SPH_ROTR32(V5 ^ V9, 7);; V2 = (V2 + V6 + (MC ^ 0x85A308D3UL)); VE = SPH_ROTR32(VE ^ V2, 16); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 12); V2 = (V2 + V6 + (M1 ^ 0xC0AC29B7UL)); VE = SPH_ROTR32(VE ^ V2, 8); VA = (VA + VE); V6 = SPH_ROTR32(V6 ^ VA, 7);; V3 = (V3 + V7 + (M3 ^ 0x38D01377UL)); VF = SPH_ROTR32(VF ^ V3, 16); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 12); V3 = (V3 + V7 + (M9 ^ 0x03707344UL)); VF = SPH_ROTR32(VF ^ V3, 8); VB = (VB + VF); V7 = SPH_ROTR32(V7 ^ VB, 7);; V0 = (V0 + V5 + (M5 ^ 0x243F6A88UL)); VF = SPH_ROTR32(VF ^ V0, 16); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 12); V0 = (V0 + V5 + (M0 ^ 0x299F31D0UL)); VF = SPH_ROTR32(VF ^ V0, 8); VA = (VA + VF); V5 = SPH_ROTR32(V5 ^ VA, 7);; V1 = (V1 + V6 + (MF ^ 0xA4093822UL)); VC = SPH_ROTR32(VC ^ V1, 16); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 12); V1 = (V1 + V6 + (M4 ^ 0xB5470917UL)); VC = SPH_ROTR32(VC ^ V1, 8); VB = (VB + VC); V6 = SPH_ROTR32(V6 ^ VB, 7);; V2 = (V2 + V7 + (M8 ^ 0x082EFA98UL)); VD = SPH_ROTR32(VD ^ V2, 16); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 12); V2 = (V2 + V7 + (M6 ^ 0x452821E6UL)); VD = SPH_ROTR32(VD ^ V2, 8); V8 = (V8 + VD); V7 = SPH_ROTR32(V7 ^ V8, 7);; V3 = (V3 + V4 + (M2 ^ 0xBE5466CFUL)); VE = SPH_ROTR32(VE ^ V3, 16); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 12); V3 = (V3 + V4 + (MA ^ 0x13198A2EUL)); VE = SPH_ROTR32(VE ^ V3, 8); V9 = (V9 + VE); V4 = SPH_ROTR32(V4 ^ V9, 7);
+
+	if(pre7 ^ V7 ^ VF)
+		return;
+	output[output[0xFF]++] = nonce;
+}
--- a/ocl.c
+++ b/ocl.c
@ -37,7 +37,7 @@
				@@ -37,7 +37,7 @@
 #include "algorithm/neoscrypt.h"
 #include "algorithm/pluck.h"
 #include "algorithm/yescrypt.h"
-#include "algorithm/lyra2re.h"
+#include "algorithm/lyra2rev2.h"

 /* FIXME: only here for global config vars, replace with configuration.h
 * or similar as soon as config is in a struct instead of littered all
@ -187,7 +187,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -187,7 +187,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
 	cl_platform_id platform = NULL;
 	struct cgpu_info *cgpu = &gpus[gpu];
 	_clState *clState = (_clState *)calloc(1, sizeof(_clState));
-	cl_uint preferred_vwidth, slot = 0, cpnd = 0, numDevices = clDevicesNum();
+	cl_uint preferred_vwidth, numDevices = clDevicesNum();
 	cl_device_id *devices = (cl_device_id *)alloca(numDevices * sizeof(cl_device_id));
 	build_kernel_data *build_data = (build_kernel_data *)alloca(sizeof(struct _build_kernel_data));
 	char **pbuff = (char **)alloca(sizeof(char *) * numDevices), filename[256];
@ -586,7 +586,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -586,7 +586,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
  }

  // Lyra2re v2 TC
-  else if (cgpu->algorithm.type == ALGO_LYRA2REv2 && !cgpu->opt_tc) {
+  else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) {
    size_t glob_thread_count;
    long max_int;
    unsigned char type = 0;
@ -797,7 +797,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -797,7 +797,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
      applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
      // scrypt/n-scrypt
    }
-    else if (algorithm->type == ALGO_LYRA2REv2) {
+    else if (algorithm->type == ALGO_LYRA2REV2) {
      /* The scratch/pad-buffer needs 32kBytes memory per thread. */
      bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
      buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
@ -855,7 +855,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
				@@ -855,7 +855,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
        return NULL;
      }
    }
-    else if (algorithm->type == ALGO_LYRA2REv2) {
+    else if (algorithm->type == ALGO_LYRA2REV2) {
      // need additionnal buffers
      clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
      if (status != CL_SUCCESS && !clState->buffer1) {
--- a/sgminer.c
+++ b/sgminer.c
@ -2625,7 +2625,7 @@ static void curses_print_devstatus(struct cgpu_info *cgpu, int count)
				@@ -2625,7 +2625,7 @@ static void curses_print_devstatus(struct cgpu_info *cgpu, int count)
  if (devcursor + count > LINES - 2)
    return;

-  if (count >= most_devices)
+  if (count >= (opt_removedisabled ? most_devices : total_devices))
    return;

  if (cgpu->dev_start_tv.tv_sec == 0)
@ -2745,7 +2745,7 @@ static void switch_logsize(bool __maybe_unused newdevs)
				@@ -2745,7 +2745,7 @@ static void switch_logsize(bool __maybe_unused newdevs)
    if (opt_compact) {
      logstart = devcursor + 1;
    } else {
-      logstart = devcursor + most_devices + 1;
+      logstart = devcursor + (opt_removedisabled ? most_devices : total_devices) + 1;
    }
    logcursor = logstart + 1;
 #ifdef WIN32
@ -8873,7 +8873,7 @@ int main(int argc, char *argv[])
				@@ -8873,7 +8873,7 @@ int main(int argc, char *argv[])
  rd_unlock(&devices_lock);

  if (!opt_compact) {
-    logstart += most_devices;
+    logstart += (opt_removedisabled ? most_devices : total_devices);
    logcursor = logstart + 1;
 #ifdef HAVE_CURSES
    check_winsizes();
--- a/sph/blake.c
+++ b/sph/blake.c
@ -507,6 +507,55 @@ static const sph_u64 CB[16] = {
				@@ -507,6 +507,55 @@ static const sph_u64 CB[16] = {

 #if SPH_COMPACT_BLAKE_32

+#define COMPRESS32r8   do { \
+		sph_u32 M[16]; \
+		sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
+		sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \
+		unsigned r; \
+		V0 = H0; \
+		V1 = H1; \
+		V2 = H2; \
+		V3 = H3; \
+		V4 = H4; \
+		V5 = H5; \
+		V6 = H6; \
+		V7 = H7; \
+		V8 = S0 ^ CS0; \
+		V9 = S1 ^ CS1; \
+		VA = S2 ^ CS2; \
+		VB = S3 ^ CS3; \
+		VC = T0 ^ CS4; \
+		VD = T0 ^ CS5; \
+		VE = T1 ^ CS6; \
+		VF = T1 ^ CS7; \
+		M[0x0] = sph_dec32be_aligned(buf +  0); \
+		M[0x1] = sph_dec32be_aligned(buf +  4); \
+		M[0x2] = sph_dec32be_aligned(buf +  8); \
+		M[0x3] = sph_dec32be_aligned(buf + 12); \
+		M[0x4] = sph_dec32be_aligned(buf + 16); \
+		M[0x5] = sph_dec32be_aligned(buf + 20); \
+		M[0x6] = sph_dec32be_aligned(buf + 24); \
+		M[0x7] = sph_dec32be_aligned(buf + 28); \
+		M[0x8] = sph_dec32be_aligned(buf + 32); \
+		M[0x9] = sph_dec32be_aligned(buf + 36); \
+		M[0xA] = sph_dec32be_aligned(buf + 40); \
+		M[0xB] = sph_dec32be_aligned(buf + 44); \
+		M[0xC] = sph_dec32be_aligned(buf + 48); \
+		M[0xD] = sph_dec32be_aligned(buf + 52); \
+		M[0xE] = sph_dec32be_aligned(buf + 56); \
+		M[0xF] = sph_dec32be_aligned(buf + 60); \
+		for (r = 0; r < 8; r ++) \
+			ROUND_S(r); \
+		H0 ^= S0 ^ V0 ^ V8; \
+		H1 ^= S1 ^ V1 ^ V9; \
+		H2 ^= S2 ^ V2 ^ VA; \
+		H3 ^= S3 ^ V3 ^ VB; \
+		H4 ^= S0 ^ V4 ^ VC; \
+		H5 ^= S1 ^ V5 ^ VD; \
+		H6 ^= S2 ^ V6 ^ VE; \
+		H7 ^= S3 ^ V7 ^ VF; \
+	} while (0)
+
 #define COMPRESS32   do { \
 		sph_u32 M[16]; \
 		sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
@ -558,6 +607,61 @@ static const sph_u64 CB[16] = {
				@@ -558,6 +607,61 @@ static const sph_u64 CB[16] = {

 #else

+#define COMPRESS32r8   do { \
+		sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \
+		sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \
+		sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
+		sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \
+		V0 = H0; \
+		V1 = H1; \
+		V2 = H2; \
+		V3 = H3; \
+		V4 = H4; \
+		V5 = H5; \
+		V6 = H6; \
+		V7 = H7; \
+		V8 = S0 ^ CS0; \
+		V9 = S1 ^ CS1; \
+		VA = S2 ^ CS2; \
+		VB = S3 ^ CS3; \
+		VC = T0 ^ CS4; \
+		VD = T0 ^ CS5; \
+		VE = T1 ^ CS6; \
+		VF = T1 ^ CS7; \
+		M0 = sph_dec32be_aligned(buf +  0); \
+		M1 = sph_dec32be_aligned(buf +  4); \
+		M2 = sph_dec32be_aligned(buf +  8); \
+		M3 = sph_dec32be_aligned(buf + 12); \
+		M4 = sph_dec32be_aligned(buf + 16); \
+		M5 = sph_dec32be_aligned(buf + 20); \
+		M6 = sph_dec32be_aligned(buf + 24); \
+		M7 = sph_dec32be_aligned(buf + 28); \
+		M8 = sph_dec32be_aligned(buf + 32); \
+		M9 = sph_dec32be_aligned(buf + 36); \
+		MA = sph_dec32be_aligned(buf + 40); \
+		MB = sph_dec32be_aligned(buf + 44); \
+		MC = sph_dec32be_aligned(buf + 48); \
+		MD = sph_dec32be_aligned(buf + 52); \
+		ME = sph_dec32be_aligned(buf + 56); \
+		MF = sph_dec32be_aligned(buf + 60); \
+		ROUND_S(0); \
+		ROUND_S(1); \
+		ROUND_S(2); \
+		ROUND_S(3); \
+		ROUND_S(4); \
+		ROUND_S(5); \
+		ROUND_S(6); \
+		ROUND_S(7); \
+		H0 ^= S0 ^ V0 ^ V8; \
+		H1 ^= S1 ^ V1 ^ V9; \
+		H2 ^= S2 ^ V2 ^ VA; \
+		H3 ^= S3 ^ V3 ^ VB; \
+		H4 ^= S0 ^ V4 ^ VC; \
+		H5 ^= S1 ^ V5 ^ VD; \
+		H6 ^= S2 ^ V6 ^ VE; \
+		H7 ^= S3 ^ V7 ^ VF; \
+	} while (0)
+
 #define COMPRESS32   do { \
 		sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \
 		sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \
@ -831,6 +935,44 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len)
				@@ -831,6 +935,44 @@ blake32(sph_blake_small_context *sc, const void *data, size_t len)
 	sc->ptr = ptr;
 }

+static void
+blake32r8(sph_blake_small_context *sc, const void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	DECL_STATE32
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	if (len < (sizeof sc->buf) - ptr) {
+		memcpy(buf + ptr, data, len);
+		ptr += len;
+		sc->ptr = ptr;
+		return;
+	}
+
+	READ_STATE32(sc);
+	while (len > 0) {
+		size_t clen;
+
+		clen = (sizeof sc->buf) - ptr;
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		ptr += clen;
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		if (ptr == sizeof sc->buf) {
+			if ((T0 = SPH_T32(T0 + 512)) < 512)
+				T1 = SPH_T32(T1 + 1);
+			COMPRESS32r8;
+			ptr = 0;
+		}
+	}
+	WRITE_STATE32(sc);
+	sc->ptr = ptr;
+}
+
 static void
 blake32_close(sph_blake_small_context *sc,
 	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
@ -884,6 +1026,59 @@ blake32_close(sph_blake_small_context *sc,
				@@ -884,6 +1026,59 @@ blake32_close(sph_blake_small_context *sc,
 		sph_enc32be(out + (k << 2), sc->H[k]);
 }

+static void
+blake32r8_close(sph_blake_small_context *sc,
+	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+	union {
+		unsigned char buf[64];
+		sph_u32 dummy;
+	} u;
+	size_t ptr, k;
+	unsigned bit_len;
+	unsigned z;
+	sph_u32 th, tl;
+	unsigned char *out;
+
+	ptr = sc->ptr;
+	bit_len = ((unsigned)ptr << 3) + n;
+	z = 0x80 >> n;
+	u.buf[ptr] = ((ub & -z) | z) & 0xFF;
+	tl = sc->T0 + bit_len;
+	th = sc->T1;
+	if (ptr == 0 && n == 0) {
+		sc->T0 = SPH_C32(0xFFFFFE00);
+		sc->T1 = SPH_C32(0xFFFFFFFF);
+	} else if (sc->T0 == 0) {
+		sc->T0 = SPH_C32(0xFFFFFE00) + bit_len;
+		sc->T1 = SPH_T32(sc->T1 - 1);
+	} else {
+		sc->T0 -= 512 - bit_len;
+	}
+	if (bit_len <= 446) {
+		memset(u.buf + ptr + 1, 0, 55 - ptr);
+		if (out_size_w32 == 8)
+			u.buf[55] |= 1;
+		sph_enc32be_aligned(u.buf + 56, th);
+		sph_enc32be_aligned(u.buf + 60, tl);
+		blake32r8(sc, u.buf + ptr, 64 - ptr);
+	} else {
+		memset(u.buf + ptr + 1, 0, 63 - ptr);
+		blake32r8(sc, u.buf + ptr, 64 - ptr);
+		sc->T0 = SPH_C32(0xFFFFFE00);
+		sc->T1 = SPH_C32(0xFFFFFFFF);
+		memset(u.buf, 0, 56);
+		if (out_size_w32 == 8)
+			u.buf[55] = 1;
+		sph_enc32be_aligned(u.buf + 56, th);
+		sph_enc32be_aligned(u.buf + 60, tl);
+		blake32r8(sc, u.buf, 64);
+	}
+	out = (unsigned char *)dst;
+	for (k = 0; k < out_size_w32; k ++)
+		sph_enc32be(out + (k << 2), sc->H[k]);
+}
+
 #if SPH_64

 static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
@ -1034,6 +1229,13 @@ sph_blake256(void *cc, const void *data, size_t len)
				@@ -1034,6 +1229,13 @@ sph_blake256(void *cc, const void *data, size_t len)
 	blake32((sph_blake_small_context *)cc, data, len);
 }

+/* see sph_blake.h */
+void
+sph_blake256r8(void *cc, const void *data, size_t len)
+{
+	blake32r8((sph_blake_small_context *)cc, data, len);
+}
+
 /* see sph_blake.h */
 void
 sph_blake256_close(void *cc, void *dst)
@ -1041,6 +1243,13 @@ sph_blake256_close(void *cc, void *dst)
				@@ -1041,6 +1243,13 @@ sph_blake256_close(void *cc, void *dst)
 	sph_blake256_addbits_and_close(cc, 0, 0, dst);
 }

+/* see sph_blake.h */
+void
+sph_blake256r8_close(void *cc, void *dst)
+{
+	sph_blake256r8_addbits_and_close(cc, 0, 0, dst);
+}
+
 /* see sph_blake.h */
 void
 sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
@ -1049,6 +1258,14 @@ sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
				@@ -1049,6 +1258,14 @@ sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 	sph_blake256_init(cc);
 }

+/* see sph_blake.h */
+void
+sph_blake256r8_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	blake32r8_close((sph_blake_small_context *)cc, ub, n, dst, 8);
+	sph_blake256_init(cc);
+}
+
 #if SPH_64

 /* see sph_blake.h */
--- a/sph/sph_blake.h
+++ b/sph/sph_blake.h
@ -194,6 +194,7 @@ void sph_blake256_init(void *cc);
				@@ -194,6 +194,7 @@ void sph_blake256_init(void *cc);
 * @param len    the input data length (in bytes)
 */
 void sph_blake256(void *cc, const void *data, size_t len);
+void sph_blake256r8(void *cc, const void *data, size_t len);

 /**
 * Terminate the current BLAKE-256 computation and output the result into
@ -205,6 +206,7 @@ void sph_blake256(void *cc, const void *data, size_t len);
				@@ -205,6 +206,7 @@ void sph_blake256(void *cc, const void *data, size_t len);
 * @param dst   the destination buffer
 */
 void sph_blake256_close(void *cc, void *dst);
+void sph_blake256r8_close(void *cc, void *dst);

 /**
 * Add a few additional bits (0 to 7) to the current computation, then
@ -221,6 +223,8 @@ void sph_blake256_close(void *cc, void *dst);
				@@ -221,6 +223,8 @@ void sph_blake256_close(void *cc, void *dst);
 */
 void sph_blake256_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
+void sph_blake256r8_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);

 #if SPH_64

--- a/winbuild/sgminer.vcxproj
+++ b/winbuild/sgminer.vcxproj
@ -263,8 +263,10 @@
				@@ -263,8 +263,10 @@
    <ClCompile Include="..\algorithm.c" />
    <ClCompile Include="..\algorithm\animecoin.c" />
    <ClCompile Include="..\algorithm\bitblock.c" />
+    <ClCompile Include="..\algorithm\credits.c" />
    <ClCompile Include="..\algorithm\lyra2.c" />
    <ClCompile Include="..\algorithm\lyra2re.c" />
+    <ClCompile Include="..\algorithm\lyra2rev2.c" />
    <ClCompile Include="..\algorithm\neoscrypt.c" />
    <ClCompile Include="..\algorithm\pluck.c" />
    <ClCompile Include="..\algorithm\sponge.c" />
@ -328,11 +330,14 @@
				@@ -328,11 +330,14 @@
    <ClInclude Include="..\algorithm.h" />
    <ClInclude Include="..\algorithm\animecoin.h" />
    <ClInclude Include="..\algorithm\bitblock.h" />
+    <ClInclude Include="..\algorithm\credits.h" />
    <ClInclude Include="..\algorithm\lyra2.h" />
    <ClInclude Include="..\algorithm\lyra2re.h" />
+    <ClInclude Include="..\algorithm\lyra2rev2.h" />
    <ClInclude Include="..\algorithm\neoscrypt.h" />
    <ClInclude Include="..\algorithm\pluck.h" />
    <ClInclude Include="..\algorithm\sponge.h" />
+    <ClInclude Include="..\algorithm\sysendian.h" />
    <ClInclude Include="..\algorithm\talkcoin.h" />
    <ClInclude Include="..\algorithm\whirlpoolx.h" />
    <ClInclude Include="..\algorithm\x14.h" />
@ -365,6 +370,7 @@
				@@ -365,6 +370,7 @@
    <ClInclude Include="..\algorithm\qubitcoin.h" />
    <ClInclude Include="..\algorithm\scrypt.h" />
    <ClInclude Include="..\algorithm\sifcoin.h" />
+    <ClInclude Include="..\sph\sha256_Y.h" />
    <ClInclude Include="..\sph\sph_blake.h" />
    <ClInclude Include="..\sph\sph_bmw.h" />
    <ClInclude Include="..\sph\sph_cubehash.h" />
--- a/winbuild/sgminer.vcxproj.filters
+++ b/winbuild/sgminer.vcxproj.filters
@ -218,6 +218,12 @@
				@@ -218,6 +218,12 @@
    <ClCompile Include="..\algorithm\pluck.c">
      <Filter>Source Files\algorithm</Filter>
    </ClCompile>
+    <ClCompile Include="..\algorithm\lyra2rev2.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
+    <ClCompile Include="..\algorithm\credits.c">
+      <Filter>Source Files\algorithm</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\adl.h">
@ -415,6 +421,18 @@
				@@ -415,6 +421,18 @@
    <ClInclude Include="..\algorithm\pluck.h">
      <Filter>Header Files\algorithm</Filter>
    </ClInclude>
+    <ClInclude Include="..\algorithm\lyra2rev2.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\credits.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
+    <ClInclude Include="..\sph\sha256_Y.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="..\algorithm\sysendian.h">
+      <Filter>Header Files\algorithm</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="README.txt" />