Add decred algo support

2025-09-12 22:21:56 +00:00 · 2016-03-21 17:34:35 +01:00 · 2016-03-21 17:34:35 +01:00 · 29c3f1c714
commit 29c3f1c714
parent d6e6f10881
10 changed files with 571 additions and 63 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -79,6 +79,7 @@ sgminer_SOURCES += algorithm/credits.c algorithm/credits.h
 sgminer_SOURCES += algorithm/yescrypt.h algorithm/yescrypt.c algorithm/yescrypt_core.h algorithm/yescrypt-opt.c algorithm/yescryptcommon.c algorithm/sysendian.h 
 sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h
 sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h
+sgminer_SOURCES += algorithm/decred.c algorithm/decred.h

 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl

--- a/algorithm.c
+++ b/algorithm.c
@ -39,6 +39,7 @@
 #include "algorithm/credits.h"
 #include "algorithm/blake256.h"
 #include "algorithm/blakecoin.h"
+#include "algorithm/decred.h"

 #include "compat.h"

@ -70,6 +71,7 @@ const char *algorithm_type_str[] = {
  "Yescrypt-multi",
  "Blakecoin",
  "Blake",
+  "Decred",
  "Vanilla"
 };

@ -138,6 +140,17 @@ static void append_neoscrypt_compiler_options(struct _build_kernel_data *data, s
  strcat(data->binary_filename, buf);
 }

+static void append_blake256_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
+{
+  char buf[255];
+  sprintf(buf, " -D LOOKUP_GAP=%d -D MAX_GLOBAL_THREADS=%lu ",
+    cgpu->lookup_gap, (unsigned long)cgpu->thread_concurrency);
+  strcat(data->compiler_options, buf);
+
+  sprintf(buf, "tc%lu", (unsigned long)cgpu->thread_concurrency);
+  strcat(data->binary_filename, buf);
+}
+
 static void append_x11_compiler_options(struct _build_kernel_data *data, struct cgpu_info *cgpu, struct _algorithm_t *algorithm)
 {
  char buf[255];
@ -935,30 +948,63 @@ static cl_int queue_pluck_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_un

 static cl_int queue_blake_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
 {
-	cl_kernel *kernel = &clState->kernel;
-	unsigned int num = 0;
-	cl_int status = 0;
-	cl_ulong le_target;
+  cl_kernel *kernel = &clState->kernel;
+  unsigned int num = 0;
+  cl_int status = 0;
+  cl_ulong le_target;

-	le_target = *(cl_ulong *)(blk->work->device_target + 24);
-	flip80(clState->cldata, blk->work->data);
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
+  le_target = *(cl_ulong *)(blk->work->device_target + 24);
+  flip80(clState->cldata, blk->work->data);
+  status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);

-	CL_SET_ARG(clState->outputBuffer);
-	CL_SET_ARG(blk->work->blk.ctx_a);
-	CL_SET_ARG(blk->work->blk.ctx_b);
-	CL_SET_ARG(blk->work->blk.ctx_c);
-	CL_SET_ARG(blk->work->blk.ctx_d);
-	CL_SET_ARG(blk->work->blk.ctx_e);
-	CL_SET_ARG(blk->work->blk.ctx_f);
-	CL_SET_ARG(blk->work->blk.ctx_g);
-	CL_SET_ARG(blk->work->blk.ctx_h);
+  CL_SET_ARG(clState->outputBuffer);
+  CL_SET_ARG(blk->work->blk.ctx_a);
+  CL_SET_ARG(blk->work->blk.ctx_b);
+  CL_SET_ARG(blk->work->blk.ctx_c);
+  CL_SET_ARG(blk->work->blk.ctx_d);
+  CL_SET_ARG(blk->work->blk.ctx_e);
+  CL_SET_ARG(blk->work->blk.ctx_f);
+  CL_SET_ARG(blk->work->blk.ctx_g);
+  CL_SET_ARG(blk->work->blk.ctx_h);

-	CL_SET_ARG(blk->work->blk.cty_a);
-	CL_SET_ARG(blk->work->blk.cty_b);
-	CL_SET_ARG(blk->work->blk.cty_c);
+  CL_SET_ARG(blk->work->blk.cty_a);
+  CL_SET_ARG(blk->work->blk.cty_b);
+  CL_SET_ARG(blk->work->blk.cty_c);

-	return status;
+  return status;
+}
+
+static cl_int queue_decred_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+  cl_kernel *kernel = &clState->kernel;
+  unsigned int num = 0;
+  cl_int status = 0;
+
+  CL_SET_ARG(clState->outputBuffer);
+  /* Midstate */
+  CL_SET_BLKARG(ctx_a);
+  CL_SET_BLKARG(ctx_b);
+  CL_SET_BLKARG(ctx_c);
+  CL_SET_BLKARG(ctx_d);
+  CL_SET_BLKARG(ctx_e);
+  CL_SET_BLKARG(ctx_f);
+  CL_SET_BLKARG(ctx_g);
+  CL_SET_BLKARG(ctx_h);
+  /* Last 52 bytes of data (without nonce) */
+  CL_SET_BLKARG(cty_a);
+  CL_SET_BLKARG(cty_b);
+  CL_SET_BLKARG(cty_c);
+  CL_SET_BLKARG(cty_d);
+  CL_SET_BLKARG(cty_e);
+  CL_SET_BLKARG(cty_f);
+  CL_SET_BLKARG(cty_g);
+  CL_SET_BLKARG(cty_h);
+  CL_SET_BLKARG(cty_i);
+  CL_SET_BLKARG(cty_j);
+  CL_SET_BLKARG(cty_k);
+  CL_SET_BLKARG(cty_l);
+
+  return status;
 }

 static algorithm_settings_t algos[] = {
@ -989,6 +1035,11 @@ static algorithm_settings_t algos[] = {
  A_CREDITS("credits"),
 #undef A_CREDITS

+#define A_DECRED(a) \
+  { a, ALGO_DECRED, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, decred_regenhash, decred_midstate, decred_prepare_work, queue_decred_kernel, gen_hash, append_blake256_compiler_options }
+  A_DECRED("decred"),
+#undef A_DECRED
+
 #define A_YESCRYPT(a) \
  { a, ALGO_YESCRYPT, "", 1, 65536, 65536, 0, 0, 0xFF, 0xFFFF000000000000ULL, 0x0000ffffUL, 0, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, yescrypt_regenhash, NULL, NULL, queue_yescrypt_kernel, gen_hash, append_neoscrypt_compiler_options}
  A_YESCRYPT("yescrypt"),
--- a/algorithm.h
+++ b/algorithm.h
@ -36,6 +36,7 @@ typedef enum {
  ALGO_YESCRYPT_MULTI,
  ALGO_BLAKECOIN,
  ALGO_BLAKE,
+  ALGO_DECRED,
  ALGO_VANILLA
 } algorithm_type_t;

--- a/algorithm/decred.c
+++ b/algorithm/decred.c
@ -0,0 +1,170 @@
+/*
+ * BLAKE implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ *
+ * Modified for more speed by BlueDragon747 for the Blakecoin project
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "sph/sph_blake.h"
+#include "algorithm/decred.h"
+
+static const uint32_t diff1targ_decred = 0x000000ff;
+
+void decredhash(void *state, const void *input)
+{
+  sph_blake256_context ctx_blake;
+  sph_blake256_init(&ctx_blake);
+  sph_blake256(&ctx_blake, input, 180);
+  sph_blake256_close(&ctx_blake, state);
+}
+
+void decred_midstate(struct work *work)
+{
+  sph_blake256_context     ctx_blake;
+  sph_blake256_init(&ctx_blake);
+  sph_blake256 (&ctx_blake, (unsigned char *)work->data, 128);
+
+  memcpy(work->midstate, ctx_blake.H, 32);
+  endian_flip32(work->midstate, work->midstate);
+
+  char *strdata, *strmidstate;
+  strdata = bin2hex(work->data, 128);
+  strmidstate = bin2hex(work->midstate, 32);
+  applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate);
+}
+
+void decred_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata)
+{
+	blk->ctx_a = state[0];
+	blk->ctx_b = state[1];
+	blk->ctx_c = state[2];
+	blk->ctx_d = state[3];
+	blk->ctx_e = state[4];
+	blk->ctx_f = state[5];
+	blk->ctx_g = state[6];
+	blk->ctx_h = state[7];
+
+	blk->cty_a = htobe32(pdata[32]);
+	blk->cty_b = htobe32(pdata[33]);
+	blk->cty_c = htobe32(pdata[34]);
+	/* blk->cty_d = htobe32(pdata[35] = nonce) */
+
+	blk->cty_d = htobe32(pdata[36]);
+	blk->cty_e = htobe32(pdata[37]);
+	blk->cty_f = htobe32(pdata[38]);
+	blk->cty_g = htobe32(pdata[39]);
+
+	blk->cty_h = htobe32(pdata[40]);
+	blk->cty_i = htobe32(pdata[41]);
+	blk->cty_j = htobe32(pdata[42]);
+	blk->cty_k = htobe32(pdata[43]);
+
+	blk->cty_l = htobe32(pdata[44]);
+}
+
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+/* Used externally as confirmation of correct OCL code */
+int decred_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[45], ohash[8];
+
+	memcpy(data, pdata, 180);
+	data[35] = htobe32(nonce);
+	decredhash(ohash, data);
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+				(long unsigned int)Htarg,
+				(long unsigned int)diff1targ,
+				(long unsigned int)tmp_hash7);
+	if (tmp_hash7 > diff1targ)
+		return -1;
+	if (tmp_hash7 > Htarg)
+		return 0;
+	return 1;
+}
+
+void decred_regenhash(struct work *work)
+{
+        uint32_t data[45];
+        uint32_t *nonce = (uint32_t *)(work->data + 140);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        memcpy(data, work->data, 180);
+        data[35] = htobe32(*nonce);
+        decredhash(ohash, data);
+}
+
+bool scanhash_decred(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 140);
+	uint32_t data[45];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	memcpy(data, pdata, 180);
+
+	while(1) {
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[35] = (n);
+		decredhash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx",
+					(long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[35] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
--- a/algorithm/decred.h
+++ b/algorithm/decred.h
@ -0,0 +1,11 @@
+#ifndef DECRED_H
+#define DECRED_H
+
+#include "miner.h"
+
+extern int decred_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
+extern void decred_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata);
+extern void decred_midstate(struct work *work);
+extern void decred_regenhash(struct work *work);
+
+#endif /* DECRED_H */
--- a/kernel/decred.cl
+++ b/kernel/decred.cl
@ -0,0 +1,161 @@
+/**
+ * BLAKE256 14-round kernel
+ *
+ * Copyright 2015 Company Zero
+ * A complete kernel re-write
+ * with inspiration from the Golang BLAKE256 repo (github.com/dchest/blake256)
+ */
+
+/**
+ * optimized by tpruvot 02/2016 :
+ *
+ * GTX 960 | (5s):735.3M (avg):789.3Mh/s
+ * GTX 750 | (5s):443.3M (avg):476.8Mh/s
+ * to
+ * GTX 960 | (5s):875.0M (avg):899.2Mh/s
+ * GTX 750 | (5s):523.1M (avg):536.8Mh/s
+ */
+#define ROTR(v,n) rotate(v,(uint)(32U-n))
+#define ROTL(v,n) rotate(v, n)
+
+#ifdef _AMD_OPENCL
+#define SWAP(v)   rotate(v, 16U)
+#define ROTR8(v)  rotate(v, 24U)
+#else
+#define SWAP(v)  as_uint(as_uchar4(v).zwxy)
+#define ROTR8(v) as_uint(as_uchar4(v).yzwx)
+#endif
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+	volatile __global uint * restrict output,
+	// Midstate
+	const uint h0,
+	const uint h1,
+	const uint h2,
+	const uint h3,
+	const uint h4,
+	const uint h5,
+	const uint h6,
+	const uint h7,
+
+	// last 52 bytes of data
+	const uint M0,
+	const uint M1,
+	const uint M2,
+	// const uint M3 : nonce
+	const uint M4,
+	const uint M5,
+	const uint M6,
+	const uint M7,
+	const uint M8,
+	const uint M9,
+	const uint MA,
+	const uint MB,
+	const uint MC
+)
+{
+	/* Load the block header and padding */
+	const uint M3 = get_global_id(0);
+	const uint MD = 0x80000001UL;
+	const uint ME = 0x00000000UL;
+	const uint MF = 0x000005a0UL;
+
+	const uint cst0 = 0x243F6A88UL;
+	const uint cst1 = 0x85A308D3UL;
+	const uint cst2 = 0x13198A2EUL;
+	const uint cst3 = 0x03707344UL;
+	const uint cst4 = 0xA4093822UL;
+	const uint cst5 = 0x299F31D0UL;
+	const uint cst6 = 0x082EFA98UL;
+	const uint cst7 = 0xEC4E6C89UL;
+	const uint cst8 = 0x452821E6UL;
+	const uint cst9 = 0x38D01377UL;
+	const uint cstA = 0xBE5466CFUL;
+	const uint cstB = 0x34E90C6CUL;
+	const uint cstC = 0xC0AC29B7UL;
+	const uint cstD = 0xC97C50DDUL;
+	const uint cstE = 0x3F84D5B5UL;
+	const uint cstF = 0xB5470917UL;
+
+	uint V0, V1, V2, V3, V4, V5, V6, V7;
+	uint V8, V9, VA, VB, VC, VD, VE, VF;
+	uint pre7;
+
+	/* Load the midstate and initialize */
+	V0 = h0;
+	V1 = h1;
+	V2 = h2;
+	V3 = h3;
+	V4 = h4;
+	V5 = h5;
+	V6 = h6;
+	pre7 = V7 = h7;
+
+	V8 = cst0;
+	V9 = cst1;
+	VA = cst2;
+	VB = cst3;
+	VC = 0xA4093D82UL;
+	VD = 0x299F3470UL;
+	VE = cst6;
+	VF = cst7;
+
+	/* 14 rounds */
+
+	V0 = V0 + (M0 ^ cst1); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M2 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M4 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M6 ^ cst7); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M5 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M7 ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M3 ^ cst2); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M1 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M8 ^ cst9); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MA ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (ME ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MF ^ cstE); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MB ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M9 ^ cst8); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (ME ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M9 ^ cstF); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MD ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MF ^ cst9); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M6 ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M1 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M0 ^ cst2); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MB ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M5 ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M7 ^ cstB); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M3 ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M2 ^ cst0); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MC ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (MB ^ cst8); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (MC ^ cst0); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M5 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MF ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M2 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MD ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M0 ^ cstC); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M8 ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M7 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M9 ^ cst4); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M1 ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M4 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (ME ^ cstA); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M7 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M3 ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MB ^ cstE); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (ME ^ cstB); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M1 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M9 ^ cst7); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M2 ^ cst6); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M5 ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M4 ^ cst0); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MF ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M0 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M8 ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MA ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M6 ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M9 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M5 ^ cst7); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M2 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MA ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M4 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MF ^ cstA); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M7 ^ cst5); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M0 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (ME ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MB ^ cstC); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M6 ^ cst8); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M3 ^ cstD); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M8 ^ cst6); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MD ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MC ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M1 ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M2 ^ cstC); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M6 ^ cstA); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M0 ^ cstB); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M8 ^ cst3); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MB ^ cst0); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M3 ^ cst8); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (MA ^ cst6); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MC ^ cst2); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M4 ^ cstD); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M7 ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MF ^ cstE); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M1 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (ME ^ cstF); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M9 ^ cst1); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M5 ^ cst7); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MD ^ cst4); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (MC ^ cst5); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M1 ^ cstF); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (ME ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M4 ^ cstA); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MD ^ cstE); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MA ^ cst4); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (MF ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M5 ^ cstC); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M0 ^ cst7); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M9 ^ cst2); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M8 ^ cstB); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M2 ^ cst9); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MB ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M7 ^ cst0); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (MD ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M7 ^ cstE); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MC ^ cst1); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M3 ^ cst9); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M1 ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M9 ^ cst3); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (ME ^ cst7); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MB ^ cstD); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M5 ^ cst0); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MF ^ cst4); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M8 ^ cst6); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M2 ^ cstA); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M6 ^ cst8); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MA ^ cst2); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M4 ^ cstF); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M0 ^ cst5); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M6 ^ cstF); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (ME ^ cst9); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MB ^ cst3); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M0 ^ cst8); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M3 ^ cstB); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M8 ^ cst0); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M9 ^ cstE); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MF ^ cst6); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MC ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MD ^ cst7); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M1 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MA ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M4 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M5 ^ cstA); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M7 ^ cstD); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M2 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (MA ^ cst2); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M7 ^ cst6); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M1 ^ cst5); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M6 ^ cst7); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M5 ^ cst1); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M2 ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MF ^ cstB); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M9 ^ cstE); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M3 ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MD ^ cst0); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (MC ^ cst3); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M0 ^ cstD); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (ME ^ cst9); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MB ^ cstF); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M0 ^ cst1); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M2 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M4 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (M6 ^ cst7); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M5 ^ cst4); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M7 ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M3 ^ cst2); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M1 ^ cst0); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M8 ^ cst9); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (MA ^ cstB); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (ME ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (MF ^ cstE); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MB ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M9 ^ cst8); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (ME ^ cstA); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M4 ^ cst8); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M9 ^ cstF); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MD ^ cst6); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MF ^ cst9); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (M6 ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M8 ^ cst4); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M1 ^ cstC); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M0 ^ cst2); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (MB ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M5 ^ cst3); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M7 ^ cstB); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M3 ^ cst5); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M2 ^ cst0); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (MC ^ cst1); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (MB ^ cst8); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (MC ^ cst0); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (M5 ^ cst2); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MF ^ cstD); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (M2 ^ cst5); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (MD ^ cstF); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M0 ^ cstC); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M8 ^ cstB); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (MA ^ cstE); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M3 ^ cst6); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M7 ^ cst1); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (M9 ^ cst4); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M1 ^ cst7); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M4 ^ cst9); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (M6 ^ cst3); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (ME ^ cstA); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);
+	V0 = V0 + (M7 ^ cst9); V0 = V0 + V4; VC = VC ^ V0; VC = SWAP(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 12U); V1 = V1 + (M3 ^ cst1); V1 = V1 + V5; VD = VD ^ V1; VD = SWAP(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 12U); V2 = V2 + (MD ^ cstC); V2 = V2 + V6; VE = VE ^ V2; VE = SWAP(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 12U); V3 = V3 + (MB ^ cstE); V3 = V3 + V7; VF = VF ^ V3; VF = SWAP(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 12U); V2 = V2 + (MC ^ cstD); V2 = V2 + V6; VE = VE ^ V2; VE = ROTR8(VE); VA = VA + VE; V6 = V6 ^ VA; V6 = ROTR(V6, 7U); V3 = V3 + (ME ^ cstB); V3 = V3 + V7; VF = VF ^ V3; VF = ROTR8(VF); VB = VB + VF; V7 = V7 ^ VB; V7 = ROTR(V7, 7U); V1 = V1 + (M1 ^ cst3); V1 = V1 + V5; VD = VD ^ V1; VD = ROTR8(VD); V9 = V9 + VD; V5 = V5 ^ V9; V5 = ROTR(V5, 7U); V0 = V0 + (M9 ^ cst7); V0 = V0 + V4; VC = VC ^ V0; VC = ROTR8(VC); V8 = V8 + VC; V4 = V4 ^ V8; V4 = ROTR(V4, 7U); V0 = V0 + (M2 ^ cst6); V0 = V0 + V5; VF = VF ^ V0; VF = SWAP(VF); VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 12U); V1 = V1 + (M5 ^ cstA); V1 = V1 + V6; VC = VC ^ V1; VC = SWAP(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 12U); V2 = V2 + (M4 ^ cst0); V2 = V2 + V7; VD = VD ^ V2; VD = SWAP(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 12U); V3 = V3 + (MF ^ cst8); V3 = V3 + V4; VE = VE ^ V3; VE = SWAP(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 12U); V2 = V2 + (M0 ^ cst4); V2 = V2 + V7; VD = VD ^ V2; VD = ROTR8(VD); V8 = V8 + VD; V7 = V7 ^ V8; V7 = ROTR(V7, 7U); V3 = V3 + (M8 ^ cstF); V3 = V3 + V4; VE = VE ^ V3; VE = ROTR8(VE); V9 = V9 + VE; V4 = V4 ^ V9; V4 = ROTR(V4, 7U); V1 = V1 + (MA ^ cst5); V1 = V1 + V6; VC = VC ^ V1; VC = ROTR8(VC); VB = VB + VC; V6 = V6 ^ VB; V6 = ROTR(V6, 7U); V0 = V0 + (M6 ^ cst2); V0 = V0 + V5; VF = VF ^ V0; VF = ROTR8(VF);/*VA = VA + VF; V5 = V5 ^ VA; V5 = ROTR(V5, 7U);*/
+
+	/* The final chunks of the hash
+	 * are calculated as:
+	 * h0 = h0 ^ V0 ^ V8;
+	 * h1 = h1 ^ V1 ^ V9;
+	 * h2 = h2 ^ V2 ^ VA;
+	 * h3 = h3 ^ V3 ^ VB;
+	 * h4 = h4 ^ V4 ^ VC;
+	 * h5 = h5 ^ V5 ^ VD;
+	 * h6 = h6 ^ V6 ^ VE;
+	 * h7 = h7 ^ V7 ^ VF;
+	 *
+	 * We just check if the last byte
+	 * is zeroed and if it is, we tell
+	 * cgminer that we've found a
+	 * and to check it against the
+	 * target.
+	*/
+
+	/* Debug code to help you assess the correctness
+	 * of your hashing function in case someone decides
+	 * to try to optimize.
+	if (!((pre7 ^ V7 ^ VF) & 0xFFFF0000)) {
+		printf("hash on gpu %x %x %x %x %x %x %x %x\n",
+			h0 ^ V0 ^ V8,
+			h1 ^ V1 ^ V9,
+			h2 ^ V2 ^ VA,
+			h3 ^ V3 ^ VB,
+			h4 ^ V4 ^ VC,
+			h5 ^ V5 ^ VD,
+			h6 ^ V6 ^ VE,
+			h7 ^ V7 ^ VF);
+		printf("nonce for hash on gpu %x\n",
+			nonce);
+	}
+	*/
+
+	if (pre7 ^ V7 ^ VF) return;
+
+	/* Push this share */
+	output[output[0xFF]++] = M3;
+}
--- a/miner.h
+++ b/miner.h
@ -736,6 +736,16 @@ static inline void flip168(void *dest_p, const void *src_p)
 		dest[i] = swab32(src[i]);
 }

+static inline void flip180(void *dest_p, const void *src_p)
+{
+	uint32_t *dest = (uint32_t *)dest_p;
+	const uint32_t *src = (uint32_t *)src_p;
+	int i;
+
+	for (i = 0; i < 45; i++)
+		dest[i] = swab32(src[i]);
+}
+
 /*
 * Encode a length len/4 vector of (uint32_t) into a length len vector of
 * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
@ -762,7 +772,11 @@ static inline void endian_flip128(void *dest_p, const void *src_p)
 }
 static inline void endian_flip168(void *dest_p, const void *src_p)
 {
-	flip168(dest_p, src_p);
+  flip168(dest_p, src_p);
+}
+static inline void endian_flip180(void *dest_p, const void *src_p)
+{
+  flip180(dest_p, src_p);
 }

 #else
@ -779,6 +793,10 @@ static inline void
 endian_flip168(void __maybe_unused *dest_p, const void __maybe_unused *src_p)
 {
 }
+static inline void
+endian_flip180(void __maybe_unused *dest_p, const void __maybe_unused *src_p)
+{
+}
 #endif


@ -1232,6 +1250,8 @@ typedef struct _dev_blk_ctx {
  cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
  cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
  cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
+  cl_uint cty_i; cl_uint cty_j; cl_uint cty_k; cl_uint cty_l;
+  cl_uint cty_m; cl_uint cty_n; cl_uint cty_o; cl_uint cty_p;
  cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
  cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
  cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
@ -1442,7 +1462,7 @@ struct pool {
 #define GETWORK_MODE_GBT 'G'

 struct work {
-  unsigned char data[168];
+  unsigned char data[256];
  unsigned char midstate[32];
  unsigned char target[32];
  unsigned char hash[32];
--- a/ocl.c
+++ b/ocl.c
@ -758,7 +758,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
  size_t buf1size;
  size_t buf3size;
  size_t buf2size;
-  size_t readbufsize = (algorithm->type == ALGO_CRE) ? 168 : 128;
+  size_t readbufsize = 128;
+  if (algorithm->type == ALGO_CRE) readbufsize = 168;
+  else if (algorithm->type == ALGO_DECRED) readbufsize = 192;

  if (algorithm->rw_buffer_size < 0) {
    // calc buffer size for neoscrypt
--- a/ocl.h
+++ b/ocl.h
@ -17,7 +17,7 @@ typedef struct __clState {
  cl_mem buffer1;
  cl_mem buffer2;
  cl_mem buffer3;
-  unsigned char cldata[168];
+  unsigned char cldata[256];
  bool goffset;
  cl_uint vwidth;
  size_t max_work_size;
--- a/sgminer.c
+++ b/sgminer.c
@ -2064,17 +2064,31 @@ static void update_gbt(struct pool *pool)
 /* Return the work coin/network difficulty */
 static double get_work_blockdiff(const struct work *work)
 {
+  uint32_t* data = (uint32_t*) work->data;
  uint64_t diff64;
  double numerator;
+  int powdiff;
+  uint8_t shift;

  // Neoscrypt has the data reversed
  if (work->pool->algorithm.type == ALGO_NEOSCRYPT) {
    diff64 = bswap_64(((uint64_t)(be32toh(*((uint32_t *)(work->data + 72))) & 0xFFFFFF00)) << 8);
    numerator = (double)work->pool->algorithm.diff_numerator;
  }
+  else if (work->pool->algorithm.type == ALGO_DECRED) {
+    shift = work->data[116+3];
+    powdiff = (8 * (0x1d - 3)) - (8 * (shift - 3));
+    diff64 = data[29] & 0xFFFFFF;
+    if (!diff64) diff64 = 1;
+    double d = (double)work->pool->algorithm.diff_numerator / (double)diff64;
+    for (int m = shift; m < 29; m++) d *= 256.0;
+    for (int m = 29; m < shift; m++) d /= 256.0;
+    if (shift == 28) d *= 256.0; // testnet
+    return d;
+  }
  else {
-    uint8_t pow = work->data[72];
-    int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3));;
+    shift = work->data[72];
+    powdiff = (8 * (0x1d - 3)) - (8 * (shift - 3));;
    diff64 = be32toh(*((uint32_t *)(work->data + 72))) & 0x0000000000FFFFFF;
    numerator = work->pool->algorithm.diff_numerator << powdiff;
  }
@ -2245,15 +2259,15 @@ static bool gbt_decode(struct pool *pool, json_t *res_val)
 static bool getwork_decode(json_t *res_val, struct work *work)
 {
  size_t worklen = 128;
-  worklen = ((work->pool->algorithm.type == ALGO_CRE) ? sizeof(work->data) : worklen);
+  if (work->pool->algorithm.type == ALGO_CRE) worklen = 168;
+  else if (work->pool->algorithm.type == ALGO_DECRED) worklen = 192;
  if (unlikely(!jobj_binary(res_val, "data", work->data, worklen, true))) {
    if (opt_morenotices)
      applog(LOG_ERR, "%s: JSON inval data", isnull(get_pool_name(work->pool), ""));
    return false;
  }

-  // Neoscrypt doesn't calc midstate
-  if (work->pool->algorithm.type != ALGO_NEOSCRYPT) {
+  if (work->pool->algorithm.type == ALGO_CRE || work->pool->algorithm.type == ALGO_SCRYPT) {
    if (!jobj_binary(res_val, "midstate", work->midstate, sizeof(work->midstate), false)) {
      // Calculate it ourselves
      if (opt_morenotices) {
@ -2268,6 +2282,11 @@ static bool getwork_decode(json_t *res_val, struct work *work)
      applog(LOG_ERR, "%s: JSON inval target", isnull(get_pool_name(work->pool), ""));
    return false;
  }
+  if (work->pool->algorithm.type == ALGO_DECRED) {
+    // some random extradata to make it unique
+    ((uint32_t*)work->data)[36] = (rand()*4);
+    ((uint32_t*)work->data)[37] = (rand()*4) << 8 | work->thr_id;
+  }
  return true;
 }

@ -3004,7 +3023,9 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s

  cgpu = get_thr_cgpu(thr_id);

-  if (work->pool->algorithm.type == ALGO_CRE) {
+  if (work->pool->algorithm.type == ALGO_DECRED) {
+    endian_flip180(work->data, work->data);
+  } else if (work->pool->algorithm.type == ALGO_CRE) {
    endian_flip168(work->data, work->data);
  } else {
    endian_flip128(work->data, work->data);
@ -3014,6 +3035,13 @@ static bool submit_upstream_work(struct work *work, CURL *curl, char *curl_err_s
  int datasize = 128;
  if (work->pool->algorithm.type == ALGO_NEOSCRYPT) datasize = 80;
  else if (work->pool->algorithm.type == ALGO_CRE) datasize = 168;
+  else if (work->pool->algorithm.type == ALGO_DECRED) {
+    datasize = 192;
+    ((uint32_t*)work->data)[45] = 0x80000001UL;
+    ((uint32_t*)work->data)[46] = 0;
+    ((uint32_t*)work->data)[47] = 0x000005a0UL;
+  }
+
  hexstr = bin2hex(work->data, datasize);

  /* build JSON-RPC request */
@ -3670,15 +3698,44 @@ static inline bool can_roll(struct work *work)
    work->rolls < 7000 && !stale_work(work, false));
 }

+static uint32_t _get_work_time(struct work *work)
+{
+  uint32_t *data = (uint32_t*) work->data;
+  uint32_t work_ntime = data[17];
+  if (work->pool && work->pool->algorithm.type == ALGO_DECRED) {
+    work_ntime = data[34];
+  }
+  return work_ntime;
+}
+
+static void _set_work_time(struct work *work, uint32_t ntime)
+{
+  uint32_t *data = (uint32_t*) work->data;
+  uint32_t *work_ntime = &data[17];
+  if (work->pool && work->pool->algorithm.type == ALGO_DECRED) {
+    work_ntime = &data[34];
+  }
+  (*work_ntime) = ntime;
+}
+
 static void roll_work(struct work *work)
 {
-  uint32_t *work_ntime;
+  uint32_t work_ntime;
  uint32_t ntime;

-  work_ntime = (uint32_t *)(work->data + 68);
-  ntime = be32toh(*work_ntime);
+  work_ntime = _get_work_time(work);
+  ntime = be32toh(work_ntime);
  ntime++;
-  *work_ntime = htobe32(ntime);
+
+  if (work->pool->algorithm.type == ALGO_DECRED) {
+    uint32_t* data = (uint32_t*) work->data;
+    // dont mess with ntime, use extranonce
+    data[36]++;
+    data[37] = ((rand()*4) << 8) | work->thr_id;
+  } else {
+    _set_work_time(work, htobe32(ntime));
+  }
+
  local_work++;
  work->rolls++;
  work->blk.nonce = 0;
@ -3736,6 +3793,12 @@ static struct work *make_clone(struct work *work)
 {
  struct work *work_clone = copy_work(work);

+  if (work->pool->algorithm.type == ALGO_DECRED) {
+    // maybe not useful here
+    ((uint32_t*)work->data)[36] = (rand()*4);
+    ((uint32_t*)work->data)[37] = (rand()*4) << 8;
+  }
+
  work_clone->clone = true;
  cgtime((struct timeval *)&(work_clone->tv_cloned));
  work_clone->longpoll = false;
@ -3854,20 +3917,18 @@ static void _copy_work(struct work *work, const struct work *base_work, int noff
    /* If we are passed an noffset the binary work->data ntime and
     * the work->ntime hex string need to be adjusted. */
    if (noffset) {
-      uint32_t *work_ntime = (uint32_t *)(work->data + 68);
-      uint32_t ntime = be32toh(*work_ntime);
-
+      uint32_t work_ntime = _get_work_time(work);
+      uint32_t ntime = be32toh(work_ntime);
      ntime += noffset;
-      *work_ntime = htobe32(ntime);
+      _set_work_time(work, htobe32(ntime));
      work->ntime = offset_ntime(base_work->ntime, noffset);
    } else
      work->ntime = strdup(base_work->ntime);
  } else if (noffset) {
-    uint32_t *work_ntime = (uint32_t *)(work->data + 68);
-    uint32_t ntime = be32toh(*work_ntime);
-
+    uint32_t work_ntime = _get_work_time(work);
+    uint32_t ntime = be32toh(work_ntime);
    ntime += noffset;
-    *work_ntime = htobe32(ntime);
+    _set_work_time(work, htobe32(ntime));
  }
  if (base_work->coinbase)
    work->coinbase = strdup(base_work->coinbase);
@ -5522,11 +5583,10 @@ static void *stratum_sthread(void *userdata)
    quit(1, "Failed to create stratum_q in stratum_sthread");

  while (42) {
-    char noncehex[12], nonce2hex[20], s[1024];
+    char noncehex[12], nonce2hex[33], s[1024] = { 0 };
    struct stratum_share *sshare;
    uint32_t *hash32, nonce;
-    unsigned char nonce2[8];
-    uint64_t *nonce2_64;
+    unsigned char nonce2[16];
    struct work *work;
    bool submitted;

@ -5538,7 +5598,7 @@ static void *stratum_sthread(void *userdata)
    if (unlikely(!work))
      quit(1, "Stratum q returned empty work");

-    if (unlikely(work->nonce2_len > 8)) {
+    if ((pool->algorithm.type != ALGO_DECRED && unlikely(work->nonce2_len > 8)) || (pool->algorithm.type == ALGO_DECRED && unlikely(work->nonce2_len > 16))) {
      applog(LOG_ERR, "%s asking for inappropriately long nonce2 length %d", get_pool_name(pool), (int)work->nonce2_len);
      applog(LOG_ERR, "Not attempting to submit shares");
      free_work(work);
@ -5561,12 +5621,19 @@ static void *stratum_sthread(void *userdata)
      nonce = htobe32(*((uint32_t *)(work->data + 76)));
      //*((uint32_t *)nonce2) = htole32(work->nonce2);
    }
+    else if (pool->algorithm.type == ALGO_DECRED) {
+      nonce = *((uint32_t *)(work->data + 140));
+    }
    else {
      nonce = *((uint32_t *)(work->data + 76));
    }
    __bin2hex(noncehex, (const unsigned char *)&nonce, 4);

-    *((uint64_t *)nonce2) = htole64(work->nonce2);
+    if (pool->algorithm.type == ALGO_DECRED) {
+      memcpy(nonce2, work->data + 144, work->nonce2_len);
+    } else {
+      *((uint64_t *)nonce2) = htole64(work->nonce2);
+    }
    __bin2hex(nonce2hex, nonce2, work->nonce2_len);
    memset(s, 0, 1024);

@ -6033,29 +6100,38 @@ void set_target_neoscrypt(unsigned char *target, double diff, const int thr_id)
 static void gen_stratum_work(struct pool *pool, struct work *work)
 {
  unsigned char merkle_root[32], merkle_sha[64];
+  int datasize = 128;
  uint32_t *data32, *swap32;
  uint64_t nonce2le;
  int i, j;

  cg_wlock(&pool->data_lock);

-  /* Update coinbase. Always use an LE encoded nonce2 to fill in values
-  * from left to right and prevent overflow errors with small n2sizes */
-  nonce2le = htole64(pool->nonce2);
-  memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size);
-  work->nonce2 = pool->nonce2++;
-  work->nonce2_len = pool->n2size;
+  if (pool->algorithm.type == ALGO_DECRED) {
+    datasize = 180;
+    work->nonce2 = pool->nonce2++;
+    work->nonce2_len = pool->n2size;
+  } else {
+    /* Update coinbase. Always use an LE encoded nonce2 to fill in values
+    * from left to right and prevent overflow errors with small n2sizes */
+    nonce2le = htole64(pool->nonce2);
+    memcpy(pool->coinbase + pool->nonce2_offset, &nonce2le, pool->n2size);
+    work->nonce2 = pool->nonce2++;
+    work->nonce2_len = pool->n2size;
+  }

  /* Downgrade to a read lock to read off the pool variables */
  cg_dwlock(&pool->data_lock);

-  /* Generate merkle root */
-  pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root);
-  memcpy(merkle_sha, merkle_root, 32);
-  for (i = 0; i < pool->swork.merkles; i++) {
-    memcpy(merkle_sha + 32, pool->swork.merkle_bin[i], 32);
-    gen_hash(merkle_sha, 64, merkle_root);
+  if (pool->algorithm.type != ALGO_DECRED) {
+    /* Generate merkle root */
+    pool->algorithm.gen_hash(pool->coinbase, pool->swork.cb_len, merkle_root);
    memcpy(merkle_sha, merkle_root, 32);
+    for (i = 0; i < pool->swork.merkles; i++) {
+      memcpy(merkle_sha + 32, pool->swork.merkle_bin[i], 32);
+      gen_hash(merkle_sha, 64, merkle_root);
+      memcpy(merkle_sha, merkle_root, 32);
+    }
  }

  applog(LOG_DEBUG, "[THR%d] gen_stratum_work() - algorithm = %s", work->thr_id, pool->algorithm.name);
@ -6089,6 +6165,18 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
    ((uint32_t *)work->data)[20] = 0x80000000;
    ((uint32_t *)work->data)[31] = 0x00000280;
  }
+  else if (pool->algorithm.type == ALGO_DECRED) {
+    memcpy(work->data, pool->header_bin, 4); // version
+    flip32(work->data + 4, pool->header_bin + 4); // prevhash
+    memcpy(work->data + 4 + 32, pool->coinbase, MIN((int)pool->swork.cb_len, 108));
+    memcpy(work->data + 144, pool->nonce1bin, MIN(pool->n1_len, 36));
+    ((uint32_t *)work->data)[36] = work->nonce2;
+//    ((uint32_t *)work->data)[36] = 2;
+    ((uint32_t *)work->data)[37] = ((rand() * 4) << 8) | work->thr_id;
+//    ((uint32_t *)work->data)[37] = 0x0000a400;
+    for (i = 39; i < 45; i++)
+      ((uint32_t *)work->data)[i] = 0;
+  }
  else {
    data32 = (uint32_t *)merkle_sha;
    swap32 = (uint32_t *)merkle_root;
@ -6112,14 +6200,16 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
  if (opt_debug) {
    char *header, *merkle_hash;

-    header = bin2hex(work->data, 128);
-    merkle_hash = bin2hex((const unsigned char *)merkle_root, 32);
-    applog(LOG_DEBUG, "[THR%d] Generated stratum merkle %s", work->thr_id, merkle_hash);
+    header = bin2hex(work->data, datasize);
+    if (pool->algorithm.type != ALGO_DECRED) {
+        merkle_hash = bin2hex((const unsigned char *)merkle_root, 32);
+        applog(LOG_DEBUG, "[THR%d] Generated stratum merkle %s", work->thr_id, merkle_hash);
+        free(merkle_hash);
+    }
    applog(LOG_DEBUG, "[THR%d] Generated stratum header %s", work->thr_id, header);
    applog(LOG_DEBUG, "[THR%d] Work job_id %s nonce2 %"PRIu64" ntime %s", work->thr_id, work->job_id,
           work->nonce2, work->ntime);
    free(header);
-    free(merkle_hash);
  }

  // For Neoscrypt use set_target_neoscrypt() function
@ -7055,6 +7145,7 @@ static void rebuild_nonce(struct work *work, uint32_t nonce)
 {
  uint32_t nonce_pos = 76;
  if (work->pool->algorithm.type == ALGO_CRE) nonce_pos = 140;
+  else if (work->pool->algorithm.type == ALGO_DECRED) nonce_pos = 140;

  uint32_t *work_nonce = (uint32_t *)(work->data + nonce_pos);