decred algo for longpoll/getwork

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
9 years ago · 6e95407dcf
11 changed files with 590 additions and 56 deletions
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@ -487,7 +487,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
				@@ -487,7 +487,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non

 	if (opt_benchmark) {
 		targetHigh = 0x1ULL << 32;
-		ptarget[6] = swab32(0x00ff);
+		ptarget[6] = swab32(0xff);
 	}

 	if (!init[thr_id])
@ -519,9 +519,9 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
				@@ -519,9 +519,9 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 		if (foundNonce != UINT32_MAX)
 		{
 			uint32_t vhashcpu[8];
-			uint32_t Htarg = (uint32_t)targetHigh;
+			uint32_t Htarg = ptarget[6];

-			for (int k=0; k < 19; k++)
+			for (int k=16; k < 19; k++)
 				be32enc(&endiandata[k], pdata[k]);

 			be32enc(&endiandata[19], foundNonce);
--- a/Algo256/decred.cu
+++ b/Algo256/decred.cu
@ -0,0 +1,443 @@
				@@ -0,0 +1,443 @@
+/**
+ * Blake-256 Decred 180-Bytes input Cuda Kernel (Tested on SM 5/5.2)
+ *
+ * Tanguy Pruvot - Feb 2016
+ */
+
+#include <stdint.h>
+#include <memory.h>
+
+#include <miner.h>
+
+extern "C" {
+#include <sph/sph_blake.h>
+}
+
+/* threads per block */
+#define TPB 256
+
+/**
+ * CPU reference hash: 14-round sph BLAKE-256 over a fixed 180-byte input.
+ *
+ * @param output  destination handed to sph_blake256_close()
+ * @param input   source buffer; exactly 180 bytes are read
+ */
+extern "C" void decred_hash(void *output, const void *input)
+{
+	sph_blake256_context blake_ctx;
+
+	/* round count is selected through the sph setter before init */
+	sph_blake256_set_rounds(14);
+	sph_blake256_init(&blake_ctx);
+
+	/* absorb the 180 input bytes, then finalize into output */
+	sph_blake256(&blake_ctx, input, 180);
+	sph_blake256_close(&blake_ctx, output);
+}
+
+#include <cuda_helper.h>
+
+#ifdef __INTELLISENSE__
+#define __byte_perm(x, y, b) x
+#endif
+
+/* Per-job kernel input, filled by decred_cpu_setBlock_52():
+ * words 0..7 are loaded as the initial chaining value h[] by the kernel,
+ * words 8..23 as the message block m[] (word 11 is not read on the device,
+ * the per-thread nonce is used as m[3] instead). */
+__constant__ uint32_t _ALIGN(4) d_data[24];
+
+/* 8 adapters max */
+/* result buffers indexed by thr_id: d_* is allocated with cudaMalloc,
+ * h_* with cudaMallocHost (see scanhash_decred) */
+static uint32_t *d_resNonce[MAX_GPUS];
+static uint32_t *h_resNonce[MAX_GPUS];
+
+/* max count of found nonces in one call */
+#define NBN 1
+#if NBN > 1
+/* NOTE: this initializer only sets element 0 to UINT32_MAX; any further
+ * elements are zero-initialized */
+static uint32_t extra_results[NBN] = { UINT32_MAX };
+#endif
+
+/* ############################################################################################################################### */
+
+/* One G step on state words v[a], v[b], v[c], v[d].
+ * First half mixes in m[x] ^ c_u256[y], second half m[y] ^ c_u256[x].
+ * The __byte_perm selectors 0x1032 and 0x0321 swap the 16-bit halves and
+ * rotate the word right by one byte respectively; the SPH_ROTR32 calls use
+ * rotation counts 12 and 7.
+ * Requires `v`, `m` and `c_u256` to be in scope where the macro expands. */
+#define GSPREC(a,b,c,d,x,y) { \
+	v[a] += (m[x] ^ c_u256[y]) + v[b]; \
+	v[d] = __byte_perm(v[d] ^ v[a], 0, 0x1032); \
+	v[c] += v[d]; \
+	v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \
+	v[a] += (m[y] ^ c_u256[x]) + v[b]; \
+	v[d] = __byte_perm(v[d] ^ v[a], 0, 0x0321); \
+	v[c] += v[d]; \
+	v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
+}
+
+/**
+ * 14-round BLAKE-256 compression of the last message block, specialised for
+ * the nonce search: on return only h[6] and h[7] have been updated.
+ *
+ * @param h      in/out chaining value (8 words); h[0..5] are left untouched
+ * @param nonce  value used as message word m[3]
+ * @param T0     counter word XORed into v[12] and v[13]
+ *
+ * The remaining message words are read from d_data[8..10] and d_data[12..23].
+ */
+__device__ __forceinline__
+void blake256_compress_14(uint32_t *h, const uint32_t nonce, const uint32_t T0)
+{
+	uint32_t v[16];
+
+	#pragma unroll 8
+	for(uint32_t i = 0; i < 8; i++)
+		v[i] = h[i];
+
+	const uint32_t c_u256[16] = {
+		0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
+		0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
+		0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
+		0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
+	};
+
+	v[ 8] = c_u256[0];
+	v[ 9] = c_u256[1];
+	v[10] = c_u256[2];
+	v[11] = c_u256[3];
+
+	// only T0 is mixed in; v[14] and v[15] take the plain constants
+	v[12] = c_u256[4] ^ T0;
+	v[13] = c_u256[5] ^ T0;
+	v[14] = c_u256[6];
+	v[15] = c_u256[7];
+
+	uint32_t m[16];
+
+	m[0] = d_data[8];
+	m[1] = d_data[9];
+	m[2] = d_data[10];
+	m[3] = nonce; // the per-thread nonce takes the place of d_data[11]
+
+	#pragma unroll
+	for (uint32_t i = 4; i < 16; i++) {
+		m[i] = d_data[i+8U];
+	}
+
+	// round 1
+	GSPREC(0, 4, 0x8, 0xC, 0,  1);
+	GSPREC(1, 5, 0x9, 0xD, 2,  3);
+	GSPREC(2, 6, 0xA, 0xE, 4,  5);
+	GSPREC(3, 7, 0xB, 0xF, 6,  7);
+	GSPREC(0, 5, 0xA, 0xF, 8,  9);
+	GSPREC(1, 6, 0xB, 0xC, 10, 11);
+	GSPREC(2, 7, 0x8, 0xD, 12, 13);
+	GSPREC(3, 4, 0x9, 0xE, 14, 15);
+	// round 2
+	GSPREC(0, 4, 0x8, 0xC, 14, 10);
+	GSPREC(1, 5, 0x9, 0xD, 4,  8);
+	GSPREC(2, 6, 0xA, 0xE, 9,  15);
+	GSPREC(3, 7, 0xB, 0xF, 13, 6);
+	GSPREC(0, 5, 0xA, 0xF, 1,  12);
+	GSPREC(1, 6, 0xB, 0xC, 0,  2);
+	GSPREC(2, 7, 0x8, 0xD, 11, 7);
+	GSPREC(3, 4, 0x9, 0xE, 5,  3);
+	// round 3
+	GSPREC(0, 4, 0x8, 0xC, 11, 8);
+	GSPREC(1, 5, 0x9, 0xD, 12, 0);
+	GSPREC(2, 6, 0xA, 0xE, 5,  2);
+	GSPREC(3, 7, 0xB, 0xF, 15, 13);
+	GSPREC(0, 5, 0xA, 0xF, 10, 14);
+	GSPREC(1, 6, 0xB, 0xC, 3,  6);
+	GSPREC(2, 7, 0x8, 0xD, 7,  1);
+	GSPREC(3, 4, 0x9, 0xE, 9,  4);
+	// round 4
+	GSPREC(0, 4, 0x8, 0xC, 7,  9);
+	GSPREC(1, 5, 0x9, 0xD, 3,  1);
+	GSPREC(2, 6, 0xA, 0xE, 13, 12);
+	GSPREC(3, 7, 0xB, 0xF, 11, 14);
+	GSPREC(0, 5, 0xA, 0xF, 2,  6);
+	GSPREC(1, 6, 0xB, 0xC, 5,  10);
+	GSPREC(2, 7, 0x8, 0xD, 4,  0);
+	GSPREC(3, 4, 0x9, 0xE, 15, 8);
+	// round 5
+	GSPREC(0, 4, 0x8, 0xC, 9,  0);
+	GSPREC(1, 5, 0x9, 0xD, 5,  7);
+	GSPREC(2, 6, 0xA, 0xE, 2,  4);
+	GSPREC(3, 7, 0xB, 0xF, 10, 15);
+	GSPREC(0, 5, 0xA, 0xF, 14, 1);
+	GSPREC(1, 6, 0xB, 0xC, 11, 12);
+	GSPREC(2, 7, 0x8, 0xD, 6,  8);
+	GSPREC(3, 4, 0x9, 0xE, 3,  13);
+	// round 6
+	GSPREC(0, 4, 0x8, 0xC, 2, 12);
+	GSPREC(1, 5, 0x9, 0xD, 6, 10);
+	GSPREC(2, 6, 0xA, 0xE, 0, 11);
+	GSPREC(3, 7, 0xB, 0xF, 8, 3);
+	GSPREC(0, 5, 0xA, 0xF, 4, 13);
+	GSPREC(1, 6, 0xB, 0xC, 7, 5);
+	GSPREC(2, 7, 0x8, 0xD, 15,14);
+	GSPREC(3, 4, 0x9, 0xE, 1, 9);
+	// round 7
+	GSPREC(0, 4, 0x8, 0xC, 12, 5);
+	GSPREC(1, 5, 0x9, 0xD, 1, 15);
+	GSPREC(2, 6, 0xA, 0xE, 14,13);
+	GSPREC(3, 7, 0xB, 0xF, 4, 10);
+	GSPREC(0, 5, 0xA, 0xF, 0,  7);
+	GSPREC(1, 6, 0xB, 0xC, 6,  3);
+	GSPREC(2, 7, 0x8, 0xD, 9,  2);
+	GSPREC(3, 4, 0x9, 0xE, 8, 11);
+	// round 8
+	GSPREC(0, 4, 0x8, 0xC, 13,11);
+	GSPREC(1, 5, 0x9, 0xD, 7, 14);
+	GSPREC(2, 6, 0xA, 0xE, 12, 1);
+	GSPREC(3, 7, 0xB, 0xF, 3,  9);
+	GSPREC(0, 5, 0xA, 0xF, 5,  0);
+	GSPREC(1, 6, 0xB, 0xC, 15, 4);
+	GSPREC(2, 7, 0x8, 0xD, 8,  6);
+	GSPREC(3, 4, 0x9, 0xE, 2, 10);
+	// round 9
+	GSPREC(0, 4, 0x8, 0xC, 6, 15);
+	GSPREC(1, 5, 0x9, 0xD, 14, 9);
+	GSPREC(2, 6, 0xA, 0xE, 11, 3);
+	GSPREC(3, 7, 0xB, 0xF, 0,  8);
+	GSPREC(0, 5, 0xA, 0xF, 12, 2);
+	GSPREC(1, 6, 0xB, 0xC, 13, 7);
+	GSPREC(2, 7, 0x8, 0xD, 1,  4);
+	GSPREC(3, 4, 0x9, 0xE, 10, 5);
+	// round 10
+	GSPREC(0, 4, 0x8, 0xC, 10, 2);
+	GSPREC(1, 5, 0x9, 0xD, 8,  4);
+	GSPREC(2, 6, 0xA, 0xE, 7,  6);
+	GSPREC(3, 7, 0xB, 0xF, 1,  5);
+	GSPREC(0, 5, 0xA, 0xF, 15,11);
+	GSPREC(1, 6, 0xB, 0xC, 9, 14);
+	GSPREC(2, 7, 0x8, 0xD, 3, 12);
+	GSPREC(3, 4, 0x9, 0xE, 13, 0);
+	// round 11
+	GSPREC(0, 4, 0x8, 0xC, 0,  1);
+	GSPREC(1, 5, 0x9, 0xD, 2,  3);
+	GSPREC(2, 6, 0xA, 0xE, 4,  5);
+	GSPREC(3, 7, 0xB, 0xF, 6,  7);
+	GSPREC(0, 5, 0xA, 0xF, 8,  9);
+	GSPREC(1, 6, 0xB, 0xC, 10,11);
+	GSPREC(2, 7, 0x8, 0xD, 12,13);
+	GSPREC(3, 4, 0x9, 0xE, 14,15);
+	// round 12
+	GSPREC(0, 4, 0x8, 0xC, 14,10);
+	GSPREC(1, 5, 0x9, 0xD, 4,  8);
+	GSPREC(2, 6, 0xA, 0xE, 9, 15);
+	GSPREC(3, 7, 0xB, 0xF, 13, 6);
+	GSPREC(0, 5, 0xA, 0xF, 1, 12);
+	GSPREC(1, 6, 0xB, 0xC, 0,  2);
+	GSPREC(2, 7, 0x8, 0xD, 11, 7);
+	GSPREC(3, 4, 0x9, 0xE, 5,  3);
+	// round 13
+	GSPREC(0, 4, 0x8, 0xC, 11, 8);
+	GSPREC(1, 5, 0x9, 0xD, 12, 0);
+	GSPREC(2, 6, 0xA, 0xE, 5,  2);
+	GSPREC(3, 7, 0xB, 0xF, 15,13);
+	GSPREC(0, 5, 0xA, 0xF, 10,14);
+	GSPREC(1, 6, 0xB, 0xC, 3,  6);
+	GSPREC(2, 7, 0x8, 0xD, 7,  1);
+	GSPREC(3, 4, 0x9, 0xE, 9,  4);
+	// round 14
+	GSPREC(0, 4, 0x8, 0xC, 7,  9);
+	GSPREC(1, 5, 0x9, 0xD, 3,  1);
+	GSPREC(2, 6, 0xA, 0xE, 13,12);
+	GSPREC(3, 7, 0xB, 0xF, 11,14);
+	GSPREC(0, 5, 0xA, 0xF, 2,  6);
+	GSPREC(1, 6, 0xB, 0xC, 5, 10);
+	GSPREC(2, 7, 0x8, 0xD, 4,  0);
+	//GSPREC(3, 4, 0x9, 0xE, 15, 8);
+
+	// Last G step expanded by hand and cut short: it stops once v[14] has
+	// its final value, because only v[6], v[7], v[14] and v[15] are read
+	// below and this step does not touch v[6], v[7] or v[15].
+	v[3] += (m[15] ^ c_u256[8]) + v[4];
+	v[14] = __byte_perm(v[14] ^ v[3], 0, 0x1032);
+	v[9] += v[14];
+	v[4] = SPH_ROTR32(v[4] ^ v[9], 12);
+	v[3] += (m[8] ^ c_u256[15]) + v[4];
+	v[14] = __byte_perm(v[14] ^ v[3], 0, 0x0321);
+
+	// only compute h6 & 7
+	h[6] ^= v[6] ^ v[14];
+	h[7] ^= v[7] ^ v[15];
+}
+
+/* ############################################################################################################################### */
+
+/**
+ * Nonce search kernel: one candidate nonce per thread.
+ *
+ * Thread index = blockDim.x * blockIdx.x + threadIdx.x (1D launch); threads
+ * with an index >= `threads` do nothing.
+ *
+ * @param threads     number of nonces to test in this launch
+ * @param startNonce  nonce tested by thread 0
+ * @param resNonce    device buffer receiving a matching nonce (NBN words)
+ * @param highTarget  upper bound compared against cuda_swab32(h[6])
+ */
+__global__
+void blake256_gpu_hash_nonce(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonce, const uint64_t highTarget)
+{
+	const uint32_t tid = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (tid >= threads)
+		return;
+
+	const uint32_t nonce = startNonce + tid;
+
+	// chaining value comes from d_data[0..7]
+	uint32_t state[8];
+
+	#pragma unroll
+	for (int w = 0; w < 8; w++)
+		state[w] = d_data[w];
+
+	// ------ Close: Last 52/64 bytes ------
+	blake256_compress_14(state, nonce, (180U*8U));
+
+	// candidate only if h[7] is zero and byte-swapped h[6] is within target
+	const bool hit = (state[7] == 0) && (cuda_swab32(state[6]) <= highTarget);
+	if (!hit)
+		return;
+
+#if NBN == 2
+	if (resNonce[0] != UINT32_MAX)
+		resNonce[1] = nonce;
+	else
+		resNonce[0] = nonce;
+#else
+	resNonce[0] = nonce;
+#endif
+}
+
+/**
+ * Run one batch of the nonce search on the GPU and fetch the result.
+ *
+ * @param thr_id      index into d_resNonce / h_resNonce
+ * @param threads     number of nonces to test (one per GPU thread)
+ * @param startNonce  first nonce of the batch
+ * @param highTarget  forwarded to the kernel
+ * @return the nonce written by the kernel, or UINT32_MAX when nothing was
+ *         found, the batch is empty, or a CUDA call failed
+ */
+__host__
+static uint32_t decred_cpu_hash_nonce(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget)
+{
+	uint32_t result = UINT32_MAX;
+
+	/* empty batch: a grid of zero blocks is not a valid launch configuration */
+	if (threads == 0)
+		return result;
+
+	dim3 grid((threads + TPB-1)/TPB);
+	dim3 block(TPB);
+
+	/* Check error on Ctrl+C or kill to prevent segfaults on exit */
+	/* 0xff fill makes every result word UINT32_MAX, the "not found" marker */
+	if (cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)) != cudaSuccess)
+		return result;
+
+	blake256_gpu_hash_nonce <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget);
+
+	/* a rejected launch is only visible through the last-error state */
+	if (cudaGetLastError() != cudaSuccess)
+		return result;
+
+	/* blocking copy: also waits for the kernel to finish */
+	if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
+		result = h_resNonce[thr_id][0];
+#if NBN > 1
+		for (int n=0; n < (NBN-1); n++)
+			extra_results[n] = h_resNonce[thr_id][n+1];
+#endif
+	}
+	return result;
+}
+
+/**
+ * Host-side midstate: feed the first 128 bytes of `input` to a 14-round sph
+ * BLAKE-256 context and copy 32 bytes of its H state to `output`. The hash
+ * is not closed.
+ */
+__host__
+static void decred_midstate_128(uint32_t *output, const uint32_t *input)
+{
+	sph_blake256_context blake_ctx;
+
+	sph_blake256_set_rounds(14);
+	sph_blake256_init(&blake_ctx);
+
+	/* 128 bytes in, no sph_blake256_close() */
+	sph_blake256(&blake_ctx, input, 128);
+
+	/* export the chaining state as-is */
+	memcpy(output, (void*)blake_ctx.H, 32);
+}
+
+/**
+ * Build the 24-word constant block read by the kernel and upload it to
+ * d_data.
+ *
+ * @param penddata  13 words copied (byte-swapped) into words 8..20
+ * @param midstate  8 words copied into words 0..7
+ * @param ptarget   not used by this function
+ */
+__host__
+void decred_cpu_setBlock_52(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget)
+{
+	uint32_t _ALIGN(64) data[24];
+
+	/* words 0..7: chaining value */
+	memcpy(&data[0], midstate, 8 * sizeof(uint32_t));
+
+	/* words 8..20: pre swab32 of the 13 trailing input words */
+	uint32_t *tail = &data[8];
+	for (int w = 0; w < 13; w++)
+		tail[w] = swab32(penddata[w]);
+
+	/* words 21..23: fixed trailer; 0x5a0 == 180*8, the value the kernel
+	 * also passes as T0 */
+	data[21] = 0x80000001;
+	data[22] = 0;
+	data[23] = 0x000005a0;
+
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_data, data, 32 + 64, 0, cudaMemcpyHostToDevice));
+}
+
+/* ############################################################################################################################### */
+
+static bool init[MAX_GPUS] = { 0 };
+
+// nonce position is different in decred
+#define DCR_NONCE_OFT32 35
+
+/**
+ * Scan a nonce range for the decred algo on one GPU thread.
+ *
+ * @param thr_id       mining thread index (selects the device via device_map)
+ * @param work         work item; data[DCR_NONCE_OFT32] is the nonce and is
+ *                     advanced / overwritten here, nonces[0] gets a found nonce
+ * @param max_nonce    scanning stops before the nonce would pass this value
+ * @param hashes_done  out: number of nonces covered by this call
+ * @return 1 when a share validated on the CPU (2 is only possible when
+ *         NBN > 1), 0 otherwise; the allocation macros are passed -1,
+ *         presumably their return value on failure
+ */
+extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t _ALIGN(64) endiandata[48];
+	uint32_t _ALIGN(64) midstate[8];
+
+	uint32_t *pdata = work->data;
+	uint32_t *ptarget = work->target;
+	uint32_t *pnonce = &pdata[DCR_NONCE_OFT32];
+
+	const uint32_t first_nonce = *pnonce;
+	// 64-bit view of target words 6 and 7
+	uint64_t targetHigh = ((uint64_t*)ptarget)[3];
+
+	// default intensity depends on the device SM version and the OS
+	int dev_id = device_map[thr_id];
+	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 29 : 25;
+	if (device_sm[dev_id] < 350) intensity = 22;
+
+	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
+	// the clamp to the remaining range is only applied once init is done
+	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+
+	int rc = 0;
+
+	if (opt_benchmark) {
+		// fixed targets for benchmark runs
+		targetHigh = 0x1ULL << 32;
+		ptarget[6] = swab32(0xff);
+	}
+
+	if (!init[thr_id])
+	{
+		// one-time device setup and result buffer allocation
+		cudaSetDevice(dev_id);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage (linux)
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+			CUDA_LOG_ERROR();
+		}
+
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
+		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
+		init[thr_id] = true;
+	}
+
+	// CPU copy of the 180 input bytes, midstate over the first 128 bytes,
+	// then upload midstate + the words starting at pdata[32] to the device
+	memcpy(endiandata, pdata, 180);
+	decred_midstate_128(midstate, endiandata);
+	decred_cpu_setBlock_52(&pdata[32], midstate, ptarget);
+
+	do {
+		// GPU HASH
+		uint32_t foundNonce = decred_cpu_hash_nonce(thr_id, throughput, (*pnonce), targetHigh);
+
+		if (foundNonce != UINT32_MAX)
+		{
+			uint32_t vhashcpu[8];
+			uint32_t Htarg = ptarget[6];
+
+			// re-hash the candidate on the CPU before accepting it
+			be32enc(&endiandata[DCR_NONCE_OFT32], foundNonce);
+			decred_hash(vhashcpu, endiandata);
+
+			if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
+			{
+				rc = 1;
+				work_set_target_ratio(work, vhashcpu);
+				// computed before *pnonce is overwritten on the next line
+				*hashes_done = (*pnonce) - first_nonce + throughput;
+				work->nonces[0] = *pnonce = swab32(foundNonce);
+#if NBN > 1
+				if (extra_results[0] != UINT32_MAX) {
+					be32enc(&endiandata[DCR_NONCE_OFT32], extra_results[0]);
+					decred_hash(vhashcpu, endiandata);
+					if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
+						work->nonces[1] = swab32(extra_results[0]);
+						if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio) {
+							work_set_target_ratio(work, vhashcpu);
+							xchg(work->nonces[1], *pnonce);
+						}
+						rc = 2;
+					}
+					extra_results[0] = UINT32_MAX;
+				}
+#endif
+				// early exit: this path skips the MyStreamSynchronize() below
+				return rc;
+			}
+			else if (opt_debug) {
+				applog_hash(ptarget);
+				applog_compare_hash(vhashcpu, ptarget);
+				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
+			}
+		}
+
+		*pnonce += throughput;
+
+	// 64-bit sum so that throughput + nonce cannot wrap in the comparison
+	} while (!work_restart[thr_id].restart && max_nonce > (uint64_t)throughput + (*pnonce));
+
+	*hashes_done = (*pnonce) - first_nonce;
+
+	MyStreamSynchronize(NULL, 0, dev_id);
+	return rc;
+}
+
+/**
+ * Release the per-thread result buffers allocated by scanhash_decred().
+ * Does nothing when the thread was never initialised.
+ */
+extern "C" void free_decred(int thr_id)
+{
+	if (init[thr_id])
+	{
+		// let pending device work finish before freeing
+		cudaDeviceSynchronize();
+
+		cudaFreeHost(h_resNonce[thr_id]);
+		cudaFree(d_resNonce[thr_id]);
+
+		// next scanhash_decred() call will allocate again
+		init[thr_id] = false;
+
+		cudaDeviceSynchronize();
+	}
+}
+
--- a/Makefile.am
+++ b/Makefile.am
@ -36,7 +36,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
				@@ -36,7 +36,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 		          lyra2/lyra2REv2.cu lyra2/cuda_lyra2v2.cu \
 			  Algo256/cuda_bmw256.cu Algo256/cuda_cubehash256.cu \
 			  Algo256/cuda_blake256.cu Algo256/cuda_groestl256.cu Algo256/cuda_keccak256.cu Algo256/cuda_skein256.cu \
-			  Algo256/blake256.cu Algo256/keccak256.cu \
+			  Algo256/blake256.cu Algo256/decred.cu Algo256/keccak256.cu \
 			  Algo256/bmw.cu Algo256/cuda_bmw.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
 			  JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
--- a/algos.h
+++ b/algos.h
@ -10,6 +10,7 @@ enum sha_algos {
				@@ -10,6 +10,7 @@ enum sha_algos {
 	ALGO_BMW,
 	ALGO_C11,
 	ALGO_DEEP,
+	ALGO_DECRED,
 	ALGO_DMD_GR,
 	ALGO_FRESH,
 	ALGO_FUGUE256,		/* Fugue256 */
@ -55,6 +56,7 @@ static const char *algo_names[] = {
				@@ -55,6 +56,7 @@ static const char *algo_names[] = {
 	"bmw",
 	"c11",
 	"deep",
+	"decred",
 	"dmd-gr",
 	"fresh",
 	"fugue256",
--- a/bench.cpp
+++ b/bench.cpp
@ -47,6 +47,7 @@ void algo_free_all(int thr_id)
				@@ -47,6 +47,7 @@ void algo_free_all(int thr_id)
 	free_blake256(thr_id);
 	free_bmw(thr_id);
 	free_c11(thr_id);
+	free_decred(thr_id);
 	free_deep(thr_id);
 	free_keccak256(thr_id);
 	free_fresh(thr_id);
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -546,6 +546,7 @@ static void calc_network_diff(struct work *work)
				@@ -546,6 +546,7 @@ static void calc_network_diff(struct work *work)
 	// sample for diff 43.281 : 1c05ea29
 	// todo: endian reversed on longpoll could be zr5 specific...
 	uint32_t nbits = have_longpoll ? work->data[18] : swab32(work->data[18]);
+	if (opt_algo == ALGO_DECRED) nbits = work->data[29];
 	uint32_t bits = (nbits & 0xffffff);
 	int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28

@ -568,16 +569,20 @@ static bool work_decode(const json_t *val, struct work *work)
				@@ -568,16 +569,20 @@ static bool work_decode(const json_t *val, struct work *work)
 	int i;

 	switch (opt_algo) {
+	case ALGO_DECRED:
+		data_size = 192;
+		adata_sz = 180/4;
+		break;
 	case ALGO_NEOSCRYPT:
 	case ALGO_ZR5:
 		data_size = 80;
+		adata_sz = data_size / 4;
 		break;
 	default:
-		data_size = 128; // sizeof(work->data);
+		data_size = 128;
+		adata_sz = data_size / 4;
 	}

-	adata_sz = data_size / 4; // sizeof(uint32_t);
-
 	if (!jobj_binary(val, "data", work->data, data_size)) {
 		json_t *obj = json_object_get(val, "data");
 		int len = obj ? (int) strlen(json_string_value(obj)) : 0;
@ -647,20 +652,30 @@ static bool work_decode(const json_t *val, struct work *work)
				@@ -647,20 +652,30 @@ static bool work_decode(const json_t *val, struct work *work)
 		}
 	}

-	json_t *jr = json_object_get(val, "noncerange");
-	if (jr) {
-		const char * hexstr = json_string_value(jr);
-		if (likely(hexstr)) {
-			// never seen yet...
-			hex2bin((uchar*)work->noncerange.u64, hexstr, 8);
-			applog(LOG_DEBUG, "received noncerange: %08x-%08x",
-				work->noncerange.u32[0], work->noncerange.u32[1]);
-		}
-	}
-
 	/* use work ntime as job id (solo-mining) */
 	cbin2hex(work->job_id, (const char*)&work->data[17], 4);

+	if (opt_algo == ALGO_DECRED) {
+		// some random extradata to make it unique
+		work->data[36] = (rand()*4);
+		work->data[37] = (rand()*4) << 8;
+		// required for the longpoll pool block info...
+		work->height = work->data[32];
+		if (!have_longpoll && work->height > net_blocks + 1) {
+			char netinfo[64] = { 0 };
+			if (opt_showdiff && net_diff > 0.) {
+				if (net_diff != work->targetdiff)
+					sprintf(netinfo, ", diff %.3f, pool %.1f", net_diff, work->targetdiff);
+				else
+					sprintf(netinfo, ", diff %.3f", net_diff);
+			}
+			applog(LOG_BLUE, "%s block %d%s",
+				algo_names[opt_algo], work->height, netinfo);
+			net_blocks = work->height - 1;
+		}
+		cbin2hex(work->job_id, (const char*)&work->data[34], 4);
+	}
+
 	return true;
 }

@ -729,10 +744,10 @@ static int share_result(int result, int pooln, double sharediff, const char *rea
				@@ -729,10 +744,10 @@ static int share_result(int result, int pooln, double sharediff, const char *rea

 static bool submit_upstream_work(CURL *curl, struct work *work)
 {
+	char s[512];
 	struct pool_infos *pool = &pools[work->pooln];
 	json_t *val, *res, *reason;
 	bool stale_work = false;
-	char s[384];

 	/* discard if a newer block was received */
 	stale_work = work->height && work->height < g_work.height;
@ -776,6 +791,8 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
				@@ -776,6 +791,8 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 			be32enc(&ntime, work->data[17]);
 			be32enc(&nonce, work->data[19]);
 			break;
+		case ALGO_DECRED:
+			break;
 		case ALGO_BLAKE:
 		case ALGO_BLAKECOIN:
 		case ALGO_BMW:
@ -852,6 +869,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
				@@ -852,6 +869,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 		if (opt_algo == ALGO_ZR5) {
 			data_size = 80; adata_sz = 20;
 		}
+		else if (opt_algo == ALGO_DECRED) {
+			data_size = 192; adata_sz = 180/4;
+		}

 		if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR) {
 			for (int i = 0; i < adata_sz; i++)
@ -971,7 +991,7 @@ static bool get_mininginfo(CURL *curl, struct work *work)
				@@ -971,7 +991,7 @@ static bool get_mininginfo(CURL *curl, struct work *work)
 	struct pool_infos *pool = &pools[work->pooln];
 	int curl_err = 0;

-	if (have_stratum || !allow_mininginfo)
+	if (have_stratum || have_longpoll || !allow_mininginfo)
 		return false;

 	json_t *val = json_rpc_call_pool(curl, pool, info_req, false, false, &curl_err);
@ -1223,8 +1243,12 @@ bool get_work(struct thr_info *thr, struct work *work)
				@@ -1223,8 +1243,12 @@ bool get_work(struct thr_info *thr, struct work *work)
 		memset(work->data, 0x55, 76);
 		//work->data[17] = swab32((uint32_t)time(NULL));
 		memset(work->data + 19, 0x00, 52);
-		work->data[20] = 0x80000000;
-		work->data[31] = 0x00000280;
+		if (opt_algo == ALGO_DECRED) {
+			memset(&work->data[35], 0x00, 52);
+		} else {
+			work->data[20] = 0x80000000;
+			work->data[31] = 0x00000280;
+		}
 		memset(work->target, 0x00, sizeof(work->target));
 		return true;
 	}
@ -1358,8 +1382,14 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
				@@ -1358,8 +1382,14 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 		break;
 	}

-	work->data[20] = 0x80000000;
-	work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280;
+	if (opt_algo == ALGO_DECRED) {
+		work->data[45] = 0x80000001;
+		work->data[46] = 0;
+		work->data[47] = 0x000005a0;
+	} else {
+		work->data[20] = 0x80000000;
+		work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280;
+	}

 	// HeavyCoin (vote / reward)
 	if (opt_algo == ALGO_HEAVY) {
@ -1554,7 +1584,7 @@ static void *miner_thread(void *userdata)
				@@ -1554,7 +1584,7 @@ static void *miner_thread(void *userdata)
 		uint64_t max64, minmax = 0x100000;

 		// &work.data[19]
-		int wcmplen = 76;
+		int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
 		int wcmpoft = 0;
 		uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);

@ -1633,9 +1663,16 @@ static void *miner_thread(void *userdata)
				@@ -1633,9 +1663,16 @@ static void *miner_thread(void *userdata)
 			#endif
 			memcpy(&work, &g_work, sizeof(struct work));
 			nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
+			if (opt_algo == ALGO_DECRED) nonceptr[0] = 0;
 		} else
 			nonceptr[0]++; //??

+		if (opt_algo == ALGO_DECRED) {
+			end_nonce = 0xF0000000UL;
+			nonceptr[1] += 1;
+			nonceptr[2] |= thr_id;
+		}
+
 		pthread_mutex_unlock(&g_work_lock);

 		// --benchmark [-a all]
@ -1751,6 +1788,7 @@ static void *miner_thread(void *userdata)
				@@ -1751,6 +1788,7 @@ static void *miner_thread(void *userdata)
 				break;
 			case ALGO_BLAKE:
 			case ALGO_BMW:
+			case ALGO_DECRED:
 			case ALGO_WHIRLPOOLX:
 				minmax = 0x40000000U;
 				break;
@ -1839,6 +1877,9 @@ static void *miner_thread(void *userdata)
				@@ -1839,6 +1877,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_C11:
 			rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done);
 			break;
+		case ALGO_DECRED:
+			rc = scanhash_decred(thr_id, &work, max_nonce, &hashes_done);
+			break;
 		case ALGO_DEEP:
 			rc = scanhash_deep(thr_id, &work, max_nonce, &hashes_done);
 			break;
@ -2154,9 +2195,13 @@ longpoll_retry:
				@@ -2154,9 +2195,13 @@ longpoll_retry:
 					if (net_diff > 0.) {
 						sprintf(netinfo, ", diff %.3f", net_diff);
 					}
-					if (opt_showdiff)
+					if (opt_showdiff) {
 						sprintf(&netinfo[strlen(netinfo)], ", target %.3f", g_work.targetdiff);
-					applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo);
+					}
+					if (g_work.height)
+						applog(LOG_BLUE, "%s block %u%s", algo_names[opt_algo], g_work.height, netinfo);
+					else
+						applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo);
 				}
 				g_work_time = time(NULL);
 			}
@ -3175,6 +3220,13 @@ int main(int argc, char *argv[])
				@@ -3175,6 +3220,13 @@ int main(int argc, char *argv[])
 	cur_pooln = pool_get_first_valid(0);
 	pool_switch(-1, cur_pooln);

+	if (opt_algo == ALGO_DECRED) {
+		allow_gbt = false;
+		want_stratum = have_stratum = false;
+		allow_mininginfo = false;
+		want_longpoll = true;
+	}
+
 	flags = !opt_benchmark && strncmp(rpc_url, "https:", 6)
 	      ? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL)
 	      : CURL_GLOBAL_ALL;
@ -3317,6 +3369,12 @@ int main(int argc, char *argv[])
				@@ -3317,6 +3369,12 @@ int main(int argc, char *argv[])
 	/* real start of the stratum work */
 	if (want_stratum && have_stratum) {
 		tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
+	} else {
+		// hmm, weird on Multicoin.co
+		//char lpurl[512];
+		//sprintf(lpurl, "%s/LP", rpc_url);
+		//if (opt_algo == ALGO_DECRED)
+		//	tq_push(thr_info[longpoll_thr_id].q, strdup(lpurl));
 	}

 #ifdef USE_WRAPNVML
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -408,6 +408,7 @@
				@@ -408,6 +408,7 @@
      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
      <FastMath>true</FastMath>
    </CudaCompile>
+    <CudaCompile Include="Algo256\decred.cu" />
    <CudaCompile Include="Algo256\keccak256.cu" />
    <CudaCompile Include="Algo256\cuda_blake256.cu" />
    <CudaCompile Include="Algo256\cuda_bmw256.cu" />
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -622,6 +622,9 @@
				@@ -622,6 +622,9 @@
    <CudaCompile Include="Algo256\blake256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
+    <CudaCompile Include="Algo256\decred.cu">
+      <Filter>Source Files\CUDA</Filter>
+    </CudaCompile>
    <CudaCompile Include="Algo256\keccak256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -66,7 +66,7 @@
				@@ -66,7 +66,7 @@
 #define HAVE_STRING_H 1

 /* Define to 1 if you have the <syslog.h> header file. */
-/* #undef HAVE_SYSLOG_H */
+#define HAVE_SYSLOG_H 1

 /* Define to 1 if you have the <sys/endian.h> header file. */
 /* #undef HAVE_SYS_ENDIAN_H */
@ -87,7 +87,7 @@
				@@ -87,7 +87,7 @@
 #define HAVE_UNISTD_H 1

 /* Defined if libcurl supports AsynchDNS */
-/* #undef LIBCURL_FEATURE_ASYNCHDNS */
+#define LIBCURL_FEATURE_ASYNCHDNS 1

 /* Defined if libcurl supports IDN */
 #define LIBCURL_FEATURE_IDN 1
@ -111,7 +111,7 @@
				@@ -111,7 +111,7 @@
 /* #undef LIBCURL_FEATURE_SSPI */

 /* Defined if libcurl supports DICT */
-/* #undef LIBCURL_PROTOCOL_DICT */
+#define LIBCURL_PROTOCOL_DICT 1

 /* Defined if libcurl supports FILE */
 #define LIBCURL_PROTOCOL_FILE 1
@ -126,31 +126,28 @@
				@@ -126,31 +126,28 @@
 #define LIBCURL_PROTOCOL_HTTP 1

 /* Defined if libcurl supports HTTPS */
-/* #undef LIBCURL_PROTOCOL_HTTPS */
+#define LIBCURL_PROTOCOL_HTTPS 1

 /* Defined if libcurl supports IMAP */
-/* #undef LIBCURL_PROTOCOL_IMAP */
+#define LIBCURL_PROTOCOL_IMAP 1

 /* Defined if libcurl supports LDAP */
-/* #undef LIBCURL_PROTOCOL_LDAP */
+#define LIBCURL_PROTOCOL_LDAP 1

 /* Defined if libcurl supports POP3 */
-/* #undef LIBCURL_PROTOCOL_POP3 */
+#define LIBCURL_PROTOCOL_POP3 1

 /* Defined if libcurl supports RTSP */
-/* #undef LIBCURL_PROTOCOL_RTSP */
+#define LIBCURL_PROTOCOL_RTSP 1

 /* Defined if libcurl supports SMTP */
-/* #undef LIBCURL_PROTOCOL_SMTP */
+#define LIBCURL_PROTOCOL_SMTP 1

 /* Defined if libcurl supports TELNET */
-/* #undef LIBCURL_PROTOCOL_TELNET */
+#define LIBCURL_PROTOCOL_TELNET 1

 /* Defined if libcurl supports TFTP */
-/* #undef LIBCURL_PROTOCOL_TFTP */
-
-/* Define to 1 if your C compiler doesn't accept -c and -o together. */
-/* #undef NO_MINUS_C_MINUS_O */
+#define LIBCURL_PROTOCOL_TFTP 1

 /* Name of package */
 #define PACKAGE "ccminer"
@ -191,4 +188,4 @@
				@@ -191,4 +188,4 @@
 /* #undef curl_free */

 /* Define to `unsigned int' if <sys/types.h> does not define. */
-//#define size_t unsigned int
+/* #undef size_t */
--- a/miner.h
+++ b/miner.h
@ -264,6 +264,7 @@ struct work;
				@@ -264,6 +264,7 @@ struct work;
 extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds);
 extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
+extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -305,6 +306,7 @@ void algo_free_all(int thr_id);
				@@ -305,6 +306,7 @@ void algo_free_all(int thr_id);
 extern void free_blake256(int thr_id);
 extern void free_bmw(int thr_id);
 extern void free_c11(int thr_id);
+extern void free_decred(int thr_id);
 extern void free_deep(int thr_id);
 extern void free_keccak256(int thr_id);
 extern void free_fresh(int thr_id);
@ -536,7 +538,7 @@ extern void gpulog(int prio, int thr_id, const char *fmt, ...);
				@@ -536,7 +538,7 @@ extern void gpulog(int prio, int thr_id, const char *fmt, ...);
 void get_defconfig_path(char *out, size_t bufsize, char *argv0);
 extern void cbin2hex(char *out, const char *in, size_t len);
 extern char *bin2hex(const unsigned char *in, size_t len);
-extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
+extern bool hex2bin(void *output, const char *hexstr, size_t len);
 extern int timeval_subtract(struct timeval *result, struct timeval *x,
 	struct timeval *y);
 extern bool fulltest(const uint32_t *hash, const uint32_t *target);
@ -612,7 +614,7 @@ struct tx {
				@@ -612,7 +614,7 @@ struct tx {
 };

 struct work {
-	uint32_t data[32];
+	uint32_t data[48];
 	uint32_t target[8];
 	uint32_t maxvote;

@ -625,6 +627,8 @@ struct work {
				@@ -625,6 +627,8 @@ struct work {
 		uint64_t u64[1];
 	} noncerange;

+	uint32_t nonces[2];
+
 	double targetdiff;
 	double shareratio;
 	double sharediff;
@ -761,13 +765,16 @@ void restart_threads(void);
				@@ -761,13 +765,16 @@ void restart_threads(void);
 size_t time2str(char* buf, time_t timer);
 char* atime2str(time_t timer);

-void applog_hash(unsigned char *hash);
-void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
+void applog_hex(void *data, int len);
+void applog_hash(void *hash);
+void applog_hash64(void *hash);
+void applog_compare_hash(void *hash, void *hash_ref);

 void print_hash_tests(void);
 void blake256hash(void *output, const void *input, int8_t rounds);
 void bmw_hash(void *state, const void *input);
 void c11hash(void *output, const void *input);
+void decred_hash(void *state, const void *input);
 void deephash(void *state, const void *input);
 void luffa_hash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
--- a/util.cpp
+++ b/util.cpp
@ -724,9 +724,10 @@ char *bin2hex(const uchar *in, size_t len)
				@@ -724,9 +724,10 @@ char *bin2hex(const uchar *in, size_t len)
 	return s;
 }

-bool hex2bin(uchar *p, const char *hexstr, size_t len)
+bool hex2bin(void *output, const char *hexstr, size_t len)
 {
-	char hex_byte[3];
+	uchar *p = (uchar *) output;
+	char hex_byte[4];
 	char *ep;

 	hex_byte[2] = '\0';
@ -1791,8 +1792,9 @@ char* atime2str(time_t timer)
				@@ -1791,8 +1792,9 @@ char* atime2str(time_t timer)
 }

 /* sprintf can be used in applog */
-static char* format_hash(char* buf, uchar *hash)
+static char* format_hash(char* buf, uint8_t* h)
 {
+	uchar *hash = (uchar*) h;
 	int len = 0;
 	for (int i=0; i < 32; i += 4) {
 		len += sprintf(buf+len, "%02x%02x%02x%02x ",
@ -1802,23 +1804,39 @@ static char* format_hash(char* buf, uchar *hash)
				@@ -1802,23 +1804,39 @@ static char* format_hash(char* buf, uchar *hash)
 }

 /* to debug diff in data */
-extern void applog_compare_hash(uchar *hash, uchar *hash2)
+void applog_compare_hash(void *hash, void *hash_ref)
 {
 	char s[256] = "";
 	int len = 0;
+	uchar* hash1 = (uchar*)hash;
+	uchar* hash2 = (uchar*)hash_ref;
 	for (int i=0; i < 32; i += 4) {
-		const char *color = memcmp(hash+i, hash2+i, 4) ? CL_WHT : CL_GRY;
+		const char *color = memcmp(hash1+i, hash2+i, 4) ? CL_WHT : CL_GRY;
 		len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color,
-			hash[i], hash[i+1], hash[i+2], hash[i+3]);
+			hash1[i], hash1[i+1], hash1[i+2], hash1[i+3]);
 		s[len] = '\0';
 	}
 	applog(LOG_DEBUG, "%s", s);
 }

-extern void applog_hash(uchar *hash)
+void applog_hash(void *hash)
+{
+	char s[128] = {'\0'};
+	applog(LOG_DEBUG, "%s", format_hash(s, (uint8_t*)hash));
+}
+
+void applog_hash64(void *hash)
 {
 	char s[128] = {'\0'};
-	applog(LOG_DEBUG, "%s", format_hash(s, hash));
+	char t[128] = {'\0'};
+	applog(LOG_DEBUG, "%s %s", format_hash(s, (uint8_t*)hash), format_hash(t, &((uint8_t*)hash)[32]));
+}
+
+void applog_hex(void *data, int len)
+{
+	char* hex = bin2hex((uchar*)data, len);
+	applog(LOG_DEBUG, "%s", hex);
+	free(hex);
 }

 #define printpfx(n,h) \
@ -1865,7 +1883,7 @@ void do_gpu_tests(void)
				@@ -1865,7 +1883,7 @@ void do_gpu_tests(void)
 	//scanhash_scrypt_jane(0, &work, NULL, 1, &done, &tv, &tv);

 	memset(work.data, 0, sizeof(work.data));
-	scanhash_sib(0, &work, 1, &done);
+	scanhash_decred(0, &work, 1, &done);

 	free(work_restart);
 	work_restart = NULL;
@ -1878,7 +1896,7 @@ void print_hash_tests(void)
				@@ -1878,7 +1896,7 @@ void print_hash_tests(void)
 	uchar *scratchbuf = NULL;
 	char s[128] = {'\0'};
 	uchar hash[128];
-	uchar buf[128];
+	uchar buf[192];

 	// work space for scratchpad based algos
 	scratchbuf = (uchar*)calloc(128, 1024);
@ -1900,6 +1918,10 @@ void print_hash_tests(void)
				@@ -1900,6 +1918,10 @@ void print_hash_tests(void)
 	c11hash(&hash[0], &buf[0]);
 	printpfx("c11", hash);

+	memset(buf, 0, 180);
+	decred_hash(&hash[0], &buf[0]);
+	printpfx("decred", hash);
+
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);