decred algo for longpoll/getwork

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
9 years ago · 6e95407dcf
11 changed files with 590 additions and 56 deletions
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@ -487,7 +487,7 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 	if (opt_benchmark) {
 		targetHigh = 0x1ULL << 32;
-		ptarget[6] = swab32(0x00ff);
+		ptarget[6] = swab32(0xff);
 	}
 	if (!init[thr_id])
@ -519,9 +519,9 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 		if (foundNonce != UINT32_MAX)
 		{
 			uint32_t vhashcpu[8];
-			uint32_t Htarg = (uint32_t)targetHigh;
+			uint32_t Htarg = ptarget[6];
-			for (int k=0; k < 19; k++)
+			for (int k=16; k < 19; k++)
 				be32enc(&endiandata[k], pdata[k]);
 			be32enc(&endiandata[19], foundNonce);
--- a/Algo256/decred.cu
+++ b/Algo256/decred.cu
@ -0,0 +1,443 @@
 /**
 * Blake-256 Decred 180-Bytes input Cuda Kernel (Tested on SM 5/5.2)
 *
 * Tanguy Pruvot - Feb 2016
 */
 #include <stdint.h>
 #include <memory.h>
 #include <miner.h>
 extern "C" {
 #include <sph/sph_blake.h>
 }
 /* threads per block */
 #define TPB 256
 /* hash by cpu with blake 256 */
 /**
  * CPU reference hash: 14-round Blake-256 over the first 180 bytes of input.
  *
  * output receives the digest written by sph_blake256_close() (callers in
  * this file pass a uint32_t[8] buffer); input must provide 180 readable bytes.
  */
 extern "C" void decred_hash(void *output, const void *input)
 {
 	const size_t header_len = 180; /* bytes hashed from input */
 	sph_blake256_context blake;
 	/* the round count is selected before the context is initialised */
 	sph_blake256_set_rounds(14);
 	sph_blake256_init(&blake);
 	sph_blake256(&blake, input, header_len);
 	sph_blake256_close(&blake, output);
 }
 #include <cuda_helper.h>
 #ifdef __INTELLISENSE__
 #define __byte_perm(x, y, b) x
 #endif
 /*
  * Per-work constant input of the kernel (24 words, uploaded by
  * decred_cpu_setBlock_52):
  *   [0..7]   chaining value after the first 128 bytes (midstate)
  *   [8..20]  the 13 remaining data words, already byte-swapped on the host
  *   [21..23] fixed padding / length words (0x80000001, 0, 0x5a0)
  * Word 11 (message word 3) is replaced by the per-thread nonce in the kernel.
  */
 __constant__ uint32_t _ALIGN(4) d_data[24];
 /* 8 adapters max */
 /* result buffers, one device/host pair per mining thread (indexed by thr_id) */
 static uint32_t *d_resNonce[MAX_GPUS];
 static uint32_t *h_resNonce[MAX_GPUS];
 /* max count of found nonces in one call */
 #define NBN 1
 #if NBN > 1
 /* additional nonces (beyond the first) reported by the last kernel call */
 static uint32_t extra_results[NBN] = { UINT32_MAX };
 #endif
 /* ############################################################################################################################### */
 /*
  * Blake-256 G function on state words v[a], v[b], v[c], v[d] using the
  * message/constant index pair (x, y).
  * Expects arrays named v, m and c_u256 to be in scope at the expansion site.
  * The two __byte_perm() calls are byte-granular rotations of v[d] ^ v[a]:
  * selector 0x1032 swaps the 16-bit halves (rotate by 16) and selector
  * 0x0321 rotates right by 8 bits; the other two rotations (12 and 7) use
  * SPH_ROTR32.
  */
 #define GSPREC(a,b,c,d,x,y) { \
 	v[a] += (m[x] ^ c_u256[y]) + v[b]; \
 	v[d] = __byte_perm(v[d] ^ v[a], 0, 0x1032); \
 	v[c] += v[d]; \
 	v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \
 	v[a] += (m[y] ^ c_u256[x]) + v[b]; \
 	v[d] = __byte_perm(v[d] ^ v[a], 0, 0x0321); \
 	v[c] += v[d]; \
 	v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
 }
 /**
  * 14-round Blake-256 compression of the last message block, specialised for
  * the nonce search: only h[6] and h[7] are finalised.
  *
  * h      in: 8-word chaining value; out: h[6] and h[7] are updated, h[0..5]
  *        are left untouched (they are NOT the final hash words).
  * nonce  used as message word 3; every other message word is read from
  *        d_data[8..23].
  * T0     counter value XORed into v[12] and v[13].
  *
  * The 14 rounds are fully unrolled; rounds 11-14 repeat the index schedule
  * of rounds 1-4.
  */
 __device__ __forceinline__
 void blake256_compress_14(uint32_t *h, const uint32_t nonce, const uint32_t T0)
 {
 	uint32_t v[16];
 	#pragma unroll 8
 	for(uint32_t i = 0; i < 8; i++)
 		v[i] = h[i];
 	const uint32_t c_u256[16] = {
 		0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
 		0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
 		0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
 		0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
 	};
 	// lower half of the state: constants, with the counter mixed into v[12]/v[13]
 	v[ 8] = c_u256[0];
 	v[ 9] = c_u256[1];
 	v[10] = c_u256[2];
 	v[11] = c_u256[3];
 	v[12] = c_u256[4] ^ T0;
 	v[13] = c_u256[5] ^ T0;
 	v[14] = c_u256[6];
 	v[15] = c_u256[7];
 	// message block: d_data[8..23] with word 3 overridden by the thread nonce
 	uint32_t m[16];
 	m[0] = d_data[8];
 	m[1] = d_data[9];
 	m[2] = d_data[10];
 	m[3] = nonce;
 	#pragma unroll
 	for (uint32_t i = 4; i < 16; i++) {
 		m[i] = d_data[i+8U];
 	}
 	// round 1
 	GSPREC(0, 4, 0x8, 0xC, 0,  1);
 	GSPREC(1, 5, 0x9, 0xD, 2,  3);
 	GSPREC(2, 6, 0xA, 0xE, 4,  5);
 	GSPREC(3, 7, 0xB, 0xF, 6,  7);
 	GSPREC(0, 5, 0xA, 0xF, 8,  9);
 	GSPREC(1, 6, 0xB, 0xC, 10, 11);
 	GSPREC(2, 7, 0x8, 0xD, 12, 13);
 	GSPREC(3, 4, 0x9, 0xE, 14, 15);
 	// round 2
 	GSPREC(0, 4, 0x8, 0xC, 14, 10);
 	GSPREC(1, 5, 0x9, 0xD, 4,  8);
 	GSPREC(2, 6, 0xA, 0xE, 9,  15);
 	GSPREC(3, 7, 0xB, 0xF, 13, 6);
 	GSPREC(0, 5, 0xA, 0xF, 1,  12);
 	GSPREC(1, 6, 0xB, 0xC, 0,  2);
 	GSPREC(2, 7, 0x8, 0xD, 11, 7);
 	GSPREC(3, 4, 0x9, 0xE, 5,  3);
 	// round 3
 	GSPREC(0, 4, 0x8, 0xC, 11, 8);
 	GSPREC(1, 5, 0x9, 0xD, 12, 0);
 	GSPREC(2, 6, 0xA, 0xE, 5,  2);
 	GSPREC(3, 7, 0xB, 0xF, 15, 13);
 	GSPREC(0, 5, 0xA, 0xF, 10, 14);
 	GSPREC(1, 6, 0xB, 0xC, 3,  6);
 	GSPREC(2, 7, 0x8, 0xD, 7,  1);
 	GSPREC(3, 4, 0x9, 0xE, 9,  4);
 	// round 4
 	GSPREC(0, 4, 0x8, 0xC, 7,  9);
 	GSPREC(1, 5, 0x9, 0xD, 3,  1);
 	GSPREC(2, 6, 0xA, 0xE, 13, 12);
 	GSPREC(3, 7, 0xB, 0xF, 11, 14);
 	GSPREC(0, 5, 0xA, 0xF, 2,  6);
 	GSPREC(1, 6, 0xB, 0xC, 5,  10);
 	GSPREC(2, 7, 0x8, 0xD, 4,  0);
 	GSPREC(3, 4, 0x9, 0xE, 15, 8);
 	// round 5
 	GSPREC(0, 4, 0x8, 0xC, 9,  0);
 	GSPREC(1, 5, 0x9, 0xD, 5,  7);
 	GSPREC(2, 6, 0xA, 0xE, 2,  4);
 	GSPREC(3, 7, 0xB, 0xF, 10, 15);
 	GSPREC(0, 5, 0xA, 0xF, 14, 1);
 	GSPREC(1, 6, 0xB, 0xC, 11, 12);
 	GSPREC(2, 7, 0x8, 0xD, 6,  8);
 	GSPREC(3, 4, 0x9, 0xE, 3,  13);
 	// round 6
 	GSPREC(0, 4, 0x8, 0xC, 2, 12);
 	GSPREC(1, 5, 0x9, 0xD, 6, 10);
 	GSPREC(2, 6, 0xA, 0xE, 0, 11);
 	GSPREC(3, 7, 0xB, 0xF, 8, 3);
 	GSPREC(0, 5, 0xA, 0xF, 4, 13);
 	GSPREC(1, 6, 0xB, 0xC, 7, 5);
 	GSPREC(2, 7, 0x8, 0xD, 15,14);
 	GSPREC(3, 4, 0x9, 0xE, 1, 9);
 	// round 7
 	GSPREC(0, 4, 0x8, 0xC, 12, 5);
 	GSPREC(1, 5, 0x9, 0xD, 1, 15);
 	GSPREC(2, 6, 0xA, 0xE, 14,13);
 	GSPREC(3, 7, 0xB, 0xF, 4, 10);
 	GSPREC(0, 5, 0xA, 0xF, 0,  7);
 	GSPREC(1, 6, 0xB, 0xC, 6,  3);
 	GSPREC(2, 7, 0x8, 0xD, 9,  2);
 	GSPREC(3, 4, 0x9, 0xE, 8, 11);
 	// round 8
 	GSPREC(0, 4, 0x8, 0xC, 13,11);
 	GSPREC(1, 5, 0x9, 0xD, 7, 14);
 	GSPREC(2, 6, 0xA, 0xE, 12, 1);
 	GSPREC(3, 7, 0xB, 0xF, 3,  9);
 	GSPREC(0, 5, 0xA, 0xF, 5,  0);
 	GSPREC(1, 6, 0xB, 0xC, 15, 4);
 	GSPREC(2, 7, 0x8, 0xD, 8,  6);
 	GSPREC(3, 4, 0x9, 0xE, 2, 10);
 	// round 9
 	GSPREC(0, 4, 0x8, 0xC, 6, 15);
 	GSPREC(1, 5, 0x9, 0xD, 14, 9);
 	GSPREC(2, 6, 0xA, 0xE, 11, 3);
 	GSPREC(3, 7, 0xB, 0xF, 0,  8);
 	GSPREC(0, 5, 0xA, 0xF, 12, 2);
 	GSPREC(1, 6, 0xB, 0xC, 13, 7);
 	GSPREC(2, 7, 0x8, 0xD, 1,  4);
 	GSPREC(3, 4, 0x9, 0xE, 10, 5);
 	// round 10
 	GSPREC(0, 4, 0x8, 0xC, 10, 2);
 	GSPREC(1, 5, 0x9, 0xD, 8,  4);
 	GSPREC(2, 6, 0xA, 0xE, 7,  6);
 	GSPREC(3, 7, 0xB, 0xF, 1,  5);
 	GSPREC(0, 5, 0xA, 0xF, 15,11);
 	GSPREC(1, 6, 0xB, 0xC, 9, 14);
 	GSPREC(2, 7, 0x8, 0xD, 3, 12);
 	GSPREC(3, 4, 0x9, 0xE, 13, 0);
 	// round 11
 	GSPREC(0, 4, 0x8, 0xC, 0,  1);
 	GSPREC(1, 5, 0x9, 0xD, 2,  3);
 	GSPREC(2, 6, 0xA, 0xE, 4,  5);
 	GSPREC(3, 7, 0xB, 0xF, 6,  7);
 	GSPREC(0, 5, 0xA, 0xF, 8,  9);
 	GSPREC(1, 6, 0xB, 0xC, 10,11);
 	GSPREC(2, 7, 0x8, 0xD, 12,13);
 	GSPREC(3, 4, 0x9, 0xE, 14,15);
 	// round 12
 	GSPREC(0, 4, 0x8, 0xC, 14,10);
 	GSPREC(1, 5, 0x9, 0xD, 4,  8);
 	GSPREC(2, 6, 0xA, 0xE, 9, 15);
 	GSPREC(3, 7, 0xB, 0xF, 13, 6);
 	GSPREC(0, 5, 0xA, 0xF, 1, 12);
 	GSPREC(1, 6, 0xB, 0xC, 0,  2);
 	GSPREC(2, 7, 0x8, 0xD, 11, 7);
 	GSPREC(3, 4, 0x9, 0xE, 5,  3);
 	// round 13
 	GSPREC(0, 4, 0x8, 0xC, 11, 8);
 	GSPREC(1, 5, 0x9, 0xD, 12, 0);
 	GSPREC(2, 6, 0xA, 0xE, 5,  2);
 	GSPREC(3, 7, 0xB, 0xF, 15,13);
 	GSPREC(0, 5, 0xA, 0xF, 10,14);
 	GSPREC(1, 6, 0xB, 0xC, 3,  6);
 	GSPREC(2, 7, 0x8, 0xD, 7,  1);
 	GSPREC(3, 4, 0x9, 0xE, 9,  4);
 	// round 14
 	GSPREC(0, 4, 0x8, 0xC, 7,  9);
 	GSPREC(1, 5, 0x9, 0xD, 3,  1);
 	GSPREC(2, 6, 0xA, 0xE, 13,12);
 	GSPREC(3, 7, 0xB, 0xF, 11,14);
 	GSPREC(0, 5, 0xA, 0xF, 2,  6);
 	GSPREC(1, 6, 0xB, 0xC, 5, 10);
 	GSPREC(2, 7, 0x8, 0xD, 4,  0);
 	//GSPREC(3, 4, 0x9, 0xE, 15, 8);
 	// Last G expanded by hand and cut short: its two final steps (the second
 	// update of v[9] and of v[4]) are dropped because neither word feeds
 	// h[6] or h[7] below.
 	v[3] += (m[15] ^ c_u256[8]) + v[4];
 	v[14] = __byte_perm(v[14] ^ v[3], 0, 0x1032);
 	v[9] += v[14];
 	v[4] = SPH_ROTR32(v[4] ^ v[9], 12);
 	v[3] += (m[8] ^ c_u256[15]) + v[4];
 	v[14] = __byte_perm(v[14] ^ v[3], 0, 0x0321);
 	// only compute h6 & 7
 	h[6] ^= v[6] ^ v[14];
 	h[7] ^= v[7] ^ v[15];
 }
 /* ############################################################################################################################### */
 /**
  * Nonce search kernel: each thread tests the single nonce
  * startNonce + (global thread index).
  *
  * threads     number of nonces to test; threads with a larger index exit.
  * resNonce    device buffer receiving a matching nonce (plain stores, so when
  *             several threads match, one of them wins).
  * highTarget  upper bound compared against the byte-swapped h[6].
  *
  * Expects a 1D launch and d_data to be loaded for the current work.
  */
 __global__
 void blake256_gpu_hash_nonce(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonce, const uint64_t highTarget)
 {
 	const uint32_t gid = (blockDim.x * blockIdx.x + threadIdx.x);
 	/* surplus threads of the last block have no nonce to test */
 	if (gid >= threads)
 		return;
 	const uint32_t candidate = startNonce + gid;
 	/* start from the chaining value stored in d_data[0..7] */
 	uint32_t state[8];
 	#pragma unroll
 	for (int w = 0; w < 8; w++)
 		state[w] = d_data[w];
 	// ------ Close: Last 52/64 bytes ------
 	blake256_compress_14(state, candidate, (180U*8U));
 	const bool hit = (state[7] == 0) && (cuda_swab32(state[6]) <= highTarget);
 	if (!hit)
 		return;
 #if NBN == 2
 	if (resNonce[0] != UINT32_MAX)
 		resNonce[1] = candidate;
 	else
 		resNonce[0] = candidate;
 #else
 	resNonce[0] = candidate;
 #endif
 }
 /**
  * Host wrapper: run the nonce search kernel over `threads` nonces starting at
  * startNonce and fetch the result.
  *
  * Returns the nonce reported by the kernel, or UINT32_MAX when nothing was
  * found or when resetting / reading back the result buffer failed.
  * With NBN > 1 the remaining result slots are copied to extra_results[].
  */
 __host__
 static uint32_t decred_cpu_hash_nonce(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget)
 {
 	const size_t res_bytes = NBN*sizeof(uint32_t);
 	const dim3 block(TPB);
 	const dim3 grid((threads + TPB-1)/TPB);
 	uint32_t *dev_res = d_resNonce[thr_id];
 	uint32_t *host_res = h_resNonce[thr_id];
 	/* Check error on Ctrl+C or kill to prevent segfaults on exit */
 	/* 0xff bytes make every slot read back as UINT32_MAX ("no result") */
 	if (cudaMemset(dev_res, 0xff, res_bytes) != cudaSuccess)
 		return UINT32_MAX;
 	blake256_gpu_hash_nonce <<<grid, block>>> (threads, startNonce, dev_res, highTarget);
 	/* blocking copy of the result slots back to the host */
 	if (cudaMemcpy(host_res, dev_res, res_bytes, cudaMemcpyDeviceToHost) != cudaSuccess)
 		return UINT32_MAX;
 #if NBN > 1
 	for (int n=0; n < (NBN-1); n++)
 		extra_results[n] = host_res[n+1];
 #endif
 	return host_res[0];
 }
 /**
  * Feed the first 128 bytes of input to a 14-round Blake-256 context on the
  * CPU and copy the context's H words (32 bytes) to output, without closing
  * the hash. The result is used as the chaining value for the GPU kernel.
  */
 __host__
 static void decred_midstate_128(uint32_t *output, const uint32_t *input)
 {
 	sph_blake256_context blake;
 	sph_blake256_set_rounds(14);
 	sph_blake256_init(&blake);
 	/* 128 bytes = two 64-byte blocks; no padding is applied here */
 	sph_blake256(&blake, input, 2 * 64);
 	/* export the 8 state words */
 	memcpy(output, blake.H, 8 * sizeof(uint32_t));
 }
 /**
  * Build the 24-word kernel input and upload it to the d_data constant array.
  *
  * penddata  13 words following the first 128 bytes of the work data; they
  *           are byte-swapped here so the kernel can use them directly.
  * midstate  8-word chaining value after the first 128 bytes.
  * ptarget   currently unused.
  */
 __host__
 void decred_cpu_setBlock_52(uint32_t *penddata, const uint32_t *midstate, const uint32_t *ptarget)
 {
 	uint32_t _ALIGN(64) data[24];
 	uint32_t *tail = &data[8];
 	/* words 0..7: midstate */
 	memcpy(data, midstate, 8 * sizeof(uint32_t));
 	/* words 8..20: remaining data, pre swab32 */
 	int w = 0;
 	while (w < 13) {
 		tail[w] = swab32(penddata[w]);
 		w++;
 	}
 	/* words 21..23: fixed trailer (0x5a0 = 1440 = 180 * 8) */
 	tail[13] = 0x80000001;
 	tail[14] = 0;
 	tail[15] = 0x000005a0;
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_data, data, 32 + 64, 0, cudaMemcpyHostToDevice));
 }
 /* ############################################################################################################################### */
 /* set once the result buffers of a mining thread are allocated; cleared by free_decred() */
 static bool init[MAX_GPUS] = { 0 };
 // nonce position is different in decred
 // (index in 32-bit words into work->data, i.e. byte offset 140)
 #define DCR_NONCE_OFT32 35
 /**
  * Scan nonces of a Decred work item on the GPU assigned to thr_id.
  *
  * thr_id       mining thread index (selects the device and result buffers).
  * work         work item; work->data[DCR_NONCE_OFT32] is the nonce and is
  *              advanced in place, work->nonces[] receives found nonces.
  * max_nonce    scanning stops before the nonce would pass this value.
  * hashes_done  out: number of nonces covered by this call.
  *
  * Returns 0 when no share was found, 1 when one nonce was found (2 is only
  * reachable when NBN > 1). -1 is the value handed to CUDA_CALL_OR_RET_X for
  * allocation failures (presumably returned to the caller - macro not visible
  * here).
  */
 extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
 {
 	uint32_t _ALIGN(64) endiandata[48];
 	uint32_t _ALIGN(64) midstate[8];
 	uint32_t *pdata = work->data;
 	uint32_t *ptarget = work->target;
 	// the nonce is read and updated directly inside work->data
 	uint32_t *pnonce = &pdata[DCR_NONCE_OFT32];
 	const uint32_t first_nonce = *pnonce;
 	// target words 6 and 7 viewed as one 64-bit value (GPU side pre-filter)
 	uint64_t targetHigh = ((uint64_t*)ptarget)[3];
 	int dev_id = device_map[thr_id];
 	// default launch size is 2^intensity nonces, chosen from device_sm
 	// (presumably compute capability x 100 - TODO confirm)
 	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 29 : 25;
 	if (device_sm[dev_id] < 350) intensity = 22;
 	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
 	// after the first call, never launch more threads than nonces left
 	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
 	int rc = 0;
 	// benchmark mode: replace the work target with a fixed one
 	if (opt_benchmark) {
 		targetHigh = 0x1ULL << 32;
 		ptarget[6] = swab32(0xff);
 	}
 	// one-time device setup and result buffer allocation for this thread
 	if (!init[thr_id])
 	{
 		cudaSetDevice(dev_id);
 		if (opt_cudaschedule == -1 && gpu_threads == 1) {
 			cudaDeviceReset();
 			// reduce cpu usage (linux)
 			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 			CUDA_LOG_ERROR();
 		}
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		init[thr_id] = true;
 	}
 	// 180 bytes are copied as-is (no per-word swap) for the CPU side hash
 	memcpy(endiandata, pdata, 180);
 	// hash the first 128 bytes once on the CPU, upload the rest (from word 32)
 	decred_midstate_128(midstate, endiandata);
 	decred_cpu_setBlock_52(&pdata[32], midstate, ptarget);
 	do {
 		// GPU HASH
 		uint32_t foundNonce = decred_cpu_hash_nonce(thr_id, throughput, (*pnonce), targetHigh);
 		if (foundNonce != UINT32_MAX)
 		{
 			// re-hash the candidate on the CPU before reporting it
 			uint32_t vhashcpu[8];
 			uint32_t Htarg = ptarget[6];
 			be32enc(&endiandata[DCR_NONCE_OFT32], foundNonce);
 			decred_hash(vhashcpu, endiandata);
 			if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
 			{
 				rc = 1;
 				work_set_target_ratio(work, vhashcpu);
 				// must be computed before *pnonce is overwritten below
 				*hashes_done = (*pnonce) - first_nonce + throughput;
 				// the nonce is stored byte-swapped in the work data
 				work->nonces[0] = *pnonce = swab32(foundNonce);
 #if NBN > 1
 				// optional second nonce from the same kernel call
 				if (extra_results[0] != UINT32_MAX) {
 					be32enc(&endiandata[DCR_NONCE_OFT32], extra_results[0]);
 					decred_hash(vhashcpu, endiandata);
 					if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
 						work->nonces[1] = swab32(extra_results[0]);
 						if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio) {
 							work_set_target_ratio(work, vhashcpu);
 							xchg(work->nonces[1], *pnonce);
 						}
 						rc = 2;
 					}
 					extra_results[0] = UINT32_MAX;
 				}
 #endif
 				return rc;
 			}
 			else if (opt_debug) {
 				// GPU candidate rejected by the CPU check: dump target and hash
 				applog_hash(ptarget);
 				applog_compare_hash(vhashcpu, ptarget);
 				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
 			}
 		}
 		*pnonce += throughput;
 	// 64-bit sum so the range check cannot wrap around
 	} while (!work_restart[thr_id].restart && max_nonce > (uint64_t)throughput + (*pnonce));
 	*hashes_done = (*pnonce) - first_nonce;
 	MyStreamSynchronize(NULL, 0, dev_id);
 	return rc;
 }
 /**
  * Release the result buffers of a mining thread and mark it uninitialised.
  * Does nothing when the thread has not been initialised.
  */
 extern "C" void free_decred(int thr_id)
 {
 	if (init[thr_id]) {
 		/* wait for the device before releasing the buffers */
 		cudaDeviceSynchronize();
 		cudaFreeHost(h_resNonce[thr_id]);
 		cudaFree(d_resNonce[thr_id]);
 		init[thr_id] = false;
 		cudaDeviceSynchronize();
 	}
 }
--- a/Makefile.am
+++ b/Makefile.am
@ -36,7 +36,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 		          lyra2/lyra2REv2.cu lyra2/cuda_lyra2v2.cu \
 			  Algo256/cuda_bmw256.cu Algo256/cuda_cubehash256.cu \
 			  Algo256/cuda_blake256.cu Algo256/cuda_groestl256.cu Algo256/cuda_keccak256.cu Algo256/cuda_skein256.cu \
-			  Algo256/blake256.cu Algo256/keccak256.cu \
+			  Algo256/blake256.cu Algo256/decred.cu Algo256/keccak256.cu \
 			  Algo256/bmw.cu Algo256/cuda_bmw.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
 			  JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
--- a/algos.h
+++ b/algos.h
@ -10,6 +10,7 @@ enum sha_algos {
 	ALGO_BMW,
 	ALGO_C11,
 	ALGO_DEEP,
 	ALGO_DECRED,
 	ALGO_DMD_GR,
 	ALGO_FRESH,
 	ALGO_FUGUE256,		/* Fugue256 */
@ -55,6 +56,7 @@ static const char *algo_names[] = {
 	"bmw",
 	"c11",
 	"deep",
 	"decred",
 	"dmd-gr",
 	"fresh",
 	"fugue256",
--- a/bench.cpp
+++ b/bench.cpp
@ -47,6 +47,7 @@ void algo_free_all(int thr_id)
 	free_blake256(thr_id);
 	free_bmw(thr_id);
 	free_c11(thr_id);
 	free_decred(thr_id);
 	free_deep(thr_id);
 	free_keccak256(thr_id);
 	free_fresh(thr_id);
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -546,6 +546,7 @@ static void calc_network_diff(struct work *work)
 	// sample for diff 43.281 : 1c05ea29
 	// todo: endian reversed on longpoll could be zr5 specific...
 	uint32_t nbits = have_longpoll ? work->data[18] : swab32(work->data[18]);
 	if (opt_algo == ALGO_DECRED) nbits = work->data[29];
 	uint32_t bits = (nbits & 0xffffff);
 	int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
@ -568,16 +569,20 @@ static bool work_decode(const json_t *val, struct work *work)
 	int i;
 	switch (opt_algo) {
 	case ALGO_DECRED:
 		data_size = 192;
 		adata_sz = 180/4;
 		break;
 	case ALGO_NEOSCRYPT:
 	case ALGO_ZR5:
 		data_size = 80;
 		adata_sz = data_size / 4;
 		break;
 	default:
-		data_size = 128; // sizeof(work->data);
+		data_size = 128;
 		adata_sz = data_size / 4;
 	}
 	adata_sz = data_size / 4; // sizeof(uint32_t);
 	if (!jobj_binary(val, "data", work->data, data_size)) {
 		json_t *obj = json_object_get(val, "data");
 		int len = obj ? (int) strlen(json_string_value(obj)) : 0;
@ -647,20 +652,30 @@ static bool work_decode(const json_t *val, struct work *work)
 		}
 	}
 	json_t *jr = json_object_get(val, "noncerange");
 	if (jr) {
 		const char * hexstr = json_string_value(jr);
 		if (likely(hexstr)) {
 			// never seen yet...
 			hex2bin((uchar*)work->noncerange.u64, hexstr, 8);
 			applog(LOG_DEBUG, "received noncerange: %08x-%08x",
 				work->noncerange.u32[0], work->noncerange.u32[1]);
 		}
 	}
 	/* use work ntime as job id (solo-mining) */
 	cbin2hex(work->job_id, (const char*)&work->data[17], 4);
 	if (opt_algo == ALGO_DECRED) {
 		// some random extradata to make it unique
 		work->data[36] = (rand()*4);
 		work->data[37] = (rand()*4) << 8;
 		// required for the longpoll pool block info...
 		work->height = work->data[32];
 		if (!have_longpoll && work->height > net_blocks + 1) {
 			char netinfo[64] = { 0 };
 			if (opt_showdiff && net_diff > 0.) {
 				if (net_diff != work->targetdiff)
 					sprintf(netinfo, ", diff %.3f, pool %.1f", net_diff, work->targetdiff);
 				else
 					sprintf(netinfo, ", diff %.3f", net_diff);
 			}
 			applog(LOG_BLUE, "%s block %d%s",
 				algo_names[opt_algo], work->height, netinfo);
 			net_blocks = work->height - 1;
 		}
 		cbin2hex(work->job_id, (const char*)&work->data[34], 4);
 	}
 	return true;
 }
@ -729,10 +744,10 @@ static int share_result(int result, int pooln, double sharediff, const char *rea
 static bool submit_upstream_work(CURL *curl, struct work *work)
 {
 	char s[512];
 	struct pool_infos *pool = &pools[work->pooln];
 	json_t *val, *res, *reason;
 	bool stale_work = false;
 	char s[384];
 	/* discard if a newer block was received */
 	stale_work = work->height && work->height < g_work.height;
@ -776,6 +791,8 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 			be32enc(&ntime, work->data[17]);
 			be32enc(&nonce, work->data[19]);
 			break;
 		case ALGO_DECRED:
 			break;
 		case ALGO_BLAKE:
 		case ALGO_BLAKECOIN:
 		case ALGO_BMW:
@ -852,6 +869,9 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 		if (opt_algo == ALGO_ZR5) {
 			data_size = 80; adata_sz = 20;
 		}
 		else if (opt_algo == ALGO_DECRED) {
 			data_size = 192; adata_sz = 180/4;
 		}
 		if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR) {
 			for (int i = 0; i < adata_sz; i++)
@ -971,7 +991,7 @@ static bool get_mininginfo(CURL *curl, struct work *work)
 	struct pool_infos *pool = &pools[work->pooln];
 	int curl_err = 0;
-	if (have_stratum || !allow_mininginfo)
+	if (have_stratum || have_longpoll || !allow_mininginfo)
 		return false;
 	json_t *val = json_rpc_call_pool(curl, pool, info_req, false, false, &curl_err);
@ -1223,8 +1243,12 @@ bool get_work(struct thr_info *thr, struct work *work)
 		memset(work->data, 0x55, 76);
 		//work->data[17] = swab32((uint32_t)time(NULL));
 		memset(work->data + 19, 0x00, 52);
-		work->data[20] = 0x80000000;
+		if (opt_algo == ALGO_DECRED) {
-		work->data[31] = 0x00000280;
+			memset(&work->data[35], 0x00, 52);
 		} else {
 			work->data[20] = 0x80000000;
 			work->data[31] = 0x00000280;
 		}
 		memset(work->target, 0x00, sizeof(work->target));
 		return true;
 	}
@ -1358,8 +1382,14 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 		break;
 	}
-	work->data[20] = 0x80000000;
+	if (opt_algo == ALGO_DECRED) {
-	work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280;
+		work->data[45] = 0x80000001;
 		work->data[46] = 0;
 		work->data[47] = 0x000005a0;
 	} else {
 		work->data[20] = 0x80000000;
 		work->data[31] = (opt_algo == ALGO_MJOLLNIR) ? 0x000002A0 : 0x00000280;
 	}
 	// HeavyCoin (vote / reward)
 	if (opt_algo == ALGO_HEAVY) {
@ -1554,7 +1584,7 @@ static void *miner_thread(void *userdata)
 		uint64_t max64, minmax = 0x100000;
 		// &work.data[19]
-		int wcmplen = 76;
+		int wcmplen = (opt_algo == ALGO_DECRED) ? 140 : 76;
 		int wcmpoft = 0;
 		uint32_t *nonceptr = (uint32_t*) (((char*)work.data) + wcmplen);
@ -1633,9 +1663,16 @@ static void *miner_thread(void *userdata)
 			#endif
 			memcpy(&work, &g_work, sizeof(struct work));
 			nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // 0 if single thr
 			if (opt_algo == ALGO_DECRED) nonceptr[0] = 0;
 		} else
 			nonceptr[0]++; //??
 		if (opt_algo == ALGO_DECRED) {
 			end_nonce = 0xF0000000UL;
 			nonceptr[1] += 1;
 			nonceptr[2] |= thr_id;
 		}
 		pthread_mutex_unlock(&g_work_lock);
 		// --benchmark [-a all]
@ -1751,6 +1788,7 @@ static void *miner_thread(void *userdata)
 				break;
 			case ALGO_BLAKE:
 			case ALGO_BMW:
 			case ALGO_DECRED:
 			case ALGO_WHIRLPOOLX:
 				minmax = 0x40000000U;
 				break;
@ -1839,6 +1877,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_C11:
 			rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_DECRED:
 			rc = scanhash_decred(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_DEEP:
 			rc = scanhash_deep(thr_id, &work, max_nonce, &hashes_done);
 			break;
@ -2154,9 +2195,13 @@ longpoll_retry:
 					if (net_diff > 0.) {
 						sprintf(netinfo, ", diff %.3f", net_diff);
 					}
-					if (opt_showdiff)
+					if (opt_showdiff) {
 						sprintf(&netinfo[strlen(netinfo)], ", target %.3f", g_work.targetdiff);
-					applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo);
+					}
 					if (g_work.height)
 						applog(LOG_BLUE, "%s block %u%s", algo_names[opt_algo], g_work.height, netinfo);
 					else
 						applog(LOG_BLUE, "%s detected new block%s", short_url, netinfo);
 				}
 				g_work_time = time(NULL);
 			}
@ -3175,6 +3220,13 @@ int main(int argc, char *argv[])
 	cur_pooln = pool_get_first_valid(0);
 	pool_switch(-1, cur_pooln);
 	if (opt_algo == ALGO_DECRED) {
 		allow_gbt = false;
 		want_stratum = have_stratum = false;
 		allow_mininginfo = false;
 		want_longpoll = true;
 	}
 	flags = !opt_benchmark && strncmp(rpc_url, "https:", 6)
 	      ? (CURL_GLOBAL_ALL & ~CURL_GLOBAL_SSL)
 	      : CURL_GLOBAL_ALL;
@ -3317,6 +3369,12 @@ int main(int argc, char *argv[])
 	/* real start of the stratum work */
 	if (want_stratum && have_stratum) {
 		tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	} else {
 		// hmm, weird on Multicoin.co
 		//char lpurl[512];
 		//sprintf(lpurl, "%s/LP", rpc_url);
 		//if (opt_algo == ALGO_DECRED)
 		//	tq_push(thr_info[longpoll_thr_id].q, strdup(lpurl));
 	}
 #ifdef USE_WRAPNVML
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -408,6 +408,7 @@
      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
      <FastMath>true</FastMath>
    </CudaCompile>
    <CudaCompile Include="Algo256\decred.cu" />
    <CudaCompile Include="Algo256\keccak256.cu" />
    <CudaCompile Include="Algo256\cuda_blake256.cu" />
    <CudaCompile Include="Algo256\cuda_bmw256.cu" />
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -622,6 +622,9 @@
    <CudaCompile Include="Algo256\blake256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="Algo256\decred.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="Algo256\keccak256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -66,7 +66,7 @@
 #define HAVE_STRING_H 1
 /* Define to 1 if you have the <syslog.h> header file. */
-/* #undef HAVE_SYSLOG_H */
+#define HAVE_SYSLOG_H 1
 /* Define to 1 if you have the <sys/endian.h> header file. */
 /* #undef HAVE_SYS_ENDIAN_H */
@ -87,7 +87,7 @@
 #define HAVE_UNISTD_H 1
 /* Defined if libcurl supports AsynchDNS */
-/* #undef LIBCURL_FEATURE_ASYNCHDNS */
+#define LIBCURL_FEATURE_ASYNCHDNS 1
 /* Defined if libcurl supports IDN */
 #define LIBCURL_FEATURE_IDN 1
@ -111,7 +111,7 @@
 /* #undef LIBCURL_FEATURE_SSPI */
 /* Defined if libcurl supports DICT */
-/* #undef LIBCURL_PROTOCOL_DICT */
+#define LIBCURL_PROTOCOL_DICT 1
 /* Defined if libcurl supports FILE */
 #define LIBCURL_PROTOCOL_FILE 1
@ -126,31 +126,28 @@
 #define LIBCURL_PROTOCOL_HTTP 1
 /* Defined if libcurl supports HTTPS */
-/* #undef LIBCURL_PROTOCOL_HTTPS */
+#define LIBCURL_PROTOCOL_HTTPS 1
 /* Defined if libcurl supports IMAP */
-/* #undef LIBCURL_PROTOCOL_IMAP */
+#define LIBCURL_PROTOCOL_IMAP 1
 /* Defined if libcurl supports LDAP */
-/* #undef LIBCURL_PROTOCOL_LDAP */
+#define LIBCURL_PROTOCOL_LDAP 1
 /* Defined if libcurl supports POP3 */
-/* #undef LIBCURL_PROTOCOL_POP3 */
+#define LIBCURL_PROTOCOL_POP3 1
 /* Defined if libcurl supports RTSP */
-/* #undef LIBCURL_PROTOCOL_RTSP */
+#define LIBCURL_PROTOCOL_RTSP 1
 /* Defined if libcurl supports SMTP */
-/* #undef LIBCURL_PROTOCOL_SMTP */
+#define LIBCURL_PROTOCOL_SMTP 1
 /* Defined if libcurl supports TELNET */
-/* #undef LIBCURL_PROTOCOL_TELNET */
+#define LIBCURL_PROTOCOL_TELNET 1
 /* Defined if libcurl supports TFTP */
-/* #undef LIBCURL_PROTOCOL_TFTP */
+#define LIBCURL_PROTOCOL_TFTP 1
 /* Define to 1 if your C compiler doesn't accept -c and -o together. */
 /* #undef NO_MINUS_C_MINUS_O */
 /* Name of package */
 #define PACKAGE "ccminer"
@ -191,4 +188,4 @@
 /* #undef curl_free */
 /* Define to `unsigned int' if <sys/types.h> does not define. */
-//#define size_t unsigned int
+/* #undef size_t */
--- a/miner.h
+++ b/miner.h
@ -264,6 +264,7 @@ struct work;
 extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds);
 extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -305,6 +306,7 @@ void algo_free_all(int thr_id);
 extern void free_blake256(int thr_id);
 extern void free_bmw(int thr_id);
 extern void free_c11(int thr_id);
 extern void free_decred(int thr_id);
 extern void free_deep(int thr_id);
 extern void free_keccak256(int thr_id);
 extern void free_fresh(int thr_id);
@ -536,7 +538,7 @@ extern void gpulog(int prio, int thr_id, const char *fmt, ...);
 void get_defconfig_path(char *out, size_t bufsize, char *argv0);
 extern void cbin2hex(char *out, const char *in, size_t len);
 extern char *bin2hex(const unsigned char *in, size_t len);
-extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
+extern bool hex2bin(void *output, const char *hexstr, size_t len);
 extern int timeval_subtract(struct timeval *result, struct timeval *x,
 	struct timeval *y);
 extern bool fulltest(const uint32_t *hash, const uint32_t *target);
@ -612,7 +614,7 @@ struct tx {
 };
 struct work {
-	uint32_t data[32];
+	uint32_t data[48];
 	uint32_t target[8];
 	uint32_t maxvote;
@ -625,6 +627,8 @@ struct work {
 		uint64_t u64[1];
 	} noncerange;
 	uint32_t nonces[2];
 	double targetdiff;
 	double shareratio;
 	double sharediff;
@ -761,13 +765,16 @@ void restart_threads(void);
 size_t time2str(char* buf, time_t timer);
 char* atime2str(time_t timer);
-void applog_hash(unsigned char *hash);
+void applog_hex(void *data, int len);
-void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
+void applog_hash(void *hash);
 void applog_hash64(void *hash);
 void applog_compare_hash(void *hash, void *hash_ref);
 void print_hash_tests(void);
 void blake256hash(void *output, const void *input, int8_t rounds);
 void bmw_hash(void *state, const void *input);
 void c11hash(void *output, const void *input);
 void decred_hash(void *state, const void *input);
 void deephash(void *state, const void *input);
 void luffa_hash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
--- a/util.cpp
+++ b/util.cpp
@ -724,9 +724,10 @@ char *bin2hex(const uchar *in, size_t len)
 	return s;
 }
-bool hex2bin(uchar *p, const char *hexstr, size_t len)
+bool hex2bin(void *output, const char *hexstr, size_t len)
 {
-	char hex_byte[3];
+	uchar *p = (uchar *) output;
 	char hex_byte[4];
 	char *ep;
 	hex_byte[2] = '\0';
@ -1791,8 +1792,9 @@ char* atime2str(time_t timer)
 }
 /* sprintf can be used in applog */
-static char* format_hash(char* buf, uchar *hash)
+static char* format_hash(char* buf, uint8_t* h)
 {
 	uchar *hash = (uchar*) h;
 	int len = 0;
 	for (int i=0; i < 32; i += 4) {
 		len += sprintf(buf+len, "%02x%02x%02x%02x ",
@ -1802,23 +1804,39 @@ static char* format_hash(char* buf, uchar *hash)
 }
 /* to debug diff in data */
-extern void applog_compare_hash(uchar *hash, uchar *hash2)
+void applog_compare_hash(void *hash, void *hash_ref)
 {
 	char s[256] = "";
 	int len = 0;
 	uchar* hash1 = (uchar*)hash;
 	uchar* hash2 = (uchar*)hash_ref;
 	for (int i=0; i < 32; i += 4) {
-		const char *color = memcmp(hash+i, hash2+i, 4) ? CL_WHT : CL_GRY;
+		const char *color = memcmp(hash1+i, hash2+i, 4) ? CL_WHT : CL_GRY;
 		len += sprintf(s+len, "%s%02x%02x%02x%02x " CL_GRY, color,
-			hash[i], hash[i+1], hash[i+2], hash[i+3]);
+			hash1[i], hash1[i+1], hash1[i+2], hash1[i+3]);
 		s[len] = '\0';
 	}
 	applog(LOG_DEBUG, "%s", s);
 }
-extern void applog_hash(uchar *hash)
+void applog_hash(void *hash)
 {
 	char s[128] = {'\0'};
 	applog(LOG_DEBUG, "%s", format_hash(s, (uint8_t*)hash));
 }
 void applog_hash64(void *hash)
 {
 	char s[128] = {'\0'};
-	applog(LOG_DEBUG, "%s", format_hash(s, hash));
+	char t[128] = {'\0'};
 	applog(LOG_DEBUG, "%s %s", format_hash(s, (uint8_t*)hash), format_hash(t, &((uint8_t*)hash)[32]));
 }
 /* Debug-log len bytes starting at data as a single hexadecimal string. */
 void applog_hex(void *data, int len)
 {
 	char* hex = bin2hex((uchar*)data, len);
 	/* bin2hex hands back a buffer we must free; it presumably returns NULL
 	 * when the allocation fails, and NULL must not be passed to "%s" */
 	if (!hex)
 		return;
 	applog(LOG_DEBUG, "%s", hex);
 	free(hex);
 }
 #define printpfx(n,h) \
@ -1865,7 +1883,7 @@ void do_gpu_tests(void)
 	//scanhash_scrypt_jane(0, &work, NULL, 1, &done, &tv, &tv);
 	memset(work.data, 0, sizeof(work.data));
-	scanhash_sib(0, &work, 1, &done);
+	scanhash_decred(0, &work, 1, &done);
 	free(work_restart);
 	work_restart = NULL;
@ -1878,7 +1896,7 @@ void print_hash_tests(void)
 	uchar *scratchbuf = NULL;
 	char s[128] = {'\0'};
 	uchar hash[128];
-	uchar buf[128];
+	uchar buf[192];
 	// work space for scratchpad based algos
 	scratchbuf = (uchar*)calloc(128, 1024);
@ -1900,6 +1918,10 @@ void print_hash_tests(void)
 	c11hash(&hash[0], &buf[0]);
 	printpfx("c11", hash);
 	memset(buf, 0, 180);
 	decred_hash(&hash[0], &buf[0]);
 	printpfx("decred", hash);
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);