Add support for blakecoin (-a blakecoin)

Blakecoin uses an old variant of Blake 256. Speed: 190 MHash/s (vs 25 in cudaMiner). This restores support for this algo (it was in cudaMiner before).
2014-09-06 01:22:54 +02:00 · 2014-09-06 01:22:54 +02:00 · 383b184549
commit 383b184549
parent e0487aac46
10 changed files with 140 additions and 115 deletions
--- a/README.txt
+++ b/README.txt
@ -62,6 +62,7 @@ its command line interface and options.
                          quark       use to mine Quarkcoin
                          anime       use to mine Animecoin
                          blake       use to mine NEOS (Blake 256)
+                          blakecoin   use to mine Old Blake 256
                          nist5       use to mine TalkCoin
                          fresh       use to mine Freshcoin
                          whirl       use to mine Whirlcoin
--- a/blake32.cu
+++ b/blake32.cu
@ -15,11 +15,17 @@ extern "C" {
 /* threads per block */
 #define TPB 128

+extern "C" int blake256_rounds = 14;
+
 /* hash by cpu with blake 256 */
-extern "C" void blake32hash(void *output, const void *input)
+extern "C" void blake256hash(void *output, const void *input, int rounds = 14)
 {
 	unsigned char hash[64];
 	sph_blake256_context ctx;
+
+	/* in sph_blake.c */
+	blake256_rounds = rounds;
+
 	sph_blake256_init(&ctx);
 	sph_blake256(&ctx, input, 80);
 	sph_blake256_close(&ctx, hash);
@ -28,6 +34,8 @@ extern "C" void blake32hash(void *output, const void *input)

 #include "cuda_helper.h"

+#define MAXU 0xffffffffU
+
 // in cpu-miner.c
 extern bool opt_n_threads;
 extern bool opt_benchmark;
@ -35,28 +43,19 @@ extern int device_map[8];

 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

-__constant__
-static uint32_t __align__(32) c_PaddedMessage80[32]; // padded message (80 bytes + padding)
-
 __constant__
 static uint32_t __align__(32) c_Target[8];

-#define MAXU 0xffffffffU
+__constant__
+static uint32_t __align__(32) c_data[20];

 static uint32_t *d_resNounce[8];
 static uint32_t *h_resNounce[8];

-__constant__
-#ifdef WIN32
-/* what the fuck ! */
-static uint8_t c_sigma[16][16];
-const uint8_t host_sigma[16][16]
-#else
 /* prefer uint32_t to prevent size conversions = speed +5/10 % */
+__constant__
 static uint32_t __align__(32) c_sigma[16][16];
-const uint32_t host_sigma[16][16]
-#endif
-= {
+const uint32_t host_sigma[16][16] = {
 	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
 	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
 	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
@ -122,53 +121,61 @@ static const uint32_t __align__(32) c_u256[16] = {
 #define GS(a,b,c,d,x) { \
 	const uint32_t idx1 = c_sigma[i][x]; \
 	const uint32_t idx2 = c_sigma[i][x+1]; \
-	v[a] += (m[idx1] ^ u256[idx2]) + v[b]; \
+	v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \
 	v[d] = SPH_ROTL32(v[d] ^ v[a], 16); \
 	v[c] += v[d]; \
 	v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \
 \
-	v[a] += (m[idx2] ^ u256[idx1]) + v[b]; \
+	v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \
 	v[d] = SPH_ROTR32(v[d] ^ v[a], 8); \
 	v[c] += v[d]; \
 	v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
 }

-#define BLAKE256_ROUNDS 14
+/* Padding words of the second block (message bytes 64-80) never change, so store them once */
+__device__ __constant__
+static const uint32_t __align__(32) c_Padding[16] = {
+	0, 0, 0, 0,
+	0x80000000UL, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 1, 0, 640,
+};

 __device__ static
-void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
+void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0, int blakerounds)
 {
-	uint32_t /* __align__(8) */ v[16];
 	uint32_t /* __align__(8) */ m[16];
+	uint32_t /* __align__(8) */ v[16];

-	const uint32_t* u256 = c_u256;
+	m[0] = block[0];
+	m[1] = block[1];
+	m[2] = block[2];
+	m[3] = block[3];

-	//#pragma unroll
-	for (int i = 0; i < 16; ++i) {
-		m[i] = block[i];
+	for (uint32_t i = 4; i < 16; i++) {
+		m[i] = (T0 == 0x200) ? block[i] : c_Padding[i];
 	}

 	//#pragma unroll 8
-	for(int i = 0; i < 8; i++)
+	for(uint32_t i = 0; i < 8; i++)
 		v[i] = h[i];

-	v[ 8] = u256[0];
-	v[ 9] = u256[1];
-	v[10] = u256[2];
-	v[11] = u256[3];
+	v[ 8] = c_u256[0];
+	v[ 9] = c_u256[1];
+	v[10] = c_u256[2];
+	v[11] = c_u256[3];

-	v[12] = u256[4] ^ T0;
-	v[13] = u256[5] ^ T0;
-	v[14] = u256[6];
-	v[15] = u256[7];
+	v[12] = c_u256[4] ^ T0;
+	v[13] = c_u256[5] ^ T0;
+	v[14] = c_u256[6];
+	v[15] = c_u256[7];

-	//#pragma unroll
-	for (int i = 0; i < BLAKE256_ROUNDS; i++) {
+	for (int i = 0; i < blakerounds; i++) {
 		/* column step */
-		GS(0, 4, 0x8, 0xC, 0);
-		GS(1, 5, 0x9, 0xD, 2);
-		GS(2, 6, 0xA, 0xE, 4);
-		GS(3, 7, 0xB, 0xF, 6);
+		GS(0, 4, 0x8, 0xC, 0x0);
+		GS(1, 5, 0x9, 0xD, 0x2);
+		GS(2, 6, 0xA, 0xE, 0x4);
+		GS(3, 7, 0xB, 0xF, 0x6);
 		/* diagonal step */
 		GS(0, 5, 0xA, 0xF, 0x8);
 		GS(1, 6, 0xB, 0xC, 0xA);
@ -177,48 +184,36 @@ void blake256_compress(uint32_t *h, const uint32_t *block, const uint32_t T0)
 	}

 	//#pragma unroll 16
-	for(int i = 0; i < 16; i++)
-		h[i % 8] ^= v[i];
+	for (uint32_t i = 0; i < 16; i++) {
+		uint32_t j = i % 8;
+		h[j] ^= v[i];
+	}
 }

 __global__
-void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
+void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, const int blakerounds)
 {
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		const uint32_t nounce = startNounce + thread;
-		uint32_t /* __align__(8) */ msg[16];
 		uint32_t h[8];

 		#pragma unroll
 		for(int i=0; i<8; i++)
 			h[i] = c_IV256[i];

-		blake256_compress(h, c_PaddedMessage80, 0x200); /* 512 = 0x200 */
+		blake256_compress(h, c_data, 512, blakerounds);

 		// ------ Close: Bytes 64 to 80 ------ 

-		msg[0] = c_PaddedMessage80[16];
-		msg[1] = c_PaddedMessage80[17];
-		msg[2] = c_PaddedMessage80[18];
-		msg[3] = nounce; /* our tested value */
-		msg[4] = 0x80000000UL; //cuda_swab32(0x80U);
+		uint32_t ending[4];
+		ending[0] = c_data[16];
+		ending[1] = c_data[17];
+		ending[2] = c_data[18];
+		ending[3] = nounce; /* our tested value */

-		msg[5] = 0;  // uchar[17 to 55]
-		msg[6] = 0;
-		msg[7] = 0;
-		msg[8] = 0;
-		msg[9] = 0;
-		msg[10] = 0;
-		msg[11] = 0;
-		msg[12] = 0;
-
-		msg[13] = 1;
-		msg[14] = 0;
-		msg[15] = 0x280;
-
-		blake256_compress(h, msg, 0x280);
+		blake256_compress(h, ending, 640, blakerounds);

 		for (int i = 7; i >= 0; i--) {
 			uint32_t hash = cuda_swab32(h[i]);
@ -237,7 +232,7 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
 }

 __host__
-uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce)
+uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, const int blakerounds)
 {
 	const int threadsperblock = TPB;
 	uint32_t result = MAXU;
@ -250,7 +245,7 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
 	if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
 		return result;

-	blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id]);
+	blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id], blakerounds);
 	cudaDeviceSynchronize();
 	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		cudaThreadSynchronize();
@ -262,16 +257,15 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
 __host__
 void blake256_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
 {
-	uint32_t PaddedMessage[32];
-	memcpy(PaddedMessage, pdata, 80);
-	memset(&PaddedMessage[20], 0, 48);
-	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, sizeof(PaddedMessage), 0, cudaMemcpyHostToDevice));
+	uint32_t data[20];
+	memcpy(data, pdata, 80);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_data, data, sizeof(data), 0, cudaMemcpyHostToDevice));
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_sigma, host_sigma, sizeof(host_sigma), 0, cudaMemcpyHostToDevice));
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
 }

-extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done)
+extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done, uint32_t blakerounds=14)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
@ -297,25 +291,19 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta

 	do {
 		// GPU HASH
-		uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19]);
-		if (foundNonce != 0xffffffff)
+		uint32_t foundNonce = blake256_cpu_hash_80(thr_id, throughput, pdata[19], blakerounds);
+		if (foundNonce != MAXU)
 		{
 			uint32_t endiandata[20];
 			uint32_t vhashcpu[8];
 			uint32_t Htarg = ptarget[7];

-			for (int k=0; k < 20; k++)
+			for (int k=0; k < 19; k++)
 				be32enc(&endiandata[k], pdata[k]);

-			if (opt_debug && !opt_quiet) {
-				applog(LOG_DEBUG, "throughput=%u, start=%x, max=%x, pdata=%08x...%08x",
-					throughput, first_nonce, max_nonce, endiandata[0], endiandata[7]);
-				applog_hash((unsigned char *)pdata);
-			}
-
 			be32enc(&endiandata[19], foundNonce);

-			blake32hash(vhashcpu, endiandata);
+			blake256hash(vhashcpu, endiandata, blakerounds);

 			if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget))
 			{
@ -340,11 +328,14 @@ extern "C" int scanhash_blake32(int thr_id, uint32_t *pdata, const uint32_t *pta

 exit_scan:
 	*hashes_done = pdata[19] - first_nonce + 1;
-	// reset the device to allow multiple instances
+#if 0
+	/* reset the device to allow multiple instances
+	 * could be made in cpu-miner... check later if required */
 	if (opt_n_threads == 1) {
 		CUDA_SAFE_CALL(cudaDeviceReset());
 		init[thr_id] = false;
 	}
+#endif
 	// wait proper end of all threads
 	cudaDeviceSynchronize();
 	return rc;
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -400,8 +400,9 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
    </CudaCompile>
    <CudaCompile Include="blake32.cu">
      <MaxRegCount>64</MaxRegCount>
-      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
+      <FastMath>true</FastMath>
    </CudaCompile>
    <CudaCompile Include="quark\animecoin.cu">
      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
@ -562,4 +563,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
  <ImportGroup Label="ExtensionTargets">
    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
  </ImportGroup>
-</Project>
+</Project>
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2014.09.01])
+AC_INIT([ccminer], [2014.09.06])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -128,6 +128,7 @@ struct workio_cmd {
 typedef enum {
 	ALGO_ANIME,
 	ALGO_BLAKE,
+	ALGO_BLAKECOIN,
 	ALGO_FRESH,
 	ALGO_FUGUE256,		/* Fugue256 */
 	ALGO_GROESTL,
@ -149,6 +150,7 @@ typedef enum {
 static const char *algo_names[] = {
 	"anime",
 	"blake",
+	"blakecoin",
 	"fresh",
 	"fugue256",
 	"groestl",
@ -231,6 +233,7 @@ Options:\n\
  -a, --algo=ALGO       specify the algorithm to use\n\
                        anime     Animecoin hash\n\
                        blake     Blake 256 (like NEOS blake)\n\
+                        blakecoin Old Blake 256 (8 rounds)\n\
                        fresh     Freshcoin hash (shavite 80)\n\
                        fugue256  Fuguecoin hash\n\
                        groestl   Groestlcoin hash\n\
@ -505,8 +508,7 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 			goto out;
 		}

-		hashlog_remember_submit(work->job_id, nonce);
-		hashlog_remember_scan_range(work->job_id, work->scanned_from, work->scanned_to);
+		hashlog_remember_submit(work->job_id, nonce, work->scanned_from);

 	} else {

@ -787,7 +789,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 	if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
 		heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
 	else
-	if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WHC)
+	if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WHC || opt_algo == ALGO_BLAKECOIN)
 		SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
 	else
 		sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
@ -961,6 +963,8 @@ static void *miner_thread(void *userdata)
 			case ALGO_JACKPOT:
 				max64 = 0x1fffLL;
 				break;
+			case ALGO_BLAKECOIN:
+				max64 = 0x3ffffffLL;
 			case ALGO_BLAKE:
 				/* based on the 750Ti hashrate (100kH) */
 				max64 = 0x3ffffffLL;
@ -1065,9 +1069,14 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;

+		case ALGO_BLAKECOIN:
+			rc = scanhash_blake256(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done, 8);
+			break;
+
 		case ALGO_BLAKE:
-			rc = scanhash_blake32(thr_id, work.data, work.target,
-			                      max_nonce, &hashes_done);
+			rc = scanhash_blake256(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done, 14);
 			break;

 		case ALGO_FRESH:
@ -1365,7 +1374,7 @@ out:
 	return NULL;
 }

-#define PROGRAM_VERSION "1.4"
+#define PROGRAM_VERSION "1.4.1"
 static void show_version_and_exit(void)
 {
 	printf("%s v%s\n"
--- a/hashlog.cpp
+++ b/hashlog.cpp
@ -1,3 +1,11 @@
+/**
+ * Hash log of submitted job nonces
+ * Prevent duplicate shares and could be used for RPC stats later
+ *
+ * Note: this source is C++ (requires std::map)
+ *
+ * tpruvot@github 2014
+ */
 #include <stdlib.h>
 #include <memory.h>
 #include <map>
@ -27,7 +35,8 @@ static std::map<uint64_t, hashlog_data> tlastshares;
 static uint64_t hextouint(char* jobid)
 {
 	char *ptr;
-	return strtoull(jobid, &ptr, 16);
+	/* dont use strtoull(), only since VS2013 */
+	return (uint64_t) strtoul(jobid, &ptr, 16);
 }

 /**
@ -38,6 +47,7 @@ extern "C" uint32_t hashlog_already_submittted(char* jobid, uint32_t nonce)
 	uint32_t ret = 0;
 	uint64_t njobid = hextouint(jobid);
 	uint64_t key = (njobid << 32) + nonce;
+
 	if (nonce == 0) {
 		// search last submitted nonce for job
 		ret = hashlog_get_last_sent(jobid);
@ -50,17 +60,17 @@ extern "C" uint32_t hashlog_already_submittted(char* jobid, uint32_t nonce)
 /**
 * Store submitted nonces of a job
 */
-extern "C" void hashlog_remember_submit(char* jobid, uint32_t nonce)
+extern "C" void hashlog_remember_submit(char* jobid, uint32_t nonce, uint32_t scanned_from)
 {
 	uint64_t njobid = hextouint(jobid);
 	uint64_t keyall = (njobid << 32);
 	uint64_t key = keyall + nonce;
-	struct hashlog_data data;
+	hashlog_data data;

-	data = tlastshares[keyall];
-	data.tm_upd = data.tm_sent = (uint32_t) time(NULL);
-	if (data.tm_add == 0)
-		data.tm_add = data.tm_upd;
+	memset(&data, 0, sizeof(data));
+	data.scanned_from = scanned_from;
+	data.scanned_to = nonce;
+	data.tm_add = data.tm_upd = data.tm_sent = (uint32_t) time(NULL);
 	tlastshares[key] = data;
 }

@ -72,7 +82,7 @@ extern "C" void hashlog_remember_scan_range(char* jobid, uint32_t scanned_from,
 	uint64_t njobid = hextouint(jobid);
 	uint64_t keyall = (njobid << 32);
 	uint64_t range = hashlog_get_scan_range(jobid);
-	struct hashlog_data data;
+	hashlog_data data;

 	// global scan range of a job
 	data = tlastshares[keyall];
@ -90,12 +100,12 @@ extern "C" void hashlog_remember_scan_range(char* jobid, uint32_t scanned_from,
 	data.last_from = scanned_from;

 	if (scanned_from < scanned_to) {
-		if (data.scanned_from == 0)
-			data.scanned_from = scanned_from ? scanned_from : 1; // min 1
-		else if (scanned_from < data.scanned_from) // || scanned_to == (data.scanned_from - 1)
-			data.scanned_from = scanned_from;
 		if (data.scanned_to == 0 || scanned_from == data.scanned_to + 1)
 			data.scanned_to = scanned_to;
+		if (data.scanned_from == 0)
+			data.scanned_from = scanned_from ? scanned_from : 1; // min 1
+		else if (scanned_from < data.scanned_from || scanned_to == (data.scanned_from - 1))
+			data.scanned_from = scanned_from;
 	}

 	data.tm_upd = (uint32_t) time(NULL);
@ -114,7 +124,8 @@ extern "C" uint64_t hashlog_get_scan_range(char* jobid)
 	uint64_t ret = 0;
 	uint64_t njobid = hextouint(jobid);
 	uint64_t keypfx = (njobid << 32);
-	struct hashlog_data data;
+	hashlog_data data;
+
 	data.scanned_from = 0;
 	data.scanned_to = 0;
 	std::map<uint64_t, hashlog_data>::iterator i = tlastshares.begin();
@ -215,10 +226,11 @@ extern "C" void hashlog_dump_job(char* jobid)
 		std::map<uint64_t, hashlog_data>::iterator i = tlastshares.begin();
 		while (i != tlastshares.end()) {
 			if ((keypfx & i->first) == keypfx) {
-				applog(LOG_BLUE, "job %s range : %x %x %s added %x upd %x", jobid,
-					i->second.scanned_from, i->second.scanned_to,
-					i->second.tm_sent ? "sent" : "",
-					i->second.tm_add, i->second.tm_upd);/* */
+				if (i->first != keypfx)
+					applog(LOG_DEBUG, CL_YLW "job %s, found %08x ", jobid, LO_DWORD(i->first));
+				else
+					applog(LOG_DEBUG, CL_YLW "job %s scanned range : %08x-%08x", jobid,
+						i->second.scanned_from, i->second.scanned_to);
 			}
 			i++;
 		}
--- a/miner.h
+++ b/miner.h
@ -237,11 +237,11 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

-extern int scanhash_fresh(int thr_id, uint32_t *pdata,
+extern int scanhash_blake256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
-	unsigned long *hashes_done);
+	unsigned long *hashes_done, uint32_t blakerounds);

-extern int scanhash_blake32(int thr_id, uint32_t *pdata,
+extern int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

@ -392,7 +392,7 @@ bool stratum_subscribe(struct stratum_ctx *sctx);
 bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
 bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);

-void hashlog_remember_submit(char* jobid, uint32_t nounce);
+void hashlog_remember_submit(char* jobid, uint32_t nounce, uint32_t scanned_from);
 void hashlog_remember_scan_range(char* jobid, uint32_t scanned_from, uint32_t scanned_to);
 uint32_t hashlog_already_submittted(char* jobid, uint32_t nounce);
 uint32_t hashlog_get_last_sent(char* jobid);
@ -420,7 +420,7 @@ void applog_hash(unsigned char *hash);

 void print_hash_tests(void);
 void animehash(void *state, const void *input);
-void blake32hash(void *output, const void *input);
+void blake256hash(void *output, const void *input, int rounds);
 void fresh_hash(void *state, const void *input);
 void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
--- a/sph/blake.c
+++ b/sph/blake.c
@ -548,7 +548,7 @@ static const sph_u64 CB[16] = {
 		M[0xD] = sph_dec32be_aligned(buf + 52); \
 		M[0xE] = sph_dec32be_aligned(buf + 56); \
 		M[0xF] = sph_dec32be_aligned(buf + 60); \
-		for (r = 0; r < 14; r ++) \
+		for (r = 0; r < blake256_rounds; r ++) \
 			ROUND_S(r); \
 		H0 ^= S0 ^ V0 ^ V8; \
 		H1 ^= S1 ^ V1 ^ V9; \
@ -607,12 +607,14 @@ static const sph_u64 CB[16] = {
 		ROUND_S(5); \
 		ROUND_S(6); \
 		ROUND_S(7); \
+		if (blake256_rounds == 14) { \
 		ROUND_S(8); \
 		ROUND_S(9); \
 		ROUND_S(0); \
 		ROUND_S(1); \
 		ROUND_S(2); \
 		ROUND_S(3); \
+		} \
 		H0 ^= S0 ^ V0 ^ V8; \
 		H1 ^= S1 ^ V1 ^ V9; \
 		H2 ^= S2 ^ V2 ^ VA; \
--- a/sph/sph_blake.h
+++ b/sph/sph_blake.h
@ -181,6 +181,11 @@ void sph_blake224_close(void *cc, void *dst);
 void sph_blake224_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);

+/**
+ * Switch for the number of rounds (old blake was 8)
+ */
+extern int blake256_rounds;
+
 /**
 * Initialize a BLAKE-256 context. This process performs no memory allocation.
 *
--- a/util.c
+++ b/util.c
@ -1042,7 +1042,7 @@ static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)

 	/* store stratum server time diff */
 	hex2bin((unsigned char *)&ntime, stime, 4);
-	ntime = swab32(ntime) - time(0);
+	ntime = swab32(ntime) - (uint32_t) time(0);
 	if (ntime > sctx->srvtime_diff) {
 		sctx->srvtime_diff = ntime;
 		if (!opt_quiet)
@ -1420,7 +1420,11 @@ void print_hash_tests(void)
 	printpfx("anime", hash);

 	memset(hash, 0, sizeof hash);
-	blake32hash(&hash[0], &buf[0]);
+	blake256hash(&hash[0], &buf[0], 8);
+	printpfx("blakecoin", hash);
+
+	memset(hash, 0, sizeof hash);
+	blake256hash(&hash[0], &buf[0], 14);
 	printpfx("blake", hash);

 	memset(hash, 0, sizeof hash);