From 49a73971c47c7a807a2a7074de048bdfd2021c2a Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Sun, 7 Dec 2014 12:49:40 +0100
Subject: [PATCH] Enhance stale work detection + throughput fixes

This seems to resolve the solo mining lock on share.
Also export the computed solo work diff in the API (not perfect).

In high-rate algos, throughput should be unsigned.
This fixes the keccak, blake and doom problems.

Also change the terminal color of debug lines so that they are selectable in PuTTY;
the color code is not supported on Windows, but selection is OK there.
---
 Algo256/blake256.cu  |  8 +++++---
 Algo256/keccak256.cu | 13 ++++++-------
 ccminer.cpp          | 46 +++++++++++++++++++++++---------------------
 miner.h              |  4 ++++
 qubit/doom.cu        | 19 +++++++++---------
 5 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu
index ad426e3..4ecec43 100644
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -452,14 +452,16 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 			if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
 			{
 				rc = 1;
-				*hashes_done = pdata[19] - first_nonce + throughput;
 				pdata[19] = foundNonce;
+				*hashes_done = pdata[19] - first_nonce + 1;
 #if NBN > 1
 				if (extra_results[0] != UINT32_MAX) {
 					be32enc(&endiandata[19], extra_results[0]);
 					blake256hash(vhashcpu, endiandata, blakerounds);
 					if (vhashcpu[6] <= Htarg /* && fulltest(vhashcpu, ptarget) */) {
 						pdata[21] = extra_results[0];
+						applog(LOG_BLUE, "1:%x 2:%x", foundNonce, extra_results[0]);
+						*hashes_done = max(*hashes_done, extra_results[0] - first_nonce + 1);
 						rc = 2;
 					}
 					extra_results[0] = UINT32_MAX;
@@ -477,7 +479,6 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 		}
 
 		if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
-			pdata[19] = max_nonce;
 			break;
 		}
 
@@ -485,6 +486,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 
 	} while (!work_restart[thr_id].restart);
 
-	*hashes_done = pdata[19] - first_nonce + 1; // (+1 to prevent locks)
+	*hashes_done = pdata[19] - first_nonce;
+
 	return rc;
 }
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
index ac6f4d0..59a1d9b 100644
--- a/Algo256/keccak256.cu
+++ b/Algo256/keccak256.cu
@@ -41,8 +41,8 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
-	int throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
+	throughput = min(throughput, (max_nonce - first_nonce));
 
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x0005;
@@ -51,7 +51,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 		cudaSetDevice(device_map[thr_id]);
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
-		keccak256_cpu_init(thr_id, throughput);
+		keccak256_cpu_init(thr_id, (int) throughput);
 
 		init[thr_id] = true;
 	}
@@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	do {
 		int order = 0;
 
-		uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
 		if (foundNonce != UINT32_MAX)
 		{
 			uint32_t Htarg = ptarget[7];
@@ -83,8 +83,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 			}
 		}
 
-		if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
-			pdata[19] = max_nonce;
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
 			break;
 		}
 
@@ -92,6 +91,6 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 
 	} while (!work_restart[thr_id].restart);
 
-	*hashes_done = pdata[19] - first_nonce + 1;
+	*hashes_done = pdata[19] - first_nonce;
 	return 0;
 }
diff --git a/ccminer.cpp b/ccminer.cpp
index 7392886..b0f2b9a 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -484,6 +484,7 @@ static bool work_decode(const json_t *val, struct work *work)
 
 /**
  * Calculate the work difficulty as double
+ * Not sure it works with pools
  */
 static void calc_diff(struct work *work, int known)
 {
@@ -548,26 +549,24 @@ static int share_result(int result, const char *reason)
 static bool submit_upstream_work(CURL *curl, struct work *work)
 {
 	json_t *val, *res, *reason;
+	bool stale_work = false;
 	char s[384];
-	bool stale_work;
 
-	pthread_mutex_lock(&g_work_lock);
-	if (strlen(work->job_id + 8)) {
-		/* stale if not the current job id */
-		stale_work = strcmp(work->job_id + 8, g_work.job_id + 8);
-	} else {
-		/* fallback when no job id (compare hash) */
-		stale_work = memcmp(&work->data[1], &g_work.data[1], 32);
+	/* discard if a new block was sent */
+	stale_work = work->height != g_work.height;
+	if (have_stratum && !stale_work) {
+		pthread_mutex_lock(&g_work_lock);
+		if (strlen(work->job_id + 8))
+			stale_work = strcmp(work->job_id + 8, g_work.job_id + 8);
+		pthread_mutex_unlock(&g_work_lock);
 	}
 
 	if (stale_work) {
-		pthread_mutex_unlock(&g_work_lock);
 		if (opt_debug)
 			applog(LOG_WARNING, "stale work detected, discarding");
 		return true;
 	}
 	calc_diff(work, 0);
-	pthread_mutex_unlock(&g_work_lock);
 
 	if (have_stratum) {
 		uint32_t sent;
@@ -1049,14 +1048,15 @@ static void *miner_thread(void *userdata)
 				stratum_gen_work(&stratum, &g_work);
 			}
 		} else {
-			int min_scantime = scan_time;
-			/* obtain new work from internal workio thread */
 			pthread_mutex_lock(&g_work_lock);
-			if (time(NULL) - g_work_time >= min_scantime || nonceptr[0] >= end_nonce) {
+			if ((time(NULL) - g_work_time) >= scan_time || nonceptr[0] >= (end_nonce - 0x100)) {
+				if (opt_debug && g_work_time && !opt_quiet)
+					applog(LOG_DEBUG, "work time %u/%us nonce %x/%x", time(NULL) - g_work_time,
+						scan_time, nonceptr[0], end_nonce);
+				/* obtain new work from internal workio thread */
 				if (unlikely(!get_work(mythr, &g_work))) {
-					applog(LOG_ERR, "work retrieval failed, exiting "
-						"mining thread %d", mythr->id);
 					pthread_mutex_unlock(&g_work_lock);
+					applog(LOG_ERR, "work retrieval failed, exiting mining thread %d", mythr->id);
 					goto out;
 				}
 				g_work_time = time(NULL);
@@ -1065,6 +1065,8 @@ static void *miner_thread(void *userdata)
 
 		if (!opt_benchmark && memcmp(work.target, g_work.target, sizeof(work.target))) {
 			calc_diff(&g_work, 0);
+			if (!have_stratum)
+				global_diff = g_work.difficulty;
 			if (opt_debug) {
 				uint64_t target64 = g_work.target[7] * 0x100000000ULL + g_work.target[6];
 				applog(LOG_DEBUG, "job %s target change: %llx (%.1f)", g_work.job_id, target64, g_work.difficulty);
@@ -1175,9 +1177,6 @@ static void *miner_thread(void *userdata)
 			}
 		}
 #endif
-		if (opt_algo == ALGO_KECCAK && max64 == UINT32_MAX) {
-			max64 = 0x7FFFFFFFUL;
-		}
 		/* never let small ranges at end */
 		if (end_nonce >= UINT32_MAX - 256)
 			end_nonce = UINT32_MAX;
@@ -1345,6 +1344,7 @@ static void *miner_thread(void *userdata)
 		timeval_subtract(&diff, &tv_end, &tv_start);
 
 		if (diff.tv_usec || diff.tv_sec) {
+			double dtime = (double) diff.tv_sec + 1e-6 * diff.tv_usec;
 
 			/* hashrate factors for some algos */
 			double rate_factor = 1.0;
@@ -1357,13 +1357,13 @@ static void *miner_thread(void *userdata)
 			}
 
 			/* store thread hashrate */
-			pthread_mutex_lock(&stats_lock);
-			if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) {
-				thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
+			if (dtime > 0.0) {
+				pthread_mutex_lock(&stats_lock);
+				thr_hashrates[thr_id] = hashes_done / dtime;
 				thr_hashrates[thr_id] *= rate_factor;
 				stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
+				pthread_mutex_unlock(&stats_lock);
 			}
-			pthread_mutex_unlock(&stats_lock);
 		}
 
 		if (rc > 1)
@@ -1392,8 +1392,10 @@ static void *miner_thread(void *userdata)
 		/* loopcnt: ignore first loop hashrate */
 		if (loopcnt && thr_id == (opt_n_threads - 1)) {
 			double hashrate = 0.;
+			pthread_mutex_lock(&stats_lock);
 			for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
 				hashrate += stats_get_speed(i, thr_hashrates[i]);
+			pthread_mutex_unlock(&stats_lock);
 			if (opt_benchmark) {
 				sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.);
 				applog(LOG_NOTICE, "Total: %s kH/s", s);
diff --git a/miner.h b/miner.h
index 0a4219e..81833cc 100644
--- a/miner.h
+++ b/miner.h
@@ -505,7 +505,11 @@ extern long  device_sm[8];
 #define CL_CY2  "\x1B[22;36m" /* cyan */
 #define CL_SIL  "\x1B[22;37m" /* gray */
 
+#ifdef WIN32
 #define CL_GRY  "\x1B[01;30m" /* dark gray */
+#else
+#define CL_GRY  "\x1B[90m"    /* dark gray selectable in putty */
+#endif
 #define CL_LRD  "\x1B[01;31m" /* light red */
 #define CL_LGR  "\x1B[01;32m" /* light green */
 #define CL_LYL  "\x1B[01;33m" /* tooltips */
diff --git a/qubit/doom.cu b/qubit/doom.cu
index e390a7f..df30b04 100644
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@@ -40,8 +40,8 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
 	uint32_t endiandata[20];
-	int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
+	throughput = min(throughput, (max_nonce - first_nonce));
 
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x0000f;
@@ -52,7 +52,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
 
-		qubit_luffa512_cpu_init(thr_id, throughput);
+		qubit_luffa512_cpu_init(thr_id, (int) throughput);
 
 		init[thr_id] = true;
 	}
@@ -65,7 +65,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 	do {
 		int order = 0;
 
-		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
 		if (foundNonce != UINT32_MAX)
 		{
 			const uint32_t Htarg = ptarget[7];
@@ -74,23 +74,22 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 			doomhash(vhash64, endiandata);
 
 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
-				int res = 1;
-				*hashes_done = pdata[19] - first_nonce + throughput;
+				*hashes_done = min(max_nonce - first_nonce, (uint64_t) pdata[19] - first_nonce + throughput);
 				pdata[19] = foundNonce;
-				return res;
+				return 1;
 			}
 			else {
 				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
 			}
 		}
 
-		pdata[19] += throughput;
-
 		if ((uint64_t) pdata[19] + throughput > max_nonce) {
-			pdata[19] = max_nonce;
+			// pdata[19] = max_nonce;
 			break;
 		}
 
+		pdata[19] += throughput;
+
 	} while (!work_restart[thr_id].restart);
 
 	*hashes_done = pdata[19] - first_nonce + 1;