From 65909ec3b778fdba97c97146ff8900795b972526 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Sat, 6 Sep 2014 10:55:44 +0200
Subject: [PATCH] blake: handle case when 2 hashes are found in a call

---
 blake32.cu        | 50 +++++++++++++++++++++++++++++++++++++----------
 cpu-miner.c       |  2 +-
 cpuminer-config.h |  6 +++---
 util.c            |  6 ++++--
 4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/blake32.cu b/blake32.cu
index 2ce2acd..5013de7 100644
--- a/blake32.cu
+++ b/blake32.cu
@@ -52,6 +52,8 @@ static uint32_t __align__(32) c_data[20];
 static uint32_t *d_resNounce[8];
 static uint32_t *h_resNounce[8];
 
+static uint32_t extra_results[2] = { MAXU, MAXU };
+
 /* prefer uint32_t to prevent size conversions = speed +5/10 % */
 __constant__
 static uint32_t __align__(32) c_sigma[16][16];
@@ -225,9 +227,13 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
 			}
 		}
 
-		/* keep the smallest nounce, hmm... */
-		if(resNounce[0] > nounce)
+		/* keep the smallest nounce in [0], and one extra candidate in [1] */
+		if (resNounce[0] > nounce) {
+			resNounce[1] = resNounce[0];
 			resNounce[0] = nounce;
+		}
+		else
+			resNounce[1] = nounce;
 	}
 }
 
@@ -242,14 +248,15 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
 	size_t shared_size = 0;
 
 	/* Check error on Ctrl+C or kill to prevent segfaults on exit */
-	if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
+	if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
 		return result;
 
 	blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id], blakerounds);
 	cudaDeviceSynchronize();
-	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
+	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		cudaThreadSynchronize();
-		result = *h_resNounce[thr_id];
+		result = h_resNounce[thr_id][0];
+		extra_results[0] = h_resNounce[thr_id][1];
 	}
 	return result;
 }
@@ -269,9 +276,20 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-	uint32_t throughput = min(TPB * 2048, max_nonce - first_nonce);
+	uint32_t throughput = min(TPB * 4096, max_nonce - first_nonce);
 	int rc = 0;
 
+	if (extra_results[0] != MAXU) {
+		// possible extra result found in previous call
+		if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) {
+			pdata[19] = extra_results[0];
+			*hashes_done = pdata[19] - first_nonce + 1;
+			extra_results[0] = MAXU;
+			rc = 1;
+			goto exit_scan;
+		}
+	}
+
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x00000f;
 
@@ -279,13 +297,13 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 		if (opt_n_threads > 1) {
 			CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
 		}
-		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], sizeof(uint32_t)));
-		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], sizeof(uint32_t)));
+		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
+		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
 		init[thr_id] = true;
 	}
 
-	if (throughput < (TPB * 2048))
-		applog(LOG_WARNING, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce);
+	if (opt_debug && throughput < (TPB * 4096))
+		applog(LOG_DEBUG, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce);
 
 	blake256_cpu_setBlock_80(pdata, ptarget);
 
@@ -309,6 +327,18 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 			{
 				pdata[19] = foundNonce;
 				rc = 1;
+
+				if (extra_results[0] != MAXU) {
+					// Rare, but possible when the throughput is large: re-verify the extra nonce on the CPU
+					be32enc(&endiandata[19], extra_results[0]);
+					blake256hash(vhashcpu, endiandata, blakerounds);
+					if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
+						applog(LOG_NOTICE, "GPU found more than one result yippee!");
+					} else {
+						extra_results[0] = MAXU;
+					}
+				}
+
 				goto exit_scan;
 			}
 			else if (vhashcpu[7] > Htarg) {
diff --git a/cpu-miner.c b/cpu-miner.c
index b3a6ba7..7f70a6e 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -1005,7 +1005,7 @@ static void *miner_thread(void *userdata)
 					work_restart[thr_id].restart = 1;
 					hashlog_purge_old();
 					// wait a bit for a new job...
-					usleep(1500*1000);
+					sleep(1);
 					(*nonceptr) = end_nonce + 1;
 					work_done = true;
 					continue;
diff --git a/cpuminer-config.h b/cpuminer-config.h
index 0fafa85..11edf82 100644
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@@ -156,7 +156,7 @@
 #define PACKAGE_NAME "ccminer"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.09.01"
+#define PACKAGE_STRING "ccminer 2014.09.06"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@@ -165,7 +165,7 @@
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.09.01"
+#define PACKAGE_VERSION "2014.09.06"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be
@@ -188,7 +188,7 @@
 #define USE_XOP 1
 
 /* Version number of package */
-#define VERSION "2014.09.01"
+#define VERSION "2014.09.06"
 
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
diff --git a/util.c b/util.c
index fe9168b..b2c0b0f 100644
--- a/util.c
+++ b/util.c
@@ -557,6 +557,9 @@ bool fulltest(const uint32_t *hash, const uint32_t *target)
 			rc = true;
 			break;
 		}
+		if (hash[0] == target[0]) {
+			applog(LOG_NOTICE, "We found an exact match!");
+		}
 	}
 
 	if (!rc && opt_debug) {
@@ -1122,8 +1125,7 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
 	sctx->next_diff = diff;
 	pthread_mutex_unlock(&sctx->work_lock);
 
-	if (opt_debug)
-		applog(LOG_DEBUG, "Stratum difficulty set to %g", diff);
+	applog(LOG_INFO, "Stratum difficulty set to %g", diff);
 
 	return true;
 }