@@ -52,6 +52,8 @@ static uint32_t __align__(32) c_data[20];
 static uint32_t *d_resNounce[8];
 static uint32_t *h_resNounce[8];
 
+static uint32_t extra_results[2] = { MAXU, MAXU };
+
 /* prefer uint32_t to prevent size conversions = speed +5/10 % */
 __constant__
 static uint32_t __align__(32) c_sigma[16][16];
@@ -225,9 +227,13 @@ void blake256_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resN
 			}
 		}
 
-		/* keep the smallest nounce, hmm... */
-		if(resNounce[0] > nounce)
+		/* keep the smallest nounce, + extra one if found */
+		if (resNounce[0] > nounce) {
+			resNounce[1] = resNounce[0];
 			resNounce[0] = nounce;
+		}
+		else
+			resNounce[1] = nounce;
 	}
 }
 
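Note: the hunk above changes the kernel from keeping only the smallest matching nonce to keeping the smallest in resNounce[0] plus one extra candidate in resNounce[1]. A minimal standalone sketch of that selection logic (the keep_nonce wrapper is hypothetical and not part of the patch; only the branch structure mirrors the kernel):

	#include <stdint.h>

	#define MAXU 0xffffffffU  /* all-ones "no result" sentinel; matches the 0xff memset in the host code */

	/* Hypothetical helper mirroring the kernel branch above:
	 * res[0] holds the smallest nonce seen, res[1] one extra candidate. */
	static void keep_nonce(uint32_t res[2], uint32_t nounce)
	{
		if (res[0] > nounce) {
			res[1] = res[0];   /* previous best becomes the extra result */
			res[0] = nounce;
		}
		else
			res[1] = nounce;   /* non-smaller nonce is stored as the extra result */
	}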
@@ -242,14 +248,15 @@ uint32_t blake256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce
 	size_t shared_size = 0;
 
 	/* Check error on Ctrl+C or kill to prevent segfaults on exit */
-	if (cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
+	if (cudaMemset(d_resNounce[thr_id], 0xff, 2*sizeof(uint32_t)) != cudaSuccess)
 		return result;
 
 	blake256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_resNounce[thr_id], blakerounds);
 	cudaDeviceSynchronize();
-	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
+	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		cudaThreadSynchronize();
-		result = *h_resNounce[thr_id];
+		result = h_resNounce[thr_id][0];
+		extra_results[0] = h_resNounce[thr_id][1];
 	}
 	return result;
 }
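Note: this hunk, together with the allocation hunk further below, sizes the nonce result buffers for two entries instead of one. A condensed, illustrative sketch of the allocation / reset / readback sequence (single buffer, error checking and the per-thread arrays omitted; not part of the patch):

	uint32_t *h_res = NULL, *d_res = NULL;          /* stand-ins for h_resNounce/d_resNounce[thr_id] */
	cudaMallocHost(&h_res, 2 * sizeof(uint32_t));   /* pinned host buffer, two slots */
	cudaMalloc(&d_res, 2 * sizeof(uint32_t));       /* device buffer, two slots */
	cudaMemset(d_res, 0xff, 2 * sizeof(uint32_t));  /* both slots start at 0xffffffff */
	/* ... kernel launch and synchronization ... */
	cudaMemcpy(h_res, d_res, 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
	/* h_res[0] = best nonce, h_res[1] = possible extra nonce */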
@@ -269,9 +276,20 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-	uint32_t throughput = min(TPB * 2048, max_nonce - first_nonce);
+	uint32_t throughput = min(TPB * 4096, max_nonce - first_nonce);
 	int rc = 0;
 
+	if (extra_results[0] != MAXU) {
+		// possible extra result found in previous call
+		if (first_nonce <= extra_results[0] && max_nonce >= extra_results[0]) {
+			pdata[19] = extra_results[0];
+			*hashes_done = pdata[19] - first_nonce + 1;
+			extra_results[0] = MAXU;
+			rc = 1;
+			goto exit_scan;
+		}
+	}
+
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x00000f;
 
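Note: the hunk above also doubles the cap on the scan range, from TPB * 2048 to TPB * 4096 nonces per kernel launch; assuming TPB is 256 (the define lives elsewhere in the file and is not shown in this patch), that raises the maximum from 524,288 to 1,048,576 nonces per call.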
@@ -279,13 +297,13 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 		if (opt_n_threads > 1) {
 			CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
 		}
-		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], sizeof(uint32_t)));
-		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], sizeof(uint32_t)));
+		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
+		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
 		init[thr_id] = true;
 	}
 
-	if (throughput < (TPB * 2048))
-		applog(LOG_WARNING, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce);
+	if (opt_debug && throughput < (TPB * 4096))
+		applog(LOG_DEBUG, "throughput=%u, start=%x, max=%x", throughput, first_nonce, max_nonce);
 
 	blake256_cpu_setBlock_80(pdata, ptarget);
 
@@ -309,6 +327,18 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
 			{
 				pdata[19] = foundNonce;
 				rc = 1;
+
+				if (extra_results[0] != MAXU) {
+					// Rare but possible if the throughput is big
+					be32enc(&endiandata[19], extra_results[0]);
+					blake256hash(vhashcpu, endiandata, blakerounds);
+					if (vhashcpu[7] <= Htarg && fulltest(vhashcpu, ptarget)) {
+						applog(LOG_NOTICE, "GPU found more than one result yippee!");
+					} else {
+						extra_results[0] = MAXU;
+					}
+				}
+
 				goto exit_scan;
 			}
 			else if (vhashcpu[7] > Htarg) {