
Increase the efficiency of slow mining threads such as CPU miners dramatically.

Do this by detecting which threads cannot complete searching a work item within the scantime, and then dividing the work item up into multiple smaller work items.
Track the age of work items and whether they have been cloned before, to avoid doing the same work over.
If the work is too old to be divided, check whether it can be time rolled instead and use that to generate work.
This dramatically decreases the number of work items queued from the pool, leading to higher overall efficiency (but the same hashrate and share submission rate).
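
The core decision applied to each staged work item is: a clone is never reused; an item that is getting old (more than two thirds of the scantime since it was staged) is time rolled if the pool allows it; otherwise the starting nonce is advanced so the next slow thread only scans a smaller, later slice. Below is a minimal sketch of that flow in plain C. It is not the patch itself: the work_item struct, the reuse_work() helper and its secs_since_staged/nonce_step/scantime parameters are illustrative assumptions standing in for the real code.

    /* Minimal sketch of the divide-or-roll decision described above.
     * Illustrative only; not the patch's actual interfaces. */
    #include <stdbool.h>
    #include <stdint.h>

    struct work_item {
        uint32_t start_nonce;       /* where the next thread begins scanning */
        uint32_t ntime;             /* block time; may be rolled forward */
        int      rolls;             /* how many times ntime has been rolled */
        bool     is_clone;          /* clones are never re-divided or re-rolled */
        int      secs_since_staged; /* age of the work item */
    };

    /* Reuse a work item for another slow thread instead of queueing more
     * work from the pool.  Returns true if it could be divided or rolled. */
    static bool reuse_work(struct work_item *w, uint32_t nonce_step,
                           int scantime, bool pool_allows_ntime_roll)
    {
        if (w->is_clone)
            return false;                 /* never hand out a clone twice */

        if (w->secs_since_staged > scantime * 2 / 3) {
            /* Too old to divide safely: try to roll ntime instead */
            if (!pool_allows_ntime_roll || w->rolls >= 11)
                return false;
            w->ntime++;
            w->rolls++;
            w->start_nonce = 0;           /* rolled work is a fresh search space */
            return true;
        }

        if ((uint64_t)w->start_nonce + nonce_step < UINT32_MAX) {
            w->start_nonce += nonce_step; /* hand out a smaller, later slice */
            return true;
        }
        return false;                     /* nonce space exhausted */
    }

In the patch itself these checks live in can_roll(), roll_work() and divide_work() below, which operate on struct work, advance work->blk.nonce by MAXTHREADS / (hash_div - 1), and re-stamp tv_staged when rolling.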
Branch: nfactor-troky
Author: Con Kolivas, 14 years ago
Commit: 1536e085da

Changed files:
  main.c   (90 lines changed)
  miner.h  (4 lines changed)

main.c  (90 lines changed)

@@ -1206,6 +1206,7 @@ static bool workio_get_work(struct workio_cmd *wc)
 static bool stale_work(struct work *work)
 {
+    struct timeval now;
     bool ret = false;
     char *hexstr;
@@ -1214,6 +1215,10 @@ static bool stale_work(struct work *work)
     if (work->pool != current_pool())
         return ret;
+    gettimeofday(&now, NULL);
+    if ((now.tv_sec - work->tv_staged.tv_sec) > opt_scantime)
+        return ret;
     hexstr = bin2hex(work->data, 36);
     if (unlikely(!hexstr)) {
         applog(LOG_ERR, "submit_work_thread OOM");
@@ -1421,7 +1426,7 @@ static void test_work_current(struct work *work)
     char *hexstr;
     /* Only use the primary pool for determination */
-    if (work->pool != current_pool())
+    if (work->pool != current_pool() || work->cloned || work->rolls || work->clone)
         return;
     hexstr = bin2hex(work->data, 36);
@@ -1468,6 +1473,8 @@ static void *stage_thread(void *userdata)
         }
         test_work_current(work);
+        if (!work->cloned && !work->clone)
+            gettimeofday(&work->tv_staged, NULL);
         if (unlikely(!tq_push(getq, work))) {
             applog(LOG_ERR, "Failed to tq_push work in stage_thread");
@@ -2249,7 +2256,57 @@ static void flush_requests(void)
 static inline bool can_roll(struct work *work)
 {
     return (work->pool && !stale_work(work) && work->pool->has_rolltime &&
-        work->rolls < 11);
+        work->rolls < 11 && !work->clone);
 }
+
+static void roll_work(struct work *work)
+{
+    uint32_t *work_ntime;
+    uint32_t ntime;
+
+    work_ntime = (uint32_t *)(work->data + 68);
+    ntime = be32toh(*work_ntime);
+    ntime++;
+    *work_ntime = htobe32(ntime);
+    local_work++;
+    work->rolls++;
+    work->blk.nonce = 0;
+    gettimeofday(&work->tv_staged, NULL);
+    if (opt_debug)
+        applog(LOG_DEBUG, "Successfully rolled work");
+}
+
+/* Recycle the work at a higher starting res_nonce if we know the thread we're
+ * giving it to will not finish scanning it. We keep the master copy to be
+ * recycled more rapidly and discard the clone to avoid repeating work */
+static bool divide_work(struct timeval *now, struct work *work, uint32_t hash_div)
+{
+    uint64_t hash_inc;
+
+    if (hash_div < 3 || work->clone)
+        return false;
+
+    hash_inc = MAXTHREADS / (hash_div - 1);
+    if ((uint64_t)work->blk.nonce + hash_inc < MAXTHREADS) {
+        /* Don't keep handing it out if it's getting old, but try to
+         * roll it instead */
+        if ((now->tv_sec - work->tv_staged.tv_sec) > opt_scantime * 2 / 3) {
+            if (!can_roll(work))
+                return false;
+            else {
+                local_work++;
+                roll_work(work);
+                return true;
+            }
+        }
+        /* Okay we can divide it up */
+        work->blk.nonce += hash_inc;
+        work->cloned = true;
+        if (opt_debug)
+            applog(LOG_DEBUG, "Successfully divided work");
+        return true;
+    }
+    return false;
+}
+
 static bool get_work(struct work *work, bool queued, struct thr_info *thr,
@@ -2273,9 +2330,6 @@ retry:
     }
     if (!requests_staged() && can_roll(work)) {
-        uint32_t *work_ntime;
-        uint32_t ntime;
         /* Only print this message once each time we shift to localgen */
         if (!pool_tset(pool, &pool->idle)) {
             applog(LOG_WARNING, "Pool %d not providing work fast enough, generating work locally",
@@ -2296,13 +2350,8 @@ retry:
             }
         }
-        work_ntime = (uint32_t *)(work->data + 68);
-        ntime = be32toh(*work_ntime);
-        ntime++;
-        *work_ntime = htobe32(ntime);
+        roll_work(work);
         ret = true;
-        local_work++;
-        work->rolls++;
         goto out;
     }
@@ -2327,9 +2376,17 @@ retry:
     dec_queued();
     memcpy(work, work_heap, sizeof(*work));
+    /* Copy the res nonce back so we know to start at a higher baseline
+     * should we divide the same work up again. Make the work we're
+     * handing out be clone */
+    if (divide_work(&now, work_heap, hash_div)) {
+        tq_push(thr_info[stage_thr_id].q, work_heap);
+        work->clone = true;
+    } else
+        free(work_heap);
     ret = true;
-    free(work_heap);
 out:
     if (unlikely(ret == false)) {
         if ((opt_retries >= 0) && (++failures > opt_retries)) {
@@ -2402,9 +2459,8 @@ static void *miner_thread(void *userdata)
     /* Try to cycle approximately 5 times before each log update */
     const unsigned long cycle = opt_log_interval / 5 ? : 1;
     int request_interval;
-    unsigned const long request_nonce = MAXTHREADS / 3 * 2;
     bool requested = true;
-    uint32_t hash_div = 1;
+    uint32_t hash_div = opt_n_threads;
     pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
@@ -2445,7 +2501,6 @@ static void *miner_thread(void *userdata)
             mythr->cgpu->getworks++;
             needs_work = requested = false;
             total_hashes = 0;
-            work.blk.nonce = work.res_nonce;;
             max_nonce = work.blk.nonce + hashes_done;
         }
         hashes_done = 0;
@@ -2560,7 +2615,7 @@ static void *miner_thread(void *userdata)
         }
         timeval_subtract(&diff, &tv_end, &tv_workstart);
-        if (!requested && (diff.tv_sec >= request_interval || work.blk.nonce > request_nonce)) {
+        if (!requested && (diff.tv_sec >= request_interval)) {
             if (unlikely(!queue_request())) {
                 applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
                 goto out;
@@ -2810,7 +2865,6 @@ static void *gpuminer_thread(void *userdata)
             requested = false;
             precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-            work->blk.nonce = 0;
             work_restart[thr_id].restart = 0;
             if (opt_debug)

miner.h  (4 lines changed)

@@ -308,12 +308,14 @@ struct work {
     int             rolls;
     uint32_t        output[1];
     uint32_t        res_nonce;
     uint32_t        valid;
     dev_blk_ctx     blk;
     int             thr_id;
     struct pool     *pool;
+    struct timeval  tv_staged;
+    bool            clone;
+    bool            cloned;
 };

 enum cl_kernel {
