Optimise work loop to make cl calls asynchronous where possible.

2025-03-13 06:01:03 +00:00 · 2011-06-22 10:15:23 +10:00 · 2011-06-22 10:15:23 +10:00 · f117675ac2
commit f117675ac2
parent f05270b8c8
3 changed files with 46 additions and 48 deletions
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata)
 	struct thr_info *mythr = userdata;
 	struct timeval tv_start;
 	int thr_id = mythr->id;
-	uint32_t res[128];
+	uint32_t res[128], blank_res[128];

 	setpriority(PRIO_PROCESS, 0, 19);

-	memset(res, 0, BUFFERSIZE);
+	memset(blank_res, 0, BUFFERSIZE);

 	size_t globalThreads[1];
 	size_t localThreads[1];
@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata)
 		{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }

 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-			BUFFERSIZE, res, 0, NULL, NULL);   
+			BUFFERSIZE, blank_res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS))
 		{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }

 	struct work *work = malloc(sizeof(struct work));
 	bool need_work = true;
 	unsigned int threads = 1 << 22;
-	unsigned int h0count = 0;
+
 	gettimeofday(&tv_start, NULL);
+	globalThreads[0] = threads;
+	localThreads[0] = 128;

 	while (1) {
 		struct timeval tv_end, diff;
 		int i;

 		if (need_work) {
-			work_restart[thr_id].restart = 0;
-
-			if (opt_debug)
-				applog(LOG_DEBUG, "getwork");
-
 			/* obtain new work from internal workio thread */
 			if (unlikely(!get_work(mythr, work))) {
 				applog(LOG_ERR, "work retrieval failed, exiting "
@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata)
 			}

 			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-
 			work->blk.nonce = 0;
-			need_work = false;
-		}
-		globalThreads[0] = threads;
-		localThreads[0] = 128;
-
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
+			status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
 				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+			if (unlikely(status != CL_SUCCESS))
+				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }

-		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, 
+			work_restart[thr_id].restart = 0;
+			need_work = false;
+
+			if (opt_debug)
+				applog(LOG_DEBUG, "getwork");
+
+		}
+		clFinish(clState->commandQueue);
+
+		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
 				globalThreads, localThreads, 0,  NULL, NULL);
 		if (unlikely(status != CL_SUCCESS))
 			{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }

-		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, 
-				BUFFERSIZE, res, 0, NULL, NULL);   
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
+		/* 127 is used as a flag to say nonces exist */
 		if (unlikely(res[127])) {
-			/* 127 is used as a flag to say nonces exist */
+			/* Clear the buffer again */
+			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+					BUFFERSIZE, blank_res, 0, NULL, NULL);
+			if (unlikely(status != CL_SUCCESS))
+				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
 			for (i = 0; i < 127; i++) {
 				if (res[i]) {
-					uint32_t start = res[i];
-					uint32_t my_g, my_nonce;
-
 					applog(LOG_INFO, "GPU Found something?");
-					my_g = postcalc_hash(mythr, &work->blk, work, start, start + 1026, &my_nonce, &h0count);
-					res[i] = 0;
+					postcalc_hash(mythr, &work->blk, work, res[i]);
 				} else
 					break;
 			}
-			/* Clear the buffer again */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-					BUFFERSIZE, res, 0, NULL, NULL);   
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+			clFinish(clState->commandQueue);
 		}

+		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS))
+			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
+
 		gettimeofday(&tv_end, NULL);
 		timeval_subtract(&diff, &tv_end, &tv_start);
 		hashmeter(thr_id, &diff, threads);
@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata)
 		if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
 			(work_restart[thr_id].restart))
 				need_work = true;
+
+		clFinish(clState->commandQueue);
+
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
+				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS))
+			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+
 	}
 out:
 	tq_freeze(mythr->q);
--- a/findnonce.c
+++ b/findnonce.c
@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
  R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
  R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])

-uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
-		       struct work *work, uint32_t start, uint32_t end, 
-		       uint32_t *best_nonce, unsigned int *h0count)
+void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start)
 {
 	cl_uint A, B, C, D, E, F, G, H;
 	cl_uint W[16];
 	cl_uint nonce;
 	cl_uint best_g = ~0;
+	uint32_t end = start + 1026;

 	for (nonce = start; nonce != end; nonce+=1) {
 		A = blk->cty_a; B = blk->cty_b;
@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
 		FR(48); PFR(56);

 		if (unlikely(H == 0xA41F32E7)) {
-			(*h0count)++;
-
 			if (unlikely(submit_nonce(thr, work, nonce) == false)) {
 				applog(LOG_ERR, "Failed to submit work, exiting");
 				goto out;
@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
 			G += 0x1f83d9ab;
 			G = ByteReverse(G);

-			if (G < best_g) {
-				*best_nonce = nonce;
+			if (G < best_g)
 				best_g = G;
-			}
 		}
 	}
 out:
-	// if (unlikely(best_g == ~0)) applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
-
-	return best_g;
+	if (unlikely(best_g == ~0))
+		applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
 }
--- a/findnonce.h
+++ b/findnonce.h
@ -19,6 +19,4 @@ typedef struct {
 } dev_blk_ctx;

 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
-extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
-			      struct work *work, uint32_t start, uint32_t end,
-			      uint32_t *best_nonce, unsigned int *h0count);
+extern void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start);