Do the dynamic timing in the OpenCL code over a single pass through scanhash, so that only OpenCL time contributes to the measured intervals.

2025-09-13 06:31:51 +00:00 · 2012-08-20 14:52:06 +10:00 · 2012-08-20 14:52:06 +10:00 · e5ed708493
commit e5ed708493
parent 08948e02f3
1 changed files with 43 additions and 44 deletions
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 	const int thr_id = thr->id;
 	struct opencl_thread_data *thrdata = thr->cgpu_data;
 	_clState *clState = clStates[thr_id];
+	struct cgpu_info *gpu = thr->cgpu;
+
+	if (gpu->dynamic)
+		return;

 	clFinish(clState->commandQueue);
 	if (thrdata->res[FOUND]) {
@@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 	struct timeval tv_gpuend;
-	cl_bool blocking;

 	cl_int status;
 	size_t globalThreads[1];
@@ -1499,18 +1502,73 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	unsigned int threads;
 	int64_t hashes;

-	if (gpu->dynamic)
-		blocking = CL_TRUE;
-	else
-		blocking = CL_FALSE;
-
 	/* This finish flushes the readbuffer set with CL_FALSE later */
-	if (!blocking)
+	if (!gpu->dynamic)
 		clFinish(clState->commandQueue);

+	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
+			   localThreads[0], gpu->intensity);
+	if (hashes > gpu->max_hashes)
+		gpu->max_hashes = hashes;
+
+	/* MAXBUFFERS entry is used as a flag to say nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return -1;
+		}
+		if (unlikely(thrdata->last_work)) {
+			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			thrdata->last_work = NULL;
+		} else {
+			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		clFinish(clState->commandQueue);
+	}
+
+	gettimeofday(&gpu->tv_gpumid, NULL);
+	if (!gpu->intervals) {
+		gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
+		gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
+	}
+
+	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+		return -1;
+	}
+
+	if (clState->goffset) {
+		size_t global_work_offset[1];
+
+		global_work_offset[0] = work->blk.nonce;
+		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
+						globalThreads, localThreads, 0,  NULL, NULL);
+	} else
+		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+						globalThreads, localThreads, 0,  NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
+		return -1;
+	}
+
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
+		return -1;
+	}
+
 	if (gpu->dynamic) {
 		double gpu_us;

+		clFinish(clState->commandQueue);
 		/* Windows returns the same time for gettimeofday due to its
 		 * 15ms timer resolution, so we must average the result over
 		 * at least 5 values that are actually different to get an
@@ -1535,65 +1593,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			gpu->intervals = gpu->hit = 0;
 		}
 	}
-	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
-			   localThreads[0], gpu->intensity);
-	if (hashes > gpu->max_hashes)
-		gpu->max_hashes = hashes;
-
-	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
-		return -1;
-	}
-
-	/* MAXBUFFERS entry is used as a flag to say nonces exist */
-	if (thrdata->res[FOUND]) {
-		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
-				BUFFERSIZE, blank_res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS)) {
-			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-			return -1;
-		}
-		if (unlikely(thrdata->last_work)) {
-			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
-			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
-			thrdata->last_work = NULL;
-		} else {
-			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-			postcalc_hash_async(thr, work, thrdata->res);
-		}
-		memset(thrdata->res, 0, BUFFERSIZE);
-		if (!blocking)
-			clFinish(clState->commandQueue);
-	}
-
-	gettimeofday(&gpu->tv_gpumid, NULL);
-	if (!gpu->intervals) {
-		gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
-		gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
-	}
-
-	if (clState->goffset) {
-		size_t global_work_offset[1];
-
-		global_work_offset[0] = work->blk.nonce;
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
-						globalThreads, localThreads, 0,  NULL, NULL);
-	} else
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-						globalThreads, localThreads, 0,  NULL, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
-		return -1;
-	}
-
-	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
-			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
-		return -1;
-	}

 	/* The amount of work scanned can fluctuate when intensity changes
 	 * and since we do this one cycle behind, we increment the work more