Do the dynamic timing in opencl code over a single pass through scanhash to make sure we're only getting opencl times contributing to the measured intervals.

13 years ago · e5ed708493
1 changed files with 43 additions and 44 deletions
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 	const int thr_id = thr->id;
 	struct opencl_thread_data *thrdata = thr->cgpu_data;
 	_clState *clState = clStates[thr_id];
 	struct cgpu_info *gpu = thr->cgpu;
 	if (gpu->dynamic)
 		return;
 	clFinish(clState->commandQueue);
 	if (thrdata->res[FOUND]) {
@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 	struct timeval tv_gpuend;
 	cl_bool blocking;
 	cl_int status;
 	size_t globalThreads[1];
@ -1499,57 +1502,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	unsigned int threads;
 	int64_t hashes;
 	if (gpu->dynamic)
 		blocking = CL_TRUE;
 	else
 		blocking = CL_FALSE;
 	/* This finish flushes the readbuffer set with CL_FALSE later */
-	if (!blocking)
+	if (!gpu->dynamic)
 		clFinish(clState->commandQueue);
 	if (gpu->dynamic) {
 		double gpu_us;
 		/* Windows returns the same time for gettimeofday due to its
 		 * 15ms timer resolution, so we must average the result over
 		 * at least 5 values that are actually different to get an
 		 * accurate result */
 		gpu->intervals++;
 		gettimeofday(&tv_gpuend, NULL);
 		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
 		if (gpu_us > 0 && ++gpu->hit > 4) {
 			gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
 			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
 			/* Try to not let the GPU be out for longer than 
 			 * opt_dynamic_interval in ms, but increase
 			 * intensity when the system is idle in dynamic mode */
 			if (gpu->gpu_us_average > dynamic_us) {
 				if (gpu->intensity > MIN_INTENSITY)
 					--gpu->intensity;
 			} else if (gpu->gpu_us_average < dynamic_us / 2) {
 				if (gpu->intensity < MAX_INTENSITY)
 					++gpu->intensity;
 			}
 			gpu->intervals = gpu->hit = 0;
 		}
 	}
 	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
 			   localThreads[0], gpu->intensity);
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		return -1;
 	}
 	/* MAXBUFFERS entry is used as a flag to say nonces exist */
 	if (thrdata->res[FOUND]) {
 		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
 				BUFFERSIZE, blank_res, 0, NULL, NULL);
 		if (unlikely(status != CL_SUCCESS)) {
 			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@ -1564,8 +1529,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			postcalc_hash_async(thr, work, thrdata->res);
 		}
 		memset(thrdata->res, 0, BUFFERSIZE);
-		if (!blocking)
+		clFinish(clState->commandQueue);
 			clFinish(clState->commandQueue);
 	}
 	gettimeofday(&gpu->tv_gpumid, NULL);
@ -1574,6 +1538,12 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
 	}
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		return -1;
 	}
 	if (clState->goffset) {
 		size_t global_work_offset[1];
@ -1588,13 +1558,42 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		return -1;
 	}
-	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
 			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
 		return -1;
 	}
 	if (gpu->dynamic) {
 		double gpu_us;
 		clFinish(clState->commandQueue);
 		/* Windows returns the same time for gettimeofday due to its
 		 * 15ms timer resolution, so we must average the result over
 		 * at least 5 values that are actually different to get an
 		 * accurate result */
 		gpu->intervals++;
 		gettimeofday(&tv_gpuend, NULL);
 		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
 		if (gpu_us > 0 && ++gpu->hit > 4) {
 			gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
 			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
 			/* Try to not let the GPU be out for longer than
 			 * opt_dynamic_interval in ms, but increase
 			 * intensity when the system is idle in dynamic mode */
 			if (gpu->gpu_us_average > dynamic_us) {
 				if (gpu->intensity > MIN_INTENSITY)
 					--gpu->intensity;
 			} else if (gpu->gpu_us_average < dynamic_us / 2) {
 				if (gpu->intensity < MAX_INTENSITY)
 					++gpu->intensity;
 			}
 			gpu->intervals = gpu->hit = 0;
 		}
 	}
 	/* The amount of work scanned can fluctuate when intensity changes
 	 * and since we do this one cycle behind, we increment the work more
 	 * than enough to prevent repeating work */