Browse Source

Do the dynamic timing in opencl code over a single pass through scanhash to make sure we're only getting opencl times contributing to the measured intervals.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
e5ed708493
  1. 87
      driver-opencl.c

87
driver-opencl.c

@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
const int thr_id = thr->id; const int thr_id = thr->id;
struct opencl_thread_data *thrdata = thr->cgpu_data; struct opencl_thread_data *thrdata = thr->cgpu_data;
_clState *clState = clStates[thr_id]; _clState *clState = clStates[thr_id];
struct cgpu_info *gpu = thr->cgpu;
if (gpu->dynamic)
return;
clFinish(clState->commandQueue); clFinish(clState->commandQueue);
if (thrdata->res[FOUND]) { if (thrdata->res[FOUND]) {
@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
const cl_kernel *kernel = &clState->kernel; const cl_kernel *kernel = &clState->kernel;
const int dynamic_us = opt_dynamic_interval * 1000; const int dynamic_us = opt_dynamic_interval * 1000;
struct timeval tv_gpuend; struct timeval tv_gpuend;
cl_bool blocking;
cl_int status; cl_int status;
size_t globalThreads[1]; size_t globalThreads[1];
@ -1499,57 +1502,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
unsigned int threads; unsigned int threads;
int64_t hashes; int64_t hashes;
if (gpu->dynamic)
blocking = CL_TRUE;
else
blocking = CL_FALSE;
/* This finish flushes the readbuffer set with CL_FALSE later */ /* This finish flushes the readbuffer set with CL_FALSE later */
if (!blocking) if (!gpu->dynamic)
clFinish(clState->commandQueue); clFinish(clState->commandQueue);
if (gpu->dynamic) {
double gpu_us;
/* Windows returns the same time for gettimeofday due to its
* 15ms timer resolution, so we must average the result over
* at least 5 values that are actually different to get an
* accurate result */
gpu->intervals++;
gettimeofday(&tv_gpuend, NULL);
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
if (gpu_us > 0 && ++gpu->hit > 4) {
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
/* Try to not let the GPU be out for longer than
* opt_dynamic_interval in ms, but increase
* intensity when the system is idle in dynamic mode */
if (gpu->gpu_us_average > dynamic_us) {
if (gpu->intensity > MIN_INTENSITY)
--gpu->intensity;
} else if (gpu->gpu_us_average < dynamic_us / 2) {
if (gpu->intensity < MAX_INTENSITY)
++gpu->intensity;
}
gpu->intervals = gpu->hit = 0;
}
}
set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads, set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
localThreads[0], gpu->intensity); localThreads[0], gpu->intensity);
if (hashes > gpu->max_hashes) if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes; gpu->max_hashes = hashes;
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
return -1;
}
/* MAXBUFFERS entry is used as a flag to say nonces exist */ /* MAXBUFFERS entry is used as a flag to say nonces exist */
if (thrdata->res[FOUND]) { if (thrdata->res[FOUND]) {
/* Clear the buffer again */ /* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0, status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL); BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) { if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@ -1564,8 +1529,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
postcalc_hash_async(thr, work, thrdata->res); postcalc_hash_async(thr, work, thrdata->res);
} }
memset(thrdata->res, 0, BUFFERSIZE); memset(thrdata->res, 0, BUFFERSIZE);
if (!blocking) clFinish(clState->commandQueue);
clFinish(clState->commandQueue);
} }
gettimeofday(&gpu->tv_gpumid, NULL); gettimeofday(&gpu->tv_gpumid, NULL);
@ -1574,6 +1538,12 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec; gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
} }
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
return -1;
}
if (clState->goffset) { if (clState->goffset) {
size_t global_work_offset[1]; size_t global_work_offset[1];
@ -1588,13 +1558,42 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1; return -1;
} }
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0, status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, thrdata->res, 0, NULL, NULL); BUFFERSIZE, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) { if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status); applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1; return -1;
} }
if (gpu->dynamic) {
double gpu_us;
clFinish(clState->commandQueue);
/* Windows returns the same time for gettimeofday due to its
* 15ms timer resolution, so we must average the result over
* at least 5 values that are actually different to get an
* accurate result */
gpu->intervals++;
gettimeofday(&tv_gpuend, NULL);
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
if (gpu_us > 0 && ++gpu->hit > 4) {
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
/* Try to not let the GPU be out for longer than
* opt_dynamic_interval in ms, but increase
* intensity when the system is idle in dynamic mode */
if (gpu->gpu_us_average > dynamic_us) {
if (gpu->intensity > MIN_INTENSITY)
--gpu->intensity;
} else if (gpu->gpu_us_average < dynamic_us / 2) {
if (gpu->intensity < MAX_INTENSITY)
++gpu->intensity;
}
gpu->intervals = gpu->hit = 0;
}
}
/* The amount of work scanned can fluctuate when intensity changes /* The amount of work scanned can fluctuate when intensity changes
* and since we do this one cycle behind, we increment the work more * and since we do this one cycle behind, we increment the work more
* than enough to prevent repeating work */ * than enough to prevent repeating work */

Loading…
Cancel
Save