Mirror of https://github.com/GOSTSec/sgminer (synced 2025-09-13 14:42:05 +00:00)
Do the dynamic timing in opencl code over a single pass through scanhash to make sure we're only getting opencl times contributing to the measured intervals.
This commit is contained in:
parent 08948e02f3
commit e5ed708493

driver-opencl.c (131 changed lines)
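For context on what "dynamic timing" means here: dynamic intensity mode measures how long the GPU spends on one scanhash pass, averages that over several genuinely distinct gettimeofday samples (Windows only ticks about every 15 ms), and raises or lowers intensity so the averaged pass time stays near opt_dynamic_interval. The standalone C sketch below illustrates that feedback loop; the constants, the do_gpu_pass() stand-in and the adjustment rule are illustrative assumptions for this sketch, not the driver's actual code.

/*
 * Minimal sketch of the per-pass dynamic timing idea (illustrative only):
 * time just the GPU portion of one pass, average over several distinct
 * samples, and nudge intensity against a target interval.  do_gpu_pass(),
 * the constants and the adjustment rule are assumptions for this sketch,
 * not sgminer's actual driver code.
 */
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

#define DYNAMIC_US    7000  /* stand-in for opt_dynamic_interval * 1000 */
#define MIN_INTERVALS 5     /* average over at least 5 distinct samples */

static int intensity = 8;   /* hypothetical dynamic intensity value */
static int intervals, hit;  /* samples taken / samples over budget */
static struct timeval tv_gpustart;

/* Hypothetical stand-in for the OpenCL work (enqueue kernel, read buffer,
 * clFinish); run time roughly doubles with each intensity step. */
static void do_gpu_pass(void)
{
    usleep(1u << intensity);
}

static void one_scanhash_pass(void)
{
    struct timeval tv_gpumid, tv_gpuend;
    double gpu_us;

    gettimeofday(&tv_gpumid, NULL);
    if (!intervals)
        tv_gpustart = tv_gpumid;  /* first sample of this averaging run */

    do_gpu_pass();                /* only GPU time falls in the window */

    gettimeofday(&tv_gpuend, NULL);
    intervals++;
    /* Running average of per-pass GPU time over the samples so far. */
    gpu_us = ((tv_gpuend.tv_sec - tv_gpustart.tv_sec) * 1e6 +
              (tv_gpuend.tv_usec - tv_gpustart.tv_usec)) / intervals;
    if (gpu_us > DYNAMIC_US)
        hit++;

    /* Only adjust once enough samples have accumulated. */
    if (intervals >= MIN_INTERVALS) {
        if (hit == intervals && intensity > 0)
            intensity--;          /* consistently over the target */
        else if (!hit && intensity < 14)
            intensity++;          /* consistently under the target */
        intervals = hit = 0;
    }
}

int main(void)
{
    for (int i = 0; i < 60; i++)
        one_scanhash_pass();
    printf("settled intensity: %d\n", intensity);
    return 0;
}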
@@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
         const int thr_id = thr->id;
         struct opencl_thread_data *thrdata = thr->cgpu_data;
         _clState *clState = clStates[thr_id];
+        struct cgpu_info *gpu = thr->cgpu;
+
+        if (gpu->dynamic)
+                return;
 
         clFinish(clState->commandQueue);
         if (thrdata->res[FOUND]) {
@@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
         const cl_kernel *kernel = &clState->kernel;
         const int dynamic_us = opt_dynamic_interval * 1000;
         struct timeval tv_gpuend;
-        cl_bool blocking;
 
         cl_int status;
         size_t globalThreads[1];
@@ -1499,18 +1502,73 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
         unsigned int threads;
         int64_t hashes;
 
-        if (gpu->dynamic)
-                blocking = CL_TRUE;
-        else
-                blocking = CL_FALSE;
-
         /* This finish flushes the readbuffer set with CL_FALSE later */
-        if (!blocking)
+        if (!gpu->dynamic)
                 clFinish(clState->commandQueue);
 
+        set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
+                        localThreads[0], gpu->intensity);
+        if (hashes > gpu->max_hashes)
+                gpu->max_hashes = hashes;
+
+        /* MAXBUFFERS entry is used as a flag to say nonces exist */
+        if (thrdata->res[FOUND]) {
+                /* Clear the buffer again */
+                status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+                                BUFFERSIZE, blank_res, 0, NULL, NULL);
+                if (unlikely(status != CL_SUCCESS)) {
+                        applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+                        return -1;
+                }
+                if (unlikely(thrdata->last_work)) {
+                        applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+                        postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+                        thrdata->last_work = NULL;
+                } else {
+                        applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+                        postcalc_hash_async(thr, work, thrdata->res);
+                }
+                memset(thrdata->res, 0, BUFFERSIZE);
+                clFinish(clState->commandQueue);
+        }
+
+        gettimeofday(&gpu->tv_gpumid, NULL);
+        if (!gpu->intervals) {
+                gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
+                gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
+        }
+
+        status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
+        if (unlikely(status != CL_SUCCESS)) {
+                applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+                return -1;
+        }
+
+        if (clState->goffset) {
+                size_t global_work_offset[1];
+
+                global_work_offset[0] = work->blk.nonce;
+                status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
+                                globalThreads, localThreads, 0, NULL, NULL);
+        } else
+                status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+                                globalThreads, localThreads, 0, NULL, NULL);
+        if (unlikely(status != CL_SUCCESS)) {
+                applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
+                return -1;
+        }
+
+        status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+                        BUFFERSIZE, thrdata->res, 0, NULL, NULL);
+        if (unlikely(status != CL_SUCCESS)) {
+                applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
+                return -1;
+        }
+
         if (gpu->dynamic) {
                 double gpu_us;
 
+                clFinish(clState->commandQueue);
                 /* Windows returns the same time for gettimeofday due to its
                  * 15ms timer resolution, so we must average the result over
                  * at least 5 values that are actually different to get an
@@ -1535,65 +1593,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
                         gpu->intervals = gpu->hit = 0;
                 }
         }
-        set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
-                        localThreads[0], gpu->intensity);
-        if (hashes > gpu->max_hashes)
-                gpu->max_hashes = hashes;
-
-        status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
-        if (unlikely(status != CL_SUCCESS)) {
-                applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
-                return -1;
-        }
-
-        /* MAXBUFFERS entry is used as a flag to say nonces exist */
-        if (thrdata->res[FOUND]) {
-                /* Clear the buffer again */
-                status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
-                                BUFFERSIZE, blank_res, 0, NULL, NULL);
-                if (unlikely(status != CL_SUCCESS)) {
-                        applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-                        return -1;
-                }
-                if (unlikely(thrdata->last_work)) {
-                        applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
-                        postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
-                        thrdata->last_work = NULL;
-                } else {
-                        applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-                        postcalc_hash_async(thr, work, thrdata->res);
-                }
-                memset(thrdata->res, 0, BUFFERSIZE);
-                if (!blocking)
-                        clFinish(clState->commandQueue);
-        }
-
-        gettimeofday(&gpu->tv_gpumid, NULL);
-        if (!gpu->intervals) {
-                gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
-                gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
-        }
-
-        if (clState->goffset) {
-                size_t global_work_offset[1];
-
-                global_work_offset[0] = work->blk.nonce;
-                status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
-                                globalThreads, localThreads, 0, NULL, NULL);
-        } else
-                status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-                                globalThreads, localThreads, 0, NULL, NULL);
-        if (unlikely(status != CL_SUCCESS)) {
-                applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
-                return -1;
-        }
-
-        status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
-                        BUFFERSIZE, thrdata->res, 0, NULL, NULL);
-        if (unlikely(status != CL_SUCCESS)) {
-                applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
-                return -1;
-        }
 
         /* The amount of work scanned can fluctuate when intensity changes
          * and since we do this one cycle behind, we increment the work more