mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-30 08:24:26 +00:00
Dramatically simplify the dynamic intensity calculation by oversampling many runs through the OpenCL kernel until we're likely well within the timer resolution on Windows.
This commit is contained in:
parent
2a9b3e33d3
commit
7450b25e75
@ -1463,10 +1463,8 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
|
|||||||
const int thr_id = thr->id;
|
const int thr_id = thr->id;
|
||||||
struct opencl_thread_data *thrdata = thr->cgpu_data;
|
struct opencl_thread_data *thrdata = thr->cgpu_data;
|
||||||
_clState *clState = clStates[thr_id];
|
_clState *clState = clStates[thr_id];
|
||||||
struct cgpu_info *gpu = thr->cgpu;
|
|
||||||
|
|
||||||
if (!gpu->dynamic)
|
clFinish(clState->commandQueue);
|
||||||
clFinish(clState->commandQueue);
|
|
||||||
|
|
||||||
if (thrdata->res[FOUND]) {
|
if (thrdata->res[FOUND]) {
|
||||||
thrdata->last_work = &thrdata->_last_work;
|
thrdata->last_work = &thrdata->_last_work;
|
||||||
@ -1496,7 +1494,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
_clState *clState = clStates[thr_id];
|
_clState *clState = clStates[thr_id];
|
||||||
const cl_kernel *kernel = &clState->kernel;
|
const cl_kernel *kernel = &clState->kernel;
|
||||||
const int dynamic_us = opt_dynamic_interval * 1000;
|
const int dynamic_us = opt_dynamic_interval * 1000;
|
||||||
struct timeval tv_gpuend;
|
|
||||||
|
|
||||||
cl_int status;
|
cl_int status;
|
||||||
size_t globalThreads[1];
|
size_t globalThreads[1];
|
||||||
@ -1504,8 +1501,25 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
int64_t hashes;
|
int64_t hashes;
|
||||||
|
|
||||||
/* This finish flushes the readbuffer set with CL_FALSE later */
|
/* This finish flushes the readbuffer set with CL_FALSE later */
|
||||||
if (!gpu->dynamic)
|
clFinish(clState->commandQueue);
|
||||||
clFinish(clState->commandQueue);
|
|
||||||
|
/* Windows' timer resolution is only 15ms so oversample 5x */
|
||||||
|
if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 75) {
|
||||||
|
struct timeval tv_gpuend;
|
||||||
|
double gpu_us;
|
||||||
|
|
||||||
|
gettimeofday(&tv_gpuend, NULL);
|
||||||
|
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
|
||||||
|
if (gpu_us > dynamic_us) {
|
||||||
|
if (gpu->intensity > MIN_INTENSITY)
|
||||||
|
--gpu->intensity;
|
||||||
|
} else if (gpu_us < dynamic_us / 2) {
|
||||||
|
if (gpu->intensity < MAX_INTENSITY)
|
||||||
|
++gpu->intensity;
|
||||||
|
}
|
||||||
|
memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
|
||||||
|
gpu->intervals = 0;
|
||||||
|
}
|
||||||
|
|
||||||
set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
|
set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
|
||||||
if (hashes > gpu->max_hashes)
|
if (hashes > gpu->max_hashes)
|
||||||
@ -1532,18 +1546,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
clFinish(clState->commandQueue);
|
clFinish(clState->commandQueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gpu->dynamic) {
|
|
||||||
gettimeofday(&gpu->tv_gpumid, NULL);
|
|
||||||
if (gpu->new_work) {
|
|
||||||
gpu->new_work = false;
|
|
||||||
gpu->intervals = gpu->hit = 0;
|
|
||||||
}
|
|
||||||
if (!gpu->intervals) {
|
|
||||||
gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
|
|
||||||
gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
|
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
|
||||||
if (unlikely(status != CL_SUCCESS)) {
|
if (unlikely(status != CL_SUCCESS)) {
|
||||||
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
||||||
@ -1571,39 +1573,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gpu->dynamic) {
|
|
||||||
double gpu_us;
|
|
||||||
|
|
||||||
clFinish(clState->commandQueue);
|
|
||||||
/* Windows returns the same time for gettimeofday due to its
|
|
||||||
* 15ms timer resolution, so we must average the result over
|
|
||||||
* at least 5 values that are actually different to get an
|
|
||||||
* accurate result */
|
|
||||||
gpu->intervals++;
|
|
||||||
gettimeofday(&tv_gpuend, NULL);
|
|
||||||
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
|
|
||||||
if (gpu_us > 0 && ++gpu->hit > 4) {
|
|
||||||
gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
|
|
||||||
/* Very rarely we may get an overflow so put an upper
|
|
||||||
* limit on the detected time */
|
|
||||||
if (unlikely(gpu->gpu_us_average > 0 && gpu_us > gpu->gpu_us_average * 4))
|
|
||||||
gpu_us = gpu->gpu_us_average * 4;
|
|
||||||
gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
|
|
||||||
|
|
||||||
/* Try to not let the GPU be out for longer than
|
|
||||||
* opt_dynamic_interval in ms, but increase
|
|
||||||
* intensity when the system is idle in dynamic mode */
|
|
||||||
if (gpu->gpu_us_average > dynamic_us) {
|
|
||||||
if (gpu->intensity > MIN_INTENSITY)
|
|
||||||
--gpu->intensity;
|
|
||||||
} else if (gpu->gpu_us_average < dynamic_us / 2) {
|
|
||||||
if (gpu->intensity < MAX_INTENSITY)
|
|
||||||
++gpu->intensity;
|
|
||||||
}
|
|
||||||
gpu->intervals = gpu->hit = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The amount of work scanned can fluctuate when intensity changes
|
/* The amount of work scanned can fluctuate when intensity changes
|
||||||
* and since we do this one cycle behind, we increment the work more
|
* and since we do this one cycle behind, we increment the work more
|
||||||
* than enough to prevent repeating work */
|
* than enough to prevent repeating work */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user