@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
@@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
const int thr_id = thr - > id ;
struct opencl_thread_data * thrdata = thr - > cgpu_data ;
_clState * clState = clStates [ thr_id ] ;
struct cgpu_info * gpu = thr - > cgpu ;
if ( gpu - > dynamic )
return ;
clFinish ( clState - > commandQueue ) ;
if ( thrdata - > res [ FOUND ] ) {
@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
@@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
const cl_kernel * kernel = & clState - > kernel ;
const int dynamic_us = opt_dynamic_interval * 1000 ;
struct timeval tv_gpuend ;
cl_bool blocking ;
cl_int status ;
size_t globalThreads [ 1 ] ;
@ -1499,57 +1502,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
@@ -1499,57 +1502,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
unsigned int threads ;
int64_t hashes ;
if ( gpu - > dynamic )
blocking = CL_TRUE ;
else
blocking = CL_FALSE ;
/* This finish flushes the readbuffer set with CL_FALSE later */
if ( ! blocking )
if ( ! gpu - > dynamic )
clFinish ( clState - > commandQueue ) ;
if ( gpu - > dynamic ) {
double gpu_us ;
/* Windows returns the same time for gettimeofday due to its
* 15 ms timer resolution , so we must average the result over
* at least 5 values that are actually different to get an
* accurate result */
gpu - > intervals + + ;
gettimeofday ( & tv_gpuend , NULL ) ;
gpu_us = us_tdiff ( & tv_gpuend , & gpu - > tv_gpumid ) ;
if ( gpu_us > 0 & & + + gpu - > hit > 4 ) {
gpu_us = us_tdiff ( & tv_gpuend , & gpu - > tv_gpustart ) / gpu - > intervals ;
gpu - > gpu_us_average = ( gpu - > gpu_us_average + gpu_us * 0.63 ) / 1.63 ;
/* Try to not let the GPU be out for longer than
* opt_dynamic_interval in ms , but increase
* intensity when the system is idle in dynamic mode */
if ( gpu - > gpu_us_average > dynamic_us ) {
if ( gpu - > intensity > MIN_INTENSITY )
- - gpu - > intensity ;
} else if ( gpu - > gpu_us_average < dynamic_us / 2 ) {
if ( gpu - > intensity < MAX_INTENSITY )
+ + gpu - > intensity ;
}
gpu - > intervals = gpu - > hit = 0 ;
}
}
set_threads_hashes ( clState - > vwidth , & threads , & hashes , globalThreads ,
localThreads [ 0 ] , gpu - > intensity ) ;
if ( hashes > gpu - > max_hashes )
gpu - > max_hashes = hashes ;
status = thrdata - > queue_kernel_parameters ( clState , & work - > blk , globalThreads [ 0 ] ) ;
if ( unlikely ( status ! = CL_SUCCESS ) ) {
applog ( LOG_ERR , " Error: clSetKernelArg of all params failed. " ) ;
return - 1 ;
}
/* MAXBUFFERS entry is used as a flag to say nonces exist */
if ( thrdata - > res [ FOUND ] ) {
/* Clear the buffer again */
status = clEnqueueWriteBuffer ( clState - > commandQueue , clState - > outputBuffer , blocking , 0 ,
status = clEnqueueWriteBuffer ( clState - > commandQueue , clState - > outputBuffer , CL_FALSE , 0 ,
BUFFERSIZE , blank_res , 0 , NULL , NULL ) ;
if ( unlikely ( status ! = CL_SUCCESS ) ) {
applog ( LOG_ERR , " Error: clEnqueueWriteBuffer failed. " ) ;
@ -1564,7 +1529,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
@@ -1564,7 +1529,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
postcalc_hash_async ( thr , work , thrdata - > res ) ;
}
memset ( thrdata - > res , 0 , BUFFERSIZE ) ;
if ( ! blocking )
clFinish ( clState - > commandQueue ) ;
}
@ -1574,6 +1538,12 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
@@ -1574,6 +1538,12 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
gpu - > tv_gpustart . tv_usec = gpu - > tv_gpumid . tv_usec ;
}
status = thrdata - > queue_kernel_parameters ( clState , & work - > blk , globalThreads [ 0 ] ) ;
if ( unlikely ( status ! = CL_SUCCESS ) ) {
applog ( LOG_ERR , " Error: clSetKernelArg of all params failed. " ) ;
return - 1 ;
}
if ( clState - > goffset ) {
size_t global_work_offset [ 1 ] ;
@ -1588,13 +1558,42 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
@@ -1588,13 +1558,42 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return - 1 ;
}
status = clEnqueueReadBuffer ( clState - > commandQueue , clState - > outputBuffer , blocking , 0 ,
status = clEnqueueReadBuffer ( clState - > commandQueue , clState - > outputBuffer , CL_FALSE , 0 ,
BUFFERSIZE , thrdata - > res , 0 , NULL , NULL ) ;
if ( unlikely ( status ! = CL_SUCCESS ) ) {
applog ( LOG_ERR , " Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer) " , status ) ;
return - 1 ;
}
if ( gpu - > dynamic ) {
double gpu_us ;
clFinish ( clState - > commandQueue ) ;
/* Windows returns the same time for gettimeofday due to its
* 15 ms timer resolution , so we must average the result over
* at least 5 values that are actually different to get an
* accurate result */
gpu - > intervals + + ;
gettimeofday ( & tv_gpuend , NULL ) ;
gpu_us = us_tdiff ( & tv_gpuend , & gpu - > tv_gpumid ) ;
if ( gpu_us > 0 & & + + gpu - > hit > 4 ) {
gpu_us = us_tdiff ( & tv_gpuend , & gpu - > tv_gpustart ) / gpu - > intervals ;
gpu - > gpu_us_average = ( gpu - > gpu_us_average + gpu_us * 0.63 ) / 1.63 ;
/* Try to not let the GPU be out for longer than
* opt_dynamic_interval in ms , but increase
* intensity when the system is idle in dynamic mode */
if ( gpu - > gpu_us_average > dynamic_us ) {
if ( gpu - > intensity > MIN_INTENSITY )
- - gpu - > intensity ;
} else if ( gpu - > gpu_us_average < dynamic_us / 2 ) {
if ( gpu - > intensity < MAX_INTENSITY )
+ + gpu - > intensity ;
}
gpu - > intervals = gpu - > hit = 0 ;
}
}
/* The amount of work scanned can fluctuate when intensity changes
* and since we do this one cycle behind , we increment the work more
* than enough to prevent repeating work */