|
|
|
@ -318,12 +318,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -318,12 +318,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
} |
|
|
|
|
if (opt_debug) |
|
|
|
|
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); |
|
|
|
|
/* Some nvidia cards report 1024 but fail when set larger than 512 !? */ |
|
|
|
|
if (clState->max_work_size > 512) |
|
|
|
|
clState->max_work_size = 512; |
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
|
* otherwise */ |
|
|
|
|
* otherwise, and many cards lie about their max so use 256 as max |
|
|
|
|
* unless explicitly set on the command line */ |
|
|
|
|
if (clState->preferred_vwidth > 1) |
|
|
|
|
clState->preferred_vwidth = 2; |
|
|
|
|
if (opt_vectors) |
|
|
|
@ -331,7 +329,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -331,7 +329,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
if (opt_worksize && opt_worksize <= clState->max_work_size) |
|
|
|
|
clState->work_size = opt_worksize; |
|
|
|
|
else |
|
|
|
|
clState->work_size = clState->max_work_size / clState->preferred_vwidth; |
|
|
|
|
clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / |
|
|
|
|
clState->preferred_vwidth; |
|
|
|
|
|
|
|
|
|
/* Create binary filename based on parameters passed to opencl
|
|
|
|
|
* compiler to ensure we only load a binary that matches what would |
|
|
|
|