|
|
|
@ -332,32 +332,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -332,32 +332,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
} |
|
|
|
|
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); |
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
|
* otherwise, and many cards lie about their max so use 256 as max |
|
|
|
|
* unless explicitly set on the command line. 79x0 cards perform |
|
|
|
|
* better without vectors */ |
|
|
|
|
if (preferred_vwidth > 1) { |
|
|
|
|
if (strstr(name, "Tahiti")) |
|
|
|
|
preferred_vwidth = 1; |
|
|
|
|
else |
|
|
|
|
preferred_vwidth = 2; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (gpus[gpu].vwidth) |
|
|
|
|
clState->vwidth = gpus[gpu].vwidth; |
|
|
|
|
else { |
|
|
|
|
clState->vwidth = preferred_vwidth; |
|
|
|
|
gpus[gpu].vwidth = preferred_vwidth; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size) |
|
|
|
|
clState->wsize = gpus[gpu].work_size; |
|
|
|
|
else if (strstr(name, "Tahiti")) |
|
|
|
|
clState->wsize = 64; |
|
|
|
|
else |
|
|
|
|
clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth; |
|
|
|
|
gpus[gpu].work_size = clState->wsize; |
|
|
|
|
|
|
|
|
|
/* Create binary filename based on parameters passed to opencl
|
|
|
|
|
* compiler to ensure we only load a binary that matches what would |
|
|
|
|
* have otherwise been created. The filename is: |
|
|
|
@ -378,7 +352,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -378,7 +352,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
applog(LOG_INFO, "Selecting diablo kernel"); |
|
|
|
|
clState->chosen_kernel = KL_DIABLO; |
|
|
|
|
} |
|
|
|
|
} else if (strstr(vbuff, "898.1")) { // Windows 64 bit 12.2 driver
|
|
|
|
|
} else if (strstr(vbuff, "898.1") || // Windows 64 bit 12.2 driver
|
|
|
|
|
strstr(name, "Tahiti")) { // All non SDK 2.6 79x0
|
|
|
|
|
applog(LOG_INFO, "Selecting diablo kernel"); |
|
|
|
|
clState->chosen_kernel = KL_DIABLO; |
|
|
|
|
} else if (clState->hasBitAlign) { |
|
|
|
@ -393,10 +368,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -393,10 +368,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
} else |
|
|
|
|
clState->chosen_kernel = gpus[gpu].kernel; |
|
|
|
|
|
|
|
|
|
/* For some reason 2 vectors is still better even if the card says
|
|
|
|
|
* otherwise, and many cards lie about their max so use 256 as max |
|
|
|
|
* unless explicitly set on the command line. */ |
|
|
|
|
if (preferred_vwidth > 2) |
|
|
|
|
preferred_vwidth = 2; |
|
|
|
|
|
|
|
|
|
switch (clState->chosen_kernel) { |
|
|
|
|
case KL_POCLBM: |
|
|
|
|
strcpy(filename, POCLBM_KERNNAME".cl"); |
|
|
|
|
strcpy(binaryfilename, POCLBM_KERNNAME); |
|
|
|
|
/* This kernel prefers to not use vectors */ |
|
|
|
|
preferred_vwidth = 1; |
|
|
|
|
break; |
|
|
|
|
case KL_PHATK: |
|
|
|
|
strcpy(filename, PHATK_KERNNAME".cl"); |
|
|
|
@ -413,6 +396,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
@@ -413,6 +396,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (gpus[gpu].vwidth) |
|
|
|
|
clState->vwidth = gpus[gpu].vwidth; |
|
|
|
|
else { |
|
|
|
|
clState->vwidth = preferred_vwidth; |
|
|
|
|
gpus[gpu].vwidth = preferred_vwidth; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size) |
|
|
|
|
clState->wsize = gpus[gpu].work_size; |
|
|
|
|
else if (strstr(name, "Tahiti")) |
|
|
|
|
clState->wsize = 64; |
|
|
|
|
else |
|
|
|
|
clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth; |
|
|
|
|
gpus[gpu].work_size = clState->wsize; |
|
|
|
|
|
|
|
|
|
FILE *binaryfile; |
|
|
|
|
size_t *binary_sizes; |
|
|
|
|
char **binaries; |
|
|
|
|