Browse Source

Simplify repeated use of `gpus[gpu].` in ocl.c

nfactor-troky
Con Kolivas 13 years ago
parent
commit
da1b996a39
  1. 61
      ocl.c

61
ocl.c

@ -208,6 +208,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{ {
_clState *clState = calloc(1, sizeof(_clState)); _clState *clState = calloc(1, sizeof(_clState));
bool patchbfi = false, prog_built = false; bool patchbfi = false, prog_built = false;
struct cgpu_info *cgpu = &gpus[gpu];
cl_platform_id platform = NULL; cl_platform_id platform = NULL;
char pbuff[256], vbuff[255]; char pbuff[256], vbuff[255];
cl_platform_id* platforms; cl_platform_id* platforms;
@ -363,12 +364,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
} }
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size); applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&gpus[gpu].max_alloc, NULL); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
if (status != CL_SUCCESS) { if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status); applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
return NULL; return NULL;
} }
applog(LOG_DEBUG, "Max mem alloc size is %u", gpus[gpu].max_alloc); applog(LOG_DEBUG, "Max mem alloc size is %u", cgpu->max_alloc);
/* Create binary filename based on parameters passed to opencl /* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would * compiler to ensure we only load a binary that matches what would
@ -381,7 +382,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
char filename[255]; char filename[255];
char numbuf[10]; char numbuf[10];
if (gpus[gpu].kernel == KL_NONE) { if (cgpu->kernel == KL_NONE) {
if (opt_scrypt) { if (opt_scrypt) {
applog(LOG_INFO, "Selecting scrypt kernel"); applog(LOG_INFO, "Selecting scrypt kernel");
clState->chosen_kernel = KL_SCRYPT; clState->chosen_kernel = KL_SCRYPT;
@ -403,9 +404,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
applog(LOG_INFO, "Selecting phatk kernel"); applog(LOG_INFO, "Selecting phatk kernel");
clState->chosen_kernel = KL_PHATK; clState->chosen_kernel = KL_PHATK;
} }
gpus[gpu].kernel = clState->chosen_kernel; cgpu->kernel = clState->chosen_kernel;
} else { } else {
clState->chosen_kernel = gpus[gpu].kernel; clState->chosen_kernel = cgpu->kernel;
if (clState->chosen_kernel == KL_PHATK && if (clState->chosen_kernel == KL_PHATK &&
(strstr(vbuff, "844.4") || strstr(vbuff, "851.4") || (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
strstr(vbuff, "831.4") || strstr(vbuff, "898.1") || strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@ -442,7 +443,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcpy(filename, SCRYPT_KERNNAME".cl"); strcpy(filename, SCRYPT_KERNNAME".cl");
strcpy(binaryfilename, SCRYPT_KERNNAME); strcpy(binaryfilename, SCRYPT_KERNNAME);
/* Scrypt only supports vector 1 */ /* Scrypt only supports vector 1 */
gpus[gpu].vwidth = 1; cgpu->vwidth = 1;
break; break;
case KL_NONE: /* Shouldn't happen */ case KL_NONE: /* Shouldn't happen */
case KL_DIABLO: case KL_DIABLO:
@ -451,45 +452,45 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
break; break;
} }
if (gpus[gpu].vwidth) if (cgpu->vwidth)
clState->vwidth = gpus[gpu].vwidth; clState->vwidth = cgpu->vwidth;
else { else {
clState->vwidth = preferred_vwidth; clState->vwidth = preferred_vwidth;
gpus[gpu].vwidth = preferred_vwidth; cgpu->vwidth = preferred_vwidth;
} }
if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) && if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt) clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
clState->goffset = true; clState->goffset = true;
if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size) if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
clState->wsize = gpus[gpu].work_size; clState->wsize = cgpu->work_size;
else if (strstr(name, "Tahiti")) else if (strstr(name, "Tahiti"))
clState->wsize = 64; clState->wsize = 64;
else else
clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth; clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
gpus[gpu].work_size = clState->wsize; cgpu->work_size = clState->wsize;
#ifdef USE_SCRYPT #ifdef USE_SCRYPT
if (opt_scrypt) { if (opt_scrypt) {
cl_ulong ma = gpus[gpu].max_alloc, mt; cl_ulong ma = cgpu->max_alloc, mt;
int pow2 = 0; int pow2 = 0;
if (!gpus[gpu].lookup_gap) { if (!cgpu->lookup_gap) {
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu); applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
gpus[gpu].lookup_gap = 2; cgpu->lookup_gap = 2;
} }
if (!gpus[gpu].thread_concurrency) { if (!cgpu->thread_concurrency) {
gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap; cgpu->thread_concurrency = ma / 32768 / cgpu->lookup_gap;
if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders) if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders)
gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders; cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, gpus[gpu].thread_concurrency); applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, cgpu->thread_concurrency);
} }
/* If we have memory to spare, try to find a power of 2 value /* If we have memory to spare, try to find a power of 2 value
* >= required amount to map nicely to an intensity */ * >= required amount to map nicely to an intensity */
mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap; mt = cgpu->thread_concurrency * 32768 * cgpu->lookup_gap;
if (ma > mt) { if (ma > mt) {
while (ma >>= 1) while (ma >>= 1)
pow2++; pow2++;
@ -497,8 +498,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
while (--pow2 && ma < mt) while (--pow2 && ma < mt)
ma <<= 1; ma <<= 1;
if (ma >= mt) { if (ma >= mt) {
gpus[gpu].max_alloc = ma; cgpu->max_alloc = ma;
applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc); applog(LOG_DEBUG, "Max alloc decreased to %lu", cgpu->max_alloc);
} }
} }
} }
@ -532,7 +533,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (clState->goffset) if (clState->goffset)
strcat(binaryfilename, "g"); strcat(binaryfilename, "g");
if (opt_scrypt) { if (opt_scrypt) {
sprintf(numbuf, "lg%dtc%d", gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency); sprintf(numbuf, "lg%dtc%d", cgpu->lookup_gap, cgpu->thread_concurrency);
strcat(binaryfilename, numbuf); strcat(binaryfilename, numbuf);
} else { } else {
sprintf(numbuf, "v%d", clState->vwidth); sprintf(numbuf, "v%d", clState->vwidth);
@ -604,7 +605,7 @@ build:
#ifdef USE_SCRYPT #ifdef USE_SCRYPT
if (opt_scrypt) if (opt_scrypt)
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d", sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency, (int)clState->wsize); cgpu->lookup_gap, cgpu->thread_concurrency, (int)clState->wsize);
else else
#endif #endif
{ {
@ -791,16 +792,16 @@ built:
#ifdef USE_SCRYPT #ifdef USE_SCRYPT
if (opt_scrypt) { if (opt_scrypt) {
size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0)); size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency; size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
/* Use the max alloc value which has been rounded to a power of /* Use the max alloc value which has been rounded to a power of
* 2 greater >= required amount earlier */ * 2 greater >= required amount earlier */
if (bufsize > gpus[gpu].max_alloc) { if (bufsize > cgpu->max_alloc) {
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u", applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
gpu, gpus[gpu].max_alloc, bufsize); gpu, cgpu->max_alloc, bufsize);
} else } else
bufsize = gpus[gpu].max_alloc; bufsize = cgpu->max_alloc;
applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize); applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
clState->padbufsize = bufsize; clState->padbufsize = bufsize;
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status); clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);

Loading…
Cancel
Save