diff --git a/ocl.c b/ocl.c
index ba8cde2f..71b69ac0 100644
--- a/ocl.c
+++ b/ocl.c
@@ -208,6 +208,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 {
 	_clState *clState = calloc(1, sizeof(_clState));
 	bool patchbfi = false, prog_built = false;
+	struct cgpu_info *cgpu = &gpus[gpu];
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
 	cl_platform_id* platforms;
@@ -363,12 +364,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
 
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&gpus[gpu].max_alloc, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
 		return NULL;
 	}
-	applog(LOG_DEBUG, "Max mem alloc size is %u", gpus[gpu].max_alloc);
+	applog(LOG_DEBUG, "Max mem alloc size is %u", cgpu->max_alloc);
 
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
@@ -381,7 +382,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char filename[255];
 	char numbuf[10];
 
-	if (gpus[gpu].kernel == KL_NONE) {
+	if (cgpu->kernel == KL_NONE) {
 		if (opt_scrypt) {
 			applog(LOG_INFO, "Selecting scrypt kernel");
 			clState->chosen_kernel = KL_SCRYPT;
@@ -403,9 +404,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			applog(LOG_INFO, "Selecting phatk kernel");
 			clState->chosen_kernel = KL_PHATK;
 		}
-		gpus[gpu].kernel = clState->chosen_kernel;
+		cgpu->kernel = clState->chosen_kernel;
 	} else {
-		clState->chosen_kernel = gpus[gpu].kernel;
+		clState->chosen_kernel = cgpu->kernel;
 		if (clState->chosen_kernel == KL_PHATK &&
 		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -442,7 +443,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, SCRYPT_KERNNAME".cl");
 			strcpy(binaryfilename, SCRYPT_KERNNAME);
 			/* Scrypt only supports vector 1 */
-			gpus[gpu].vwidth = 1;
+			cgpu->vwidth = 1;
 			break;
 		case KL_NONE: /* Shouldn't happen */
 		case KL_DIABLO:
@@ -451,45 +452,45 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			break;
 	}
 
-	if (gpus[gpu].vwidth)
-		clState->vwidth = gpus[gpu].vwidth;
+	if (cgpu->vwidth)
+		clState->vwidth = cgpu->vwidth;
 	else {
 		clState->vwidth = preferred_vwidth;
-		gpus[gpu].vwidth = preferred_vwidth;
+		cgpu->vwidth = preferred_vwidth;
 	}
 
 	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
 		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
 			clState->goffset = true;
 
-	if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
-		clState->wsize = gpus[gpu].work_size;
+	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
+		clState->wsize = cgpu->work_size;
 	else if (strstr(name, "Tahiti"))
 		clState->wsize = 64;
 	else
 		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	gpus[gpu].work_size = clState->wsize;
+	cgpu->work_size = clState->wsize;
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		cl_ulong ma = gpus[gpu].max_alloc, mt;
+		cl_ulong ma = cgpu->max_alloc, mt;
 		int pow2 = 0;
 
-		if (!gpus[gpu].lookup_gap) {
+		if (!cgpu->lookup_gap) {
 			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			gpus[gpu].lookup_gap = 2;
+			cgpu->lookup_gap = 2;
 		}
-		if (!gpus[gpu].thread_concurrency) {
-			gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap;
-			if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders)
-				gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders;
+		if (!cgpu->thread_concurrency) {
+			cgpu->thread_concurrency = ma / 32768 / cgpu->lookup_gap;
+			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders)
+				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
 
-			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, gpus[gpu].thread_concurrency);
+			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, cgpu->thread_concurrency);
 		}
 
 		/* If we have memory to spare, try to find a power of 2 value
 		 * >= required amount to map nicely to an intensity */
-		mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap;
+		mt = cgpu->thread_concurrency * 32768 * cgpu->lookup_gap;
 		if (ma > mt) {
 			while (ma >>= 1)
 				pow2++;
@@ -497,8 +498,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			while (--pow2 && ma < mt)
 				ma <<= 1;
 			if (ma >= mt) {
-				gpus[gpu].max_alloc = ma;
-				applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc);
+				cgpu->max_alloc = ma;
+				applog(LOG_DEBUG, "Max alloc decreased to %lu", cgpu->max_alloc);
 			}
 		}
 	}
@@ -532,7 +533,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	if (clState->goffset)
 		strcat(binaryfilename, "g");
 	if (opt_scrypt) {
-		sprintf(numbuf, "lg%dtc%d", gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency);
+		sprintf(numbuf, "lg%dtc%d", cgpu->lookup_gap, cgpu->thread_concurrency);
 		strcat(binaryfilename, numbuf);
 	} else {
 		sprintf(numbuf, "v%d", clState->vwidth);
@@ -604,7 +605,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency, (int)clState->wsize);
+			cgpu->lookup_gap, cgpu->thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -791,16 +792,16 @@ built:
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
-		size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
+		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
+		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
 
 		/* Use the max alloc value which has been rounded to a power of
 		 * 2 greater >= required amount earlier */
-		if (bufsize > gpus[gpu].max_alloc) {
+		if (bufsize > cgpu->max_alloc) {
 			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
-				gpu, gpus[gpu].max_alloc, bufsize);
+				gpu, cgpu->max_alloc, bufsize);
 		} else
-			bufsize = gpus[gpu].max_alloc;
+			bufsize = cgpu->max_alloc;
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
 		clState->padbufsize = bufsize;
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
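
Taken together, the scrypt hunks above derive the padbuffer size from three knobs: lookup_gap, thread_concurrency, and the device's max_alloc. Below is a minimal standalone sketch that replays that arithmetic outside cgminer so it can be checked in isolation. struct fake_cgpu and its sample values are hypothetical stand-ins for the touched fields of cgminer's struct cgpu_info, and the ma = 1 reset between the two rounding hunks is inferred from the shift logic around it (the patch skips that unchanged line).

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the struct cgpu_info fields this patch
 * touches; not cgminer's real definition. */
struct fake_cgpu {
	uint64_t max_alloc;     /* CL_DEVICE_MAX_MEM_ALLOC_SIZE */
	int lookup_gap;         /* scrypt lookup gap, 0 = pick default */
	int thread_concurrency; /* 0 = derive from max_alloc */
	int shaders;            /* shader count, 0 = unknown */
};

int main(void)
{
	struct fake_cgpu g = { .max_alloc = 512ull << 20, .shaders = 1600 };
	uint64_t ma = g.max_alloc, mt;
	int pow2 = 0;

	/* Defaults, as in the patched code: lookup gap 2, and a thread
	 * concurrency derived from max_alloc with the same
	 * ma / 32768 / lookup_gap heuristic, trimmed down to a multiple
	 * of the shader count when that is known. */
	if (!g.lookup_gap)
		g.lookup_gap = 2;
	if (!g.thread_concurrency) {
		g.thread_concurrency = (int)(ma / 32768 / g.lookup_gap);
		if (g.shaders && g.thread_concurrency > g.shaders)
			g.thread_concurrency -= g.thread_concurrency % g.shaders;
	}

	/* Round max_alloc to a power of 2 that still covers the required
	 * amount, as the two rounding hunks do; the ma = 1 reset sits on
	 * the unchanged line between them. */
	mt = (uint64_t)g.thread_concurrency * 32768 * g.lookup_gap;
	if (ma > mt) {
		while (ma >>= 1)
			pow2++;
		ma = 1;
		while (--pow2 && ma < mt)
			ma <<= 1;
		if (ma >= mt)
			g.max_alloc = ma;
	}

	/* Final scratchpad size, as in the last hunk: 128 bytes per
	 * stored iteration, with scrypt's 1024 iterations divided
	 * (rounding up) by lookup_gap. */
	size_t ipt = 1024 / g.lookup_gap + (1024 % g.lookup_gap > 0);
	size_t bufsize = 128 * ipt * (size_t)g.thread_concurrency;

	/* As in the final hunk: adopt the (possibly rounded) max_alloc
	 * whenever it is at least as large as the computed size. */
	if (bufsize <= g.max_alloc)
		bufsize = (size_t)g.max_alloc;

	printf("tc=%d lg=%d bufsize=%zu max_alloc=%llu\n",
	       g.thread_concurrency, g.lookup_gap, bufsize,
	       (unsigned long long)g.max_alloc);
	return 0;
}

With the sample 512 MiB max_alloc and 1600 shaders, the sketch selects a thread concurrency of 8000, computes a 500 MiB requirement, and then adopts the full 512 MiB max_alloc as the buffer size, mirroring what the patched initCl() would log before calling clCreateBuffer().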
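The binaryfilename hunk folds the same tuning parameters into the cached kernel binary's name, so a binary compiled for a different lookup gap, thread concurrency, or vector width is never loaded by mistake. A short sketch of just that naming step, with a hypothetical starting name and values (the real code also encodes the device name, bitalign/goffset flags and driver version, as the surrounding lines show):

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

int main(void)
{
	/* Hypothetical starting point: kernel name + device name have
	 * already been copied in, as initCl() does earlier. */
	char binaryfilename[255] = "scryptTahiti";
	char numbuf[16];
	bool goffset = true, opt_scrypt = true;
	int lookup_gap = 2, thread_concurrency = 8000, vwidth = 1;

	if (goffset)
		strcat(binaryfilename, "g");
	/* Scrypt encodes lg/tc; the SHA256 kernels encode vector width. */
	if (opt_scrypt)
		snprintf(numbuf, sizeof(numbuf), "lg%dtc%d", lookup_gap, thread_concurrency);
	else
		snprintf(numbuf, sizeof(numbuf), "v%d", vwidth);
	strcat(binaryfilename, numbuf);

	printf("%s\n", binaryfilename); /* prints scryptTahitiglg2tc8000 */
	return 0;
}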