
Simplify repeated use of gpus[gpu] in ocl.c

Branch: nfactor-troky
Author: Con Kolivas, 13 years ago
Parent commit: da1b996a39

1 changed file: ocl.c (61 lines changed)

@@ -208,6 +208,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{
_clState *clState = calloc(1, sizeof(_clState));
bool patchbfi = false, prog_built = false;
+struct cgpu_info *cgpu = &gpus[gpu];
cl_platform_id platform = NULL;
char pbuff[256], vbuff[255];
cl_platform_id* platforms;
@@ -363,12 +364,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
}
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
-status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&gpus[gpu].max_alloc, NULL);
+status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
return NULL;
}
applog(LOG_DEBUG, "Max mem alloc size is %u", gpus[gpu].max_alloc);
applog(LOG_DEBUG, "Max mem alloc size is %u", cgpu->max_alloc);
/* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would
@@ -381,7 +382,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
char filename[255];
char numbuf[10];
-if (gpus[gpu].kernel == KL_NONE) {
+if (cgpu->kernel == KL_NONE) {
if (opt_scrypt) {
applog(LOG_INFO, "Selecting scrypt kernel");
clState->chosen_kernel = KL_SCRYPT;
@@ -403,9 +404,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
applog(LOG_INFO, "Selecting phatk kernel");
clState->chosen_kernel = KL_PHATK;
}
-gpus[gpu].kernel = clState->chosen_kernel;
+cgpu->kernel = clState->chosen_kernel;
} else {
-clState->chosen_kernel = gpus[gpu].kernel;
+clState->chosen_kernel = cgpu->kernel;
if (clState->chosen_kernel == KL_PHATK &&
(strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -442,7 +443,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcpy(filename, SCRYPT_KERNNAME".cl");
strcpy(binaryfilename, SCRYPT_KERNNAME);
/* Scrypt only supports vector 1 */
-gpus[gpu].vwidth = 1;
+cgpu->vwidth = 1;
break;
case KL_NONE: /* Shouldn't happen */
case KL_DIABLO:
@@ -451,45 +452,45 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
break;
}
-if (gpus[gpu].vwidth)
-clState->vwidth = gpus[gpu].vwidth;
+if (cgpu->vwidth)
+clState->vwidth = cgpu->vwidth;
else {
clState->vwidth = preferred_vwidth;
-gpus[gpu].vwidth = preferred_vwidth;
+cgpu->vwidth = preferred_vwidth;
}
if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
clState->goffset = true;
-if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
-clState->wsize = gpus[gpu].work_size;
+if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
+clState->wsize = cgpu->work_size;
else if (strstr(name, "Tahiti"))
clState->wsize = 64;
else
clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-gpus[gpu].work_size = clState->wsize;
+cgpu->work_size = clState->wsize;
#ifdef USE_SCRYPT
if (opt_scrypt) {
-cl_ulong ma = gpus[gpu].max_alloc, mt;
+cl_ulong ma = cgpu->max_alloc, mt;
int pow2 = 0;
-if (!gpus[gpu].lookup_gap) {
+if (!cgpu->lookup_gap) {
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-gpus[gpu].lookup_gap = 2;
+cgpu->lookup_gap = 2;
}
-if (!gpus[gpu].thread_concurrency) {
-gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap;
-if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders)
-gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders;
+if (!cgpu->thread_concurrency) {
+cgpu->thread_concurrency = ma / 32768 / cgpu->lookup_gap;
+if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders)
+cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
-applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, gpus[gpu].thread_concurrency);
+applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, cgpu->thread_concurrency);
}
/* If we have memory to spare, try to find a power of 2 value
* >= required amount to map nicely to an intensity */
-mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap;
+mt = cgpu->thread_concurrency * 32768 * cgpu->lookup_gap;
if (ma > mt) {
while (ma >>= 1)
pow2++;
@@ -497,8 +498,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
while (--pow2 && ma < mt)
ma <<= 1;
if (ma >= mt) {
-gpus[gpu].max_alloc = ma;
-applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc);
+cgpu->max_alloc = ma;
+applog(LOG_DEBUG, "Max alloc decreased to %lu", cgpu->max_alloc);
}
}
}
@@ -532,7 +533,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (clState->goffset)
strcat(binaryfilename, "g");
if (opt_scrypt) {
sprintf(numbuf, "lg%dtc%d", gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency);
sprintf(numbuf, "lg%dtc%d", cgpu->lookup_gap, cgpu->thread_concurrency);
strcat(binaryfilename, numbuf);
} else {
sprintf(numbuf, "v%d", clState->vwidth);
@@ -604,7 +605,7 @@ build:
#ifdef USE_SCRYPT
if (opt_scrypt)
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency, (int)clState->wsize);
+cgpu->lookup_gap, cgpu->thread_concurrency, (int)clState->wsize);
else
#endif
{
@@ -791,16 +792,16 @@ built:
#ifdef USE_SCRYPT
if (opt_scrypt) {
-size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
-size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
+size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
+size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
/* Use the max alloc value which has been rounded to a power of
* 2 greater >= required amount earlier */
-if (bufsize > gpus[gpu].max_alloc) {
+if (bufsize > cgpu->max_alloc) {
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
-gpu, gpus[gpu].max_alloc, bufsize);
+gpu, cgpu->max_alloc, bufsize);
} else
-bufsize = gpus[gpu].max_alloc;
+bufsize = cgpu->max_alloc;
applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
clState->padbufsize = bufsize;
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
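The scrypt buffer sizing in this last hunk is easy to sanity-check by hand. A small sketch with illustrative values follows; lookup_gap = 2 and thread_concurrency = 8192 are assumptions for the example, not values taken from this commit.

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    /* Illustrative values, not cgminer defaults. */
    unsigned int lookup_gap = 2;
    unsigned int thread_concurrency = 8192;

    /* Scratchpad items kept per hash: 1024 entries thinned by the
     * lookup gap, rounded up when it does not divide evenly. */
    size_t ipt = 1024 / lookup_gap + (1024 % lookup_gap > 0);

    /* 128 bytes per entry, one scratchpad per concurrent thread. */
    size_t bufsize = 128 * ipt * (size_t)thread_concurrency;

    /* Prints: ipt=512 bufsize=536870912 bytes (512 MiB) */
    printf("ipt=%zu bufsize=%zu bytes (%zu MiB)\n",
           ipt, bufsize, bufsize >> 20);
    return 0;
}

This is why the hunk compares bufsize against cgpu->max_alloc before creating the pad buffer: at high thread concurrency the scratchpad can easily exceed the device's maximum single allocation.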
