diff --git a/main.c b/main.c
index 6a35c212..dc97a9a6 100644
--- a/main.c
+++ b/main.c
@@ -217,6 +217,8 @@ int longpoll_thr_id;
 static int stage_thr_id;
 static int watchdog_thr_id;
 static int input_thr_id;
+static int gpur_thr_id;
+static int cpur_thr_id;
 static int total_threads;
 
 struct work_restart *work_restart = NULL;
@@ -3464,7 +3466,6 @@ static void *gpuminer_thread(void *userdata)
 	uint32_t *res, *blank_res;
 	double gpu_ms_average = 7;
 	int gpu = dev_from_id(thr_id);
-	struct cgpu_info *cgpu = mythr->cgpu;
 
 	size_t globalThreads[1];
 	size_t localThreads[1];
@@ -3476,7 +3477,7 @@ static void *gpuminer_thread(void *userdata)
 
 	struct work *work = make_work();
 	unsigned int threads;
-	unsigned const int vectors = cgpu->vwidth;
+	unsigned const int vectors = clState->preferred_vwidth;
 	unsigned int hashes;
 	unsigned int hashes_done = 0;
 
@@ -3513,7 +3514,7 @@ static void *gpuminer_thread(void *userdata)
 	}
 
 	gettimeofday(&tv_start, NULL);
-	localThreads[0] = cgpu->work_size;
+	localThreads[0] = clState->work_size;
 	set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
 			   localThreads[0]);
 
@@ -3527,7 +3528,7 @@ static void *gpuminer_thread(void *userdata)
 	if (unlikely(status != CL_SUCCESS))
 		{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
 
-	cgpu->status = LIFE_WELL;
+	mythr->cgpu->status = LIFE_WELL;
 	if (opt_debug)
 		applog(LOG_DEBUG, "Popping ping in gpuminer thread");
 
@@ -3654,7 +3655,7 @@ static void *gpuminer_thread(void *userdata)
 		}
 		if (unlikely(!gpu_devices[gpu])) {
 			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = cgpu->rolling = 0;
+			mythr->rolling = mythr->cgpu->rolling = 0;
 			if (opt_debug)
 				applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
 
@@ -3839,6 +3840,7 @@ static void restart_longpoll(void)
 
 static void *reinit_cpu(void *userdata)
 {
+	pthread_detach(pthread_self());
 #if 0
 	struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
 	int cpu = cgpu->cpu_gpu;
@@ -3870,16 +3872,34 @@ static void *reinit_cpu(void *userdata)
 }
 
 #ifdef HAVE_OPENCL
+/* Only one thread ever re-initialises GPUs. If a GPU init command hangs on
+ * a completely wedged GPU, this thread simply never returns and so cannot
+ * harm the other GPUs. If the command does return, the failure was only a
+ * soft one and the reinit_gpu thread is then free to tackle the next GPU
+ * queued for it. */
 static void *reinit_gpu(void *userdata)
 {
-	struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
-	int gpu = cgpu->cpu_gpu;
+	struct thr_info *mythr = userdata;
+	struct cgpu_info *cgpu;
 	struct thr_info *thr;
 	struct timeval now;
-	_clState *clState;
+	char name[256];
 	int thr_id;
+	int gpu;
+
+	pthread_detach(pthread_self());
+
+select_cgpu:
+	cgpu = tq_pop(mythr->q, NULL);
+	if (!cgpu)
+		goto out;
 
-	/* Send threads message to stop */
+	if (clDevicesNum() != nDevs) {
+		applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
+		goto out;
+	}
+
+	gpu = cgpu->cpu_gpu;
 	gpu_devices[gpu] = false;
 
 	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
@@ -3889,9 +3909,16 @@ static void *reinit_gpu(void *userdata)
 		thr = &thr_info[thr_id];
 		thr->rolling = thr->cgpu->rolling = 0;
 		if (!pthread_cancel(*thr->pth)) {
-			applog(LOG_WARNING, "Thread still exists, killing it off");
+			applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
 		} else
-			applog(LOG_WARNING, "Thread no longer exists!");
+			applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
+	}
+
+	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
+		if (dev_from_id(thr_id) != gpu)
+			continue;
+
+		thr = &thr_info[thr_id];
 
 		/* Lose this ram cause we may get stuck here! */
 		//tq_freeze(thr->q);
@@ -3900,13 +3927,16 @@ static void *reinit_gpu(void *userdata)
 		if (!thr->q)
 			quit(1, "Failed to tq_new in reinit_gpu");
 
-		/* Create a new clstate */
-		applog(LOG_WARNING, "Attempting to create a new clState");
-		clState = initCQ(clStates[thr_id], gpu);
-
 		/* Lose this ram cause we may dereference in the dying thread! */
 		//free(clState);
-		applog(LOG_WARNING, "Command successful, attempting to create new thread");
+
+		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
+		clStates[thr_id] = initCl(gpu, name, sizeof(name));
+		if (!clStates[thr_id]) {
+			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
+			goto out;
+		}
+		applog(LOG_INFO, "initCl() finished. Found %s", name);
 
 		if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) {
 			applog(LOG_ERR, "thread %d create failed", thr_id);
@@ -3918,70 +3948,31 @@ static void *reinit_gpu(void *userdata)
 	gettimeofday(&now, NULL);
 	get_datestamp(cgpu->init, &now);
 
-	/* Try to re-enable it */
 	gpu_devices[gpu] = true;
-	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
-		thr = &thr_info[thr_id];
-		if (dev_from_id(thr_id) == gpu)
-			tq_push(thr->q, &ping);
-	}
-
-	return NULL;
-}
-
-static void *ping_gputhread(void *userdata)
-{
-	struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
-	int gpu = cgpu->cpu_gpu;
-	struct thr_info *thr;
-	_clState *clState;
-	int thr_id;
-
 	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
 		if (dev_from_id(thr_id) != gpu)
 			continue;
 
 		thr = &thr_info[thr_id];
-		clState = clStates[thr_id];
-		tq_push(thr->q, &ping);
-		applog(LOG_WARNING, "Attempting to flush command queue of thread %d", thr_id);
-		clFlush(clState->commandQueue);
-		clFinish(clState->commandQueue);
 		tq_push(thr->q, &ping);
 	}
 
+	goto select_cgpu;
+out:
 	return NULL;
 }
-
-static void ping_gpu(struct cgpu_info *cgpu)
-{
-	pthread_t ping_thread;
-
-	if (unlikely(pthread_create(&ping_thread, NULL, ping_gputhread, (void *)cgpu)))
-		applog(LOG_ERR, "Failed to create ping thread");
-}
 #else
 static void *reinit_gpu(void *userdata)
 {
 }
-
-static void ping_gpu(struct cgpu_info *cgpu)
-{
-}
 #endif
 
 static void reinit_device(struct cgpu_info *cgpu)
 {
-	pthread_t resus_thread;
-	void *reinit;
-
 	if (cgpu->is_gpu)
-		reinit = reinit_gpu;
+		tq_push(thr_info[gpur_thr_id].q, cgpu);
 	else
-		reinit = reinit_cpu;
-
-	if (unlikely(pthread_create(&resus_thread, NULL, reinit, (void *)cgpu)))
-		applog(LOG_ERR, "Failed to create reinit thread");
+		tq_push(thr_info[cpur_thr_id].q, cgpu);
 }
 
 /* Determine which are the first threads belonging to a device and if they're
@@ -4080,7 +4071,7 @@ static void *watchdog_thread(void *userdata)
 				gpus[gpu].status = LIFE_SICK;
 				applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu);
 				/* Sent it a ping, it might respond */
-				ping_gpu(thr->cgpu);
+				tq_push(thr->q, &ping);
 			} else if (now.tv_sec - thr->last.tv_sec > 300 && gpus[i].status == LIFE_SICK) {
 				gpus[gpu].status = LIFE_DEAD;
 				applog(LOG_ERR, "Thread %d idle for more than 5 minutes, GPU %d declared DEAD!", i, gpu);
@@ -4340,7 +4331,7 @@ out:
 
 int main (int argc, char *argv[])
 {
-	unsigned int i, j = 0, x, y, pools_active = 0;
+	unsigned int i, x, y, pools_active = 0;
 	struct sigaction handler;
 	struct thr_info *thr;
 	char name[256];
@@ -4568,7 +4559,7 @@ int main (int argc, char *argv[])
 
 	mining_threads = opt_n_threads + gpu_threads;
 
-	total_threads = mining_threads + 5;
+	total_threads = mining_threads + 7;
 	work_restart = calloc(total_threads, sizeof(*work_restart));
 	if (!work_restart)
 		quit(1, "Failed to calloc work_restart");
@@ -4649,14 +4640,9 @@ int main (int argc, char *argv[])
 		quit(0, "No pools active! Exiting.");
 
 #ifdef HAVE_OPENCL
-	i = 0;
-
-	if (nDevs > 0)
-		preinit_devices();
-
 	/* start GPU mining threads */
-	for (j = 0; j < nDevs * opt_g_threads; j++) {
-		int gpu = j % nDevs;
+	for (i = 0; i < nDevs * opt_g_threads; i++) {
+		int gpu = i % nDevs;
 		struct cgpu_info *cgpu;
 		struct timeval now;
 
@@ -4665,8 +4651,7 @@ int main (int argc, char *argv[])
 
 		thr = &thr_info[i];
 		thr->id = i;
-		cgpu = &gpus[gpu];
-		thr->cgpu = cgpu;
+		cgpu = thr->cgpu = &gpus[gpu];
 
 		thr->q = tq_new();
 		if (!thr->q)
@@ -4682,11 +4667,10 @@ int main (int argc, char *argv[])
 		}
 
 		applog(LOG_INFO, "Init GPU thread %i", i);
-		clStates[i] = initCl(cgpu, name, sizeof(name));
+		clStates[i] = initCl(gpu, name, sizeof(name));
 		if (!clStates[i]) {
 			applog(LOG_ERR, "Failed to init GPU thread %d", i);
 			gpu_devices[i] = false;
-			strcat(cgpu->init, "Never");
 			continue;
 		}
 		applog(LOG_INFO, "initCl() finished. Found %s", name);
@@ -4695,8 +4679,6 @@ int main (int argc, char *argv[])
 
 		if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr)))
 			quit(1, "thread %d create failed", i);
-
-		i++;
 	}
 
 	applog(LOG_INFO, "%d gpu miner threads started", gpu_threads);
@@ -4742,6 +4724,24 @@ int main (int argc, char *argv[])
 		quit(1, "input thread create failed");
 	pthread_detach(*thr->pth);
 
+	/* Create reinit cpu thread */
+	cpur_thr_id = mining_threads + 5;
+	thr = &thr_info[cpur_thr_id];
+	thr->q = tq_new();
+	if (!thr->q)
+		quit(1, "tq_new failed for cpur_thr_id");
+	if (thr_info_create(thr, NULL, reinit_cpu, thr))
+		quit(1, "reinit_cpu thread create failed");
+
+	/* Create reinit gpu thread */
+	gpur_thr_id = mining_threads + 6;
+	thr = &thr_info[gpur_thr_id];
+	thr->q = tq_new();
+	if (!thr->q)
+		quit(1, "tq_new failed for gpur_thr_id");
+	if (thr_info_create(thr, NULL, reinit_gpu, thr))
+		quit(1, "reinit_gpu thread create failed");
+
 	/* main loop - simply wait for workio thread to exit */
 	pthread_join(*thr_info[work_thr_id].pth, NULL);
 	applog(LOG_INFO, "workio thread dead, exiting.");
diff --git a/miner.h b/miner.h
index f26df4dd..b1034ba4 100644
--- a/miner.h
+++ b/miner.h
@@ -152,12 +152,6 @@ struct cgpu_info {
 	double efficiency;
 	double utility;
 	enum alive status;
-
-	int hasBitAlign;
-	unsigned int vwidth;
-	size_t max_work_size;
-	size_t work_size;
-
 	char init[40];
 };
 
diff --git a/ocl.c b/ocl.c
index 21f6bd90..45f70955 100644
--- a/ocl.c
+++ b/ocl.c
@@ -52,8 +52,6 @@ char *file_contents(const char *filename, int *length)
 	return (char*)buffer;
 }
 
-static cl_uint numDevices;
-
 int clDevicesNum() {
 	cl_int status = 0;
 
@@ -113,95 +111,6 @@ int clDevicesNum() {
 	return numDevices;
 }
 
-static cl_platform_id platform = NULL;
-static cl_device_id *devices;
-
-int preinit_devices(void)
-{
-	cl_int status;
-	cl_uint numPlatforms;
-	int i;
-
-	status = clGetPlatformIDs(0, NULL, &numPlatforms);
-	if (status != CL_SUCCESS)
-	{
-		applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
-		return -1;
-	}
-
-	if (numPlatforms > 0)
-	{
-		cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
-		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
-		if (status != CL_SUCCESS)
-		{
-			applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
-			return -1;
-		}
-
-		for(i = 0; i < numPlatforms; ++i)
-		{
-			char pbuff[100];
-			status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
-			if (status != CL_SUCCESS)
-			{
-				applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
-				free(platforms);
-				return -1;
-			}
-			platform = platforms[i];
-			if (!strcmp(pbuff, "Advanced Micro Devices, Inc."))
-			{
-				break;
-			}
-		}
-		free(platforms);
-	}
-
-	if (platform == NULL) {
-		perror("NULL platform found!\n");
-		return -1;
-	}
-
-	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
-	if (status != CL_SUCCESS)
-	{
-		applog(LOG_ERR, "Error: Getting Device IDs (num)");
-		return -1;
-	}
-
-	if (numDevices > 0 ) {
-		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
-
-		/* Now, get the device list data */
-
-		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
-		if (status != CL_SUCCESS)
-		{
-			applog(LOG_ERR, "Error: Getting Device IDs (list)");
-			return -1;
-		}
-
-		applog(LOG_INFO, "List of devices:");
-
-		unsigned int i;
-		for(i=0; i<numDevices; i++) {
-			char pbuff[100];
-			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
-			if (status != CL_SUCCESS)
-			{
-				applog(LOG_ERR, "Error: Getting Device Info");
-				return -1;
-			}
-
-			applog(LOG_INFO, "\t%i\t%s", i, pbuff);
-		}
-
-	} else return -1;
-
-	return 0;
-}
-
 static int advance(char **area, unsigned *remaining, const char *marker)
 {
 	char *find = memmem(*area, *remaining, marker, strlen(marker));
@@ -264,70 +173,111 @@ void patch_opcodes(char *w, unsigned remaining)
 	}
 }
 
-_clState *initCQ(_clState *clState, unsigned int gpu)
+_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 {
+	int patchbfi = 0;
 	cl_int status = 0;
-	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
+	unsigned int i;
 
-	clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
+	_clState *clState = calloc(1, sizeof(_clState));
+
+	cl_uint numPlatforms;
+	cl_platform_id platform = NULL;
+	status = clGetPlatformIDs(0, NULL, &numPlatforms);
 	if (status != CL_SUCCESS)
 	{
-		applog(LOG_ERR, "Error: Creating Context. (clCreateContextFromType)");
+		applog(LOG_ERR, "Error: Getting Platforms. (clGetPlatformsIDs)");
 		return NULL;
 	}
 
-	/* create a cl program executable for the device specified */
-	status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
-	if (status != CL_SUCCESS)
+	if (numPlatforms > 0)
 	{
-		applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
-		size_t logSize;
-		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+		cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
+		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+		if (status != CL_SUCCESS)
+		{
+			applog(LOG_ERR, "Error: Getting Platform Ids. (clGetPlatformsIDs)");
+			return NULL;
+		}
 
-		char *log = malloc(logSize);
-		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
-		applog(LOG_INFO, "%s", log);
-		return NULL;
+		for(i = 0; i < numPlatforms; ++i)
+		{
+			char pbuff[100];
+			status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
+			if (status != CL_SUCCESS)
+			{
+				applog(LOG_ERR, "Error: Getting Platform Info. (clGetPlatformInfo)");
+				free(platforms);
+				return NULL;
+			}
+			platform = platforms[i];
+			if (!strcmp(pbuff, "Advanced Micro Devices, Inc."))
+			{
+				break;
+			}
+		}
+		free(platforms);
 	}
 
-	/* get a kernel object handle for a kernel with the given name */
-	clState->kernel = clCreateKernel(clState->program, "search", &status);
-	if (status != CL_SUCCESS)
-	{
-		applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)");
+	if (platform == NULL) {
+		perror("NULL platform found!\n");
 		return NULL;
 	}
 
-	/////////////////////////////////////////////////////////////////
-	// Create an OpenCL command queue
-	/////////////////////////////////////////////////////////////////
-	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
-						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
-	if (status != CL_SUCCESS) /* Try again without OOE enable */
-		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
+	size_t nDevices;
+	cl_uint numDevices;
+	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 	if (status != CL_SUCCESS)
 	{
-		applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)");
+		applog(LOG_ERR, "Error: Getting Device IDs (num)");
 		return NULL;
 	}
 
-	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
-		return NULL;
-	}
+	cl_device_id *devices;
+	if (numDevices > 0 ) {
+		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
 
-	return clState;
-}
+		/* Now, get the device list data */
 
-_clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
-{
-	unsigned int gpu = cgpu->cpu_gpu;
-	int patchbfi = 0;
-	cl_int status = 0;
-	size_t nDevices;
+		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
+		if (status != CL_SUCCESS)
+		{
+			applog(LOG_ERR, "Error: Getting Device IDs (list)");
+			return NULL;
+		}
 
-	_clState *clState = calloc(1, sizeof(_clState));
+		applog(LOG_INFO, "List of devices:");
+
+		unsigned int i;
+		for(i=0; i<numDevices; i++) {
+			char pbuff[100];
+			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
+			if (status != CL_SUCCESS)
+			{
+				applog(LOG_ERR, "Error: Getting Device Info");
+				return NULL;
+			}
+
+			applog(LOG_INFO, "\t%i\t%s", i, pbuff);
+		}
+
+		if (gpu < numDevices) {
+			char pbuff[100];
+			status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices);
+			if (status != CL_SUCCESS)
+			{
+				applog(LOG_ERR, "Error: Getting Device Info");
+				return NULL;
+			}
+
+			applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
+			strncpy(name, pbuff, nameSize);
+		} else {
+			applog(LOG_ERR, "Invalid GPU %i", gpu);
+			return NULL;
+		}
+
+	} else return NULL;
 
 	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
 
@@ -338,22 +288,6 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
 		return NULL;
 	}
 
-	if (gpu < numDevices) {
-		char pbuff[100];
-		status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, &nDevices);
-		if (status != CL_SUCCESS)
-		{
-			applog(LOG_ERR, "Error: Getting Device Info");
-			return NULL;
-		}
-
-		applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
-		strncpy(name, pbuff, nameSize);
-	} else {
-		applog(LOG_ERR, "Invalid GPU %i", gpu);
-		return NULL;
-	}
-
 	/* Check for BFI INT support. Hopefully people don't mix devices with
 	 * and without it! */
 	char * extensions = malloc(1024);
@@ -367,7 +301,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
 	}
 	find = strstr(extensions, camo);
 	if (find)
-		cgpu->hasBitAlign = patchbfi = 1;
+		clState->hasBitAlign = patchbfi = 1;
 
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
 	if (status != CL_SUCCESS) {
@@ -377,27 +311,26 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
 	if (opt_debug)
 		applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
 
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&cgpu->max_work_size, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE");
 		return NULL;
 	}
 	if (opt_debug)
-		applog(LOG_DEBUG, "Max work group size reported %d", cgpu->max_work_size);
+		applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
 
 	/* For some reason 2 vectors is still better even if the card says
 	 * otherwise, and many cards lie about their max so use 256 as max
 	 * unless explicitly set on the command line */
-	cgpu->vwidth = clState->preferred_vwidth;
 	if (clState->preferred_vwidth > 1)
-		cgpu->vwidth = 2;
+		clState->preferred_vwidth = 2;
 	if (opt_vectors)
-		cgpu->vwidth = opt_vectors;
-	if (opt_worksize && opt_worksize <= cgpu->max_work_size)
-		cgpu->work_size = opt_worksize;
+		clState->preferred_vwidth = opt_vectors;
+	if (opt_worksize && opt_worksize <= clState->max_work_size)
+		clState->work_size = opt_worksize;
 	else
-		cgpu->work_size = (cgpu->max_work_size <= 256 ? cgpu->max_work_size : 256) /
-				cgpu->vwidth;
+		clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) /
+				clState->preferred_vwidth;
 
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
@@ -409,7 +342,7 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
 	char filename[16];
 
 	if (chosen_kernel == KL_NONE) {
-		if (cgpu->hasBitAlign)
+		if (clState->hasBitAlign)
 			chosen_kernel = KL_PHATK;
 		else
 			chosen_kernel = KL_POCLBM;
@@ -452,14 +385,14 @@ _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize)
 	}
 
 	strcat(binaryfilename, name);
-	if (cgpu->hasBitAlign)
+	if (clState->hasBitAlign)
 		strcat(binaryfilename, "bitalign");
 
 	strcat(binaryfilename, "v");
-	sprintf(numbuf, "%d", cgpu->vwidth);
+	sprintf(numbuf, "%d", clState->preferred_vwidth);
 	strcat(binaryfilename, numbuf);
 	strcat(binaryfilename, "w");
-	sprintf(numbuf, "%d", (int)cgpu->work_size);
+	sprintf(numbuf, "%d", (int)clState->work_size);
 	strcat(binaryfilename, numbuf);
 	strcat(binaryfilename, "long");
 	sprintf(numbuf, "%d", (int)sizeof(long));
@@ -515,7 +448,7 @@ build:
 	memcpy(source, rawsource, pl);
 
 	/* Patch the source file with the preferred_vwidth */
-	if (cgpu->vwidth > 1) {
+	if (clState->preferred_vwidth > 1) {
 		char *find = strstr(source, "VECTORSX");
 
 		if (unlikely(!find)) {
@@ -523,7 +456,7 @@ build:
 			return NULL;
 		}
 		find += 7; // "VECTORS"
-		if (cgpu->vwidth == 2)
+		if (clState->preferred_vwidth == 2)
 			strncpy(find, "2", 1);
 		else
 			strncpy(find, "4", 1);
@@ -532,7 +465,7 @@ build:
 	}
 
 	/* Patch the source file defining BITALIGN */
-	if (cgpu->hasBitAlign) {
+	if (clState->hasBitAlign) {
 		char *find = strstr(source, "BITALIGNX");
 
 		if (unlikely(!find)) {
@@ -690,13 +623,51 @@ built:
 	free(binaries);
 	free(binary_sizes);
 
-	/* We throw everything out now and create the real context we're using in initCQ */
-	clReleaseContext(clState->context);
-
 	applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
-	       filename, patchbfi ? "" : "out", cgpu->vwidth, cgpu->work_size);
+	       filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
+
+	/* create a cl program executable for the single device specified */
+	status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
+	if (status != CL_SUCCESS)
+	{
+		applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
+		size_t logSize;
+		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+
+		char *log = malloc(logSize);
+		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
+		applog(LOG_INFO, "%s", log);
+		return NULL;
+	}
+
+	/* get a kernel object handle for a kernel with the given name */
+	clState->kernel = clCreateKernel(clState->program, "search", &status);
+	if (status != CL_SUCCESS)
+	{
+		applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)");
+		return NULL;
+	}
+
+	/////////////////////////////////////////////////////////////////
+	// Create an OpenCL command queue
+	/////////////////////////////////////////////////////////////////
+	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
+						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
+	if (status != CL_SUCCESS) /* Try again without OOE enable */
+		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
+	if (status != CL_SUCCESS)
+	{
+		applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)");
+		return NULL;
+	}
 
-	return initCQ(clState, gpu);
+	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
+		return NULL;
+	}
+
+	return clState;
 }
 #endif /* HAVE_OPENCL */
 
diff --git a/ocl.h b/ocl.h
index 2189fd46..3c2a5cee 100644
--- a/ocl.h
+++ b/ocl.h
@@ -7,7 +7,6 @@
 #else
 #include <CL/cl.h>
 #endif
-#include "miner.h"
 
 typedef struct {
 	cl_context context;
@@ -15,13 +14,14 @@ typedef struct {
 	cl_command_queue commandQueue;
 	cl_program program;
 	cl_mem outputBuffer;
+	int hasBitAlign;
 	cl_uint preferred_vwidth;
+	size_t max_work_size;
+	size_t work_size;
 } _clState;
 
 extern char *file_contents(const char *filename, int *length);
 extern int clDevicesNum();
-extern int preinit_devices(void);
-extern _clState *initCQ(_clState *clState, unsigned int gpu);
-extern _clState *initCl(struct cgpu_info *cgpu, char *name, size_t nameSize);
+extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
 #endif /* HAVE_OPENCL */
 #endif /* __OCL_H__ */