diff --git a/adl.c b/adl.c
index 6f973ff4..baa1076b 100644
--- a/adl.c
+++ b/adl.c
@@ -212,6 +212,15 @@ void init_adl(int nDevs)
 			continue;
 		}
 
+		if (!gpus[gpu].enabled) {
+			gpus[i].gpu_engine =
+			gpus[i].gpu_memclock =
+			gpus[i].gpu_vddc =
+			gpus[i].gpu_fan =
+			gpus[i].gpu_powertune = 0;
+			continue;
+		}
+
 		gpus[gpu].has_adl = true;
 		/* Flag adl as active if any card is successfully activated */
 		adl_active = true;
diff --git a/api.c b/api.c
index fe29b947..86c7c2d7 100644
--- a/api.c
+++ b/api.c
@@ -387,7 +387,7 @@ void gpustatus(int gpu, bool isjson)
 #endif
 		gt = gv = gm = gc = ga = gf = gp = pt = 0;
 
-		if (gpu_devices[gpu])
+		if (cgpu->enabled)
 			enabled = (char *)YES;
 		else
 			enabled = (char *)NO;
@@ -662,13 +662,13 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson)
 		return;
 	}
 
-	if (gpu_devices[id]) {
+	if (gpus[id].enabled) {
 		strcpy(io_buffer, message(MSG_ALRENA, id, isjson));
 		return;
 	}
 
 	for (i = 0; i < gpu_threads; i++) {
-		gpu = thr_info[i].cgpu->cpu_gpu;
+		gpu = thr_info[i].cgpu->device_id;
 		if (gpu == id) {
 			thr = &thr_info[i];
 			if (thr->cgpu->status != LIFE_WELL) {
@@ -676,7 +676,7 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson)
 				return;
 			}
 
-			gpu_devices[id] = true;
+			gpus[id].enabled = true;
 			tq_push(thr->q, &ping);
 		}
@@ -705,12 +705,12 @@ void gpudisable(SOCKETTYPE c, char *param, bool isjson)
 		return;
 	}
 
-	if (!gpu_devices[id]) {
+	if (!gpus[id].enabled) {
 		strcpy(io_buffer, message(MSG_ALRDIS, id, isjson));
 		return;
 	}
 
-	gpu_devices[id] = false;
+	gpus[id].enabled = false;
 
 	strcpy(io_buffer, message(MSG_GPUDIS, id, isjson));
 }
diff --git a/main.c b/main.c
index d98341e8..de4b27c9 100644
--- a/main.c
+++ b/main.c
@@ -1,6 +1,7 @@
 /*
  * Copyright 2011 Con Kolivas
+ * Copyright 2011 Luke Dashjr
  * Copyright 2010 Jeff Garzik
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -145,7 +146,6 @@ const char *algo_names[] = {
 #endif
 };
 
-typedef void (*sha256_func)();
 static const sha256_func sha256_funcs[] = {
 	[ALGO_C] = (sha256_func)scanhash_c,
 #ifdef WANT_SSE2_4WAY
@@ -205,12 +205,14 @@ enum sha256_algos opt_algo = ALGO_C;
 #endif
 int nDevs;
 static int opt_g_threads = 2;
-static int opt_device;
-static int total_devices;
-bool gpu_devices[MAX_GPUDEVICES];
+static signed int devices_enabled = 0;
+static bool opt_removedisabled = false;
+int total_devices = 0;
+struct cgpu_info *devices[MAX_DEVICES];
+bool have_opencl = false;
 int gpu_threads;
 static bool forced_n_threads;
-int opt_n_threads;
+int opt_n_threads = -1;
 int mining_threads;
 int num_processors;
 bool use_curses = true;
@@ -485,34 +487,20 @@ static double bench_algo_stage3(
 	struct timeval end;
 	struct timeval start;
 	uint32_t max_nonce = (1<<22);
-	unsigned long hashes_done = 0;
+	uint32_t last_nonce = 0;
 
 	gettimeofday(&start, 0);
-	#if defined(WANT_VIA_PADLOCK)
-
-	// For some reason, the VIA padlock hasher has a different API ...
-	if (ALGO_VIA==algo) {
-		(void)scanhash_via(
-			0,
-			work.data,
-			work.target,
-			max_nonce,
-			&hashes_done,
-			work.blk.nonce
-		);
-	} else
-	#endif
 	{
 		sha256_func func = sha256_funcs[algo];
 		(*func)(
 			0,
 			work.midstate,
-			work.data + 64,
+			work.data,
 			work.hash1,
 			work.hash,
 			work.target,
 			max_nonce,
-			&hashes_done,
+			&last_nonce,
 			work.blk.nonce
 		);
 	}
@@ -525,7 +513,7 @@ static double bench_algo_stage3(
 
 	double rate = -1.0;
-	if (0<hashes_done && 0<elapsed_seconds)
-		rate = hashes_done/elapsed_seconds/1000000.0;
+	if (0<last_nonce && 0<elapsed_seconds)
+		rate = last_nonce/elapsed_seconds/1000000.0;
 	return rate;
 }
 
@@ -736,11 +727,10 @@ static char *set_devices(const char *arg, int *i)
-	if (*i < 0 || *i > 15)
-		return "Invalid GPU device number";
-	total_devices++;
-	gpu_devices[*i] = true;
+	if (i < 0 || i >= (sizeof(devices_enabled) * 8) - 1)
+		return "Invalid device number";
+	devices_enabled |= 1 << i;
 	return NULL;
 }
@@ -1545,10 +1536,10 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--debug|-D",
			enable_debug, &opt_debug,
			"Enable debug output"),
-#ifdef HAVE_OPENCL
 	OPT_WITH_ARG("--device|-d",
-		     set_devices, NULL, &opt_device,
+		     set_devices, NULL, NULL,
		     "Select device to use, (Use repeat -d for multiple devices, default: all)"),
+#ifdef HAVE_OPENCL
 	OPT_WITHOUT_ARG("--disable-gpu|-G",
			opt_set_bool, &opt_nogpu,
			"Disable GPU mining even if suitable devices exist"),
@@ -1643,6 +1634,9 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--real-quiet",
			opt_set_bool, &opt_realquiet,
			"Disable all output"),
+	OPT_WITHOUT_ARG("--remove-disabled",
+			opt_set_bool, &opt_removedisabled,
+			"Remove disabled devices entirely, as if they didn't exist"),
 	OPT_WITH_ARG("--retries|-r",
		     opt_set_intval, opt_show_intval, &opt_retries,
		     "Number of times to retry before giving up, if JSON-RPC call fails (-1 means never)"),
@@ -1929,7 +1923,7 @@ err_out:
 
 static inline int dev_from_id(int thr_id)
 {
-	return thr_info[thr_id].cgpu->cpu_gpu;
+	return thr_info[thr_id].cgpu->device_id;
 }
 
 /* Make the change in the recent value adjust dynamically when the difference
@@ -1965,7 +1959,7 @@ static int requests_staged(void)
 static WINDOW *mainwin, *statuswin, *logwin;
 double total_secs = 0.1;
 static char statusline[256];
-static int cpucursor, gpucursor, logstart, logcursor;
+static int devcursor, logstart, logcursor;
 struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
 struct cgpu_info *cpus;
 
@@ -2001,24 +1995,7 @@ static void tailsprintf(char *f, const char *fmt, ...)
 
 static void get_statline(char *buf, struct cgpu_info *cgpu)
 {
-	sprintf(buf, "%sPU%d ", cgpu->is_gpu ? "G" : "C", cgpu->cpu_gpu);
-#ifdef HAVE_ADL
-	if (cgpu->has_adl) {
-		int gpu = cgpu->cpu_gpu;
-		float gt = gpu_temp(gpu);
-		int gf = gpu_fanspeed(gpu);
-		int gp = gpu_fanpercent(gpu);
-
-		if (gt != -1)
-			tailsprintf(buf, "%.1fC ", gt);
-		if (gf != -1)
-			tailsprintf(buf, "%dRPM ", gf);
-		else if (gp != -1)
-			tailsprintf(buf, "%d%% ", gp);
-		if (gt > -1 || gf > -1 || gp > -1)
-			tailsprintf(buf, "| ");
-	}
-#endif
+	sprintf(buf, "%s%d ", cgpu->api->name, cgpu->device_id);
 	tailsprintf(buf, "(%ds):%.1f (avg):%.1f Mh/s | A:%d R:%d HW:%d U:%.2f/m",
		opt_log_interval,
		cgpu->rolling,
@@ -2027,8 +2004,8 @@ static void get_statline(char *buf, struct cgpu_info *cgpu)
		cgpu->rejected,
		cgpu->hw_errors,
		cgpu->utility);
-	if (cgpu->is_gpu)
-		tailsprintf(buf, " I:%d", cgpu->intensity);
+	if (cgpu->api->get_statline)
+		cgpu->api->get_statline(buf, cgpu);
 }
 
 static void text_print_status(int thr_id)
@@ -2069,8 +2046,8 @@ static void curses_print_status(void)
 	mvwprintw(statuswin, 5, 0, " Block: %s...  Started: %s", current_hash, blocktime);
Started: %s", current_hash, blocktime); mvwhline(statuswin, 6, 0, '-', 80); mvwhline(statuswin, logstart - 1, 0, '-', 80); - mvwprintw(statuswin, gpucursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", - gpu_threads ? "[G]PU management " : ""); + mvwprintw(statuswin, devcursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", + have_opencl ? "[G]PU management " : ""); /* The window will be updated once we're done with all the devices */ wnoutrefresh(statuswin); } @@ -2084,38 +2061,17 @@ static void adj_width(int var, int *length) static void curses_print_devstatus(int thr_id) { static int awidth = 1, rwidth = 1, hwwidth = 1, uwidth = 1; - - if (thr_id >= 0 && thr_id < gpu_threads) { - int gpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &gpus[gpu]; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + char logline[255]; cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60; - mvwprintw(statuswin, gpucursor + gpu, 0, " GPU %d: ", gpu); -#ifdef HAVE_ADL - if (cgpu->has_adl) { - float gt = gpu_temp(gpu); - int gf = gpu_fanspeed(gpu); - int gp = gpu_fanpercent(gpu); - - if (gt != -1) - wprintw(statuswin, "%5.1fC ", gt); - else - wprintw(statuswin, " "); - if (gf != -1) - wprintw(statuswin, "%4dRPM ", gf); - else if (gp != -1) - wprintw(statuswin, "%3d%% ", gp); - else - wprintw(statuswin, " "); - wprintw(statuswin, "| "); - } -#endif + mvwprintw(statuswin, devcursor + cgpu->cgminer_id, 0, " %s %d: ", cgpu->api->name, cgpu->device_id); if (cgpu->status == LIFE_DEAD) wprintw(statuswin, "DEAD "); else if (cgpu->status == LIFE_SICK) wprintw(statuswin, "SICK "); - else if (!gpu_devices[gpu]) + else if (!cgpu->enabled) wprintw(statuswin, "OFF "); else wprintw(statuswin, "%5.1f", cgpu->rolling); @@ -2123,30 +2079,20 @@ static void curses_print_devstatus(int thr_id) adj_width(cgpu->rejected, &rwidth); adj_width(cgpu->hw_errors, &hwwidth); adj_width(cgpu->utility, &uwidth); - wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m I:%2d", + wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m", cgpu->total_mhashes / total_secs, awidth, cgpu->accepted, rwidth, cgpu->rejected, hwwidth, cgpu->hw_errors, - uwidth + 3, cgpu->utility, - gpus[gpu].intensity); - wclrtoeol(statuswin); - } else if (thr_id >= gpu_threads) { - int cpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &cpus[cpu]; + uwidth + 3, cgpu->utility); - cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60; + if (cgpu->api->get_statline) { + logline[0] = '\0'; + cgpu->api->get_statline(logline, cgpu); + wprintw(statuswin, "%s", logline); + } - adj_width(cgpu->accepted, &awidth); - adj_width(cgpu->rejected, &rwidth); - adj_width(cgpu->utility, &uwidth); - mvwprintw(statuswin, cpucursor + cpu, 0, " CPU %d: %5.2f/%5.2fMh/s | A:%*d R:%*d U:%*.2f/m", - cpu, cgpu->rolling, cgpu->total_mhashes / total_secs, - awidth, cgpu->accepted, - rwidth, cgpu->rejected, - uwidth + 3, cgpu->utility); wclrtoeol(statuswin); - } wnoutrefresh(statuswin); } @@ -2365,14 +2311,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: true (yay!!!)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? 
"G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } if (opt_shares && total_accepted >= opt_shares) { applog(LOG_WARNING, "Successfully mined %d accepted shares as requested and exiting.", opt_shares); @@ -2387,14 +2333,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: false (booooo)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } } @@ -3228,11 +3174,11 @@ static void write_config(FILE *fcfg) if (schedstop.enable) fprintf(fcfg, ",\n\"stop-time\" : \"%d:%d\"", schedstop.tm.tm_hour, schedstop.tm.tm_min); for(i = 0; i < nDevs; i++) - if (!gpu_devices[i]) + if (!gpus[i].enabled) break; if (i < nDevs) for(i = 0; i < nDevs; i++) - if (gpu_devices[i]) + if (gpus[i].enabled) fprintf(fcfg, ",\n\"device\" : \"%d\"", i); if (strcmp(opt_api_description, PACKAGE_STRING) != 0) fprintf(fcfg, ",\n\"api-description\" : \"%s\"", opt_api_description); @@ -3558,6 +3504,7 @@ retry: #ifdef HAVE_OPENCL void reinit_device(struct cgpu_info *cgpu); +struct device_api opencl_api; static void manage_gpu(void) { @@ -3626,7 +3573,7 @@ retry: if (thr->cgpu != cgpu) continue; get_datestamp(checkin, &thr->last); - wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, gpu_devices[gpu] ? "Enabled" : "Disabled"); + wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? 
"Enabled" : "Disabled"); switch (cgpu->status) { default: case LIFE_WELL: @@ -3657,24 +3604,29 @@ retry: else selected = -1; if (!strncasecmp(&input, "e", 1)) { + struct cgpu_info *cgpu; + if (selected) selected = curses_int("Select GPU to enable"); if (selected < 0 || selected >= nDevs) { wlogprint("Invalid selection\n"); goto retry; } - if (gpu_devices[selected]) { + if (gpus[selected].enabled) { wlogprint("Device already enabled\n"); goto retry; } - gpu_devices[selected] = true; - for (i = 0; i < gpu_threads; i++) { + gpus[selected].enabled = true; + for (i = 0; i < mining_threads; ++i) { + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(i) != selected) continue; - thr = &thr_info[i]; - if (thr->cgpu->status != LIFE_WELL) { + if (cgpu->status != LIFE_WELL) { wlogprint("Must restart device before enabling it"); - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } if (opt_debug) @@ -3690,11 +3642,11 @@ retry: wlogprint("Invalid selection\n"); goto retry; } - if (!gpu_devices[selected]) { + if (!gpus[selected].enabled) { wlogprint("Device already disabled\n"); goto retry; } - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } else if (!strncasecmp(&input, "i", 1)) { int intensity; @@ -3778,7 +3730,7 @@ static void *input_thread(void *userdata) display_pools(); else if (!strncasecmp(&input, "s", 1)) set_options(); - else if (gpu_threads && !strncasecmp(&input, "g", 1)) + else if (have_opencl && !strncasecmp(&input, "g", 1)) manage_gpu(); if (opt_realquiet) { disable_curses(); @@ -4348,230 +4300,149 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce) return submit_work_sync(thr, work); } +static inline bool abandon_work(int thr_id, struct work *work, struct timeval *wdiff, uint64_t hashes) +{ + if (wdiff->tv_sec > opt_scantime || + work->blk.nonce >= MAXTHREADS - hashes || + stale_work(work, false)) + return true; + return false; +} + static void *miner_thread(void *userdata) { - struct work *work = make_work(); struct thr_info *mythr = userdata; const int thr_id = mythr->id; - uint32_t max_nonce = 0xffffff, total_hashes = 0; - unsigned long hashes_done = max_nonce; - bool needs_work = true; + struct cgpu_info *cgpu = mythr->cgpu; + struct device_api *api = cgpu->api; + /* Try to cycle approximately 5 times before each log update */ - const unsigned long cycle = opt_log_interval / 5 ? : 1; + const unsigned long def_cycle = opt_log_interval / 5 ? : 1; + unsigned long cycle; + struct timeval tv_start, tv_end, tv_workstart, tv_lastupdate; + struct timeval diff, sdiff, wdiff; + uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff; + uint32_t hashes_done = 0; + uint32_t hashes; + struct work *work = make_work(); unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1; + unsigned const long request_nonce = MAXTHREADS / 3 * 2; bool requested = false; - uint32_t nonce_inc = max_nonce, hash_div = 1; - double hash_divfloat = 1.0; - + uint32_t hash_div = 1; pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE - * and if that fails, then SCHED_BATCH. 
-	 * error if it fails */
-	setpriority(PRIO_PROCESS, 0, 19);
-	drop_policy();
+	if (api->thread_init && !api->thread_init(mythr))
+		goto out;
 
-	/* Cpu affinity only makes sense if the number of threads is a multiple
-	 * of the number of CPUs */
-	if (!(opt_n_threads % num_processors))
-		affine_to_cpu(thr_id - gpu_threads, dev_from_id(thr_id));
+	if (opt_debug)
+		applog(LOG_DEBUG, "Popping ping in miner thread");
+	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
 
-	/* Invalidate pool so it fails can_roll() test */
-	work->pool = NULL;
+	sdiff.tv_sec = sdiff.tv_usec = 0;
+	gettimeofday(&tv_lastupdate, NULL);
 
 	while (1) {
-		struct timeval tv_workstart, tv_start, tv_end, diff;
-		uint64_t max64;
-		bool rc;
-
-		if (needs_work) {
-			gettimeofday(&tv_workstart, NULL);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"mining thread %d", thr_id);
-				goto out;
-			}
-			needs_work = requested = false;
-			total_hashes = 0;
-			max_nonce = work->blk.nonce + hashes_done;
+		work_restart[thr_id].restart = 0;
+		if (api->free_work && likely(work->pool))
+			api->free_work(mythr, work);
+		if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
+			applog(LOG_ERR, "work retrieval failed, exiting "
+				"mining thread %d", thr_id);
+			break;
 		}
-		hashes_done = 0;
-		gettimeofday(&tv_start, NULL);
-
-		/* scan nonces for a proof-of-work hash */
-		switch (opt_algo) {
-		case ALGO_C:
-			rc = scanhash_c(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
+		requested = false;
+		cycle = (can_roll(work) && should_roll(work)) ? 1 : def_cycle;
+		gettimeofday(&tv_workstart, NULL);
+		work->blk.nonce = 0;
+		if (api->prepare_work && !api->prepare_work(mythr, work)) {
+			applog(LOG_ERR, "work prepare failed, exiting "
+				"mining thread %d", thr_id);
 			break;
+		}
 
-#ifdef WANT_X8632_SSE2
-		case ALGO_SSE2_32: {
-			unsigned int rc5 =
-			        scanhash_sse2_32(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-			break;
-#endif
+		do {
+			gettimeofday(&tv_start, NULL);
 
-#ifdef WANT_X8664_SSE2
-		case ALGO_SSE2_64: {
-			unsigned int rc5 =
-			        scanhash_sse2_64(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
+			hashes = api->scanhash(mythr, work, work->blk.nonce + max_nonce);
+			if (unlikely(work_restart[thr_id].restart))
+				break;
+			if (unlikely(!hashes))
+				goto out;
+			hashes_done += hashes;
+
+			gettimeofday(&tv_end, NULL);
+			timeval_subtract(&diff, &tv_end, &tv_start);
+			sdiff.tv_sec += diff.tv_sec;
+			sdiff.tv_usec += diff.tv_usec;
+			if (sdiff.tv_usec > 1000000) {
+				++sdiff.tv_sec;
+				sdiff.tv_usec -= 1000000;
 			}
-			break;
-#endif
 
-#ifdef WANT_X8664_SSE4
-		case ALGO_SSE4_64: {
-			unsigned int rc5 =
-			        scanhash_sse4_64(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-			break;
+			timeval_subtract(&wdiff, &tv_end, &tv_workstart);
+			if (!requested) {
+#if 0
+				if (wdiff.tv_sec > request_interval)
+					hash_div = (MAXTHREADS / total_hashes) ? : 1;
#endif
-
-#ifdef WANT_SSE2_4WAY
-		case ALGO_4WAY: {
-			unsigned int rc4 =
-				ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64,
-						  work->hash1, work->hash,
-						  work->target,
-						  max_nonce, &hashes_done,
-						  work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
+				if (wdiff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
+					thread_reportout(mythr);
+					if (unlikely(!queue_request(mythr, false))) {
+						applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
+						goto out;
+					}
+					thread_reportin(mythr);
+					requested = true;
+				}
 			}
-			break;
-#endif
-
-#ifdef WANT_ALTIVEC_4WAY
-		case ALGO_ALTIVEC_4WAY:
-		{
-			unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64,
-								 work->hash1, work->hash,
-								 work->target,
-								 max_nonce, &hashes_done,
-								 work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
-		}
-			break;
-#endif
-
-#ifdef WANT_VIA_PADLOCK
-		case ALGO_VIA:
-			rc = scanhash_via(thr_id, work->data, work->target,
-					  max_nonce, &hashes_done,
-					  work->blk.nonce);
-			break;
-#endif
-		case ALGO_CRYPTOPP:
-			rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#ifdef WANT_CRYPTOPP_ASM32
-		case ALGO_CRYPTOPP_ASM32:
-			rc = scanhash_asm32(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#endif
+			if (sdiff.tv_sec < cycle) {
+				if (likely(!api->can_limit_work || max_nonce == 0xffffffff))
+					continue;
 
-		default:
-			/* should never happen */
-			goto out;
-		}
+				{
+					int mult = 1000000 / ((sdiff.tv_usec + 0x400) / 0x400) + 0x10;
+					mult *= cycle;
+					if (max_nonce > (0xffffffff * 0x400) / mult)
+						max_nonce = 0xffffffff;
+					else
+						max_nonce = (max_nonce * mult) / 0x400;
+				}
+			} else if (unlikely(sdiff.tv_sec > cycle) && api->can_limit_work) {
+				max_nonce = max_nonce * cycle / sdiff.tv_sec;
+			} else if (unlikely(sdiff.tv_usec > 100000) && api->can_limit_work) {
+				max_nonce = max_nonce * 0x400 / (((cycle * 1000000) + sdiff.tv_usec) / (cycle * 1000000 / 0x400));
+			}
 
-		/* record scanhash elapsed time */
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-
-		hashes_done -= work->blk.nonce;
-		hashmeter(thr_id, &diff, hashes_done);
-		total_hashes += hashes_done;
-		work->blk.nonce += hashes_done;
-
-		/* adjust max_nonce to meet target cycle time */
-		if (diff.tv_usec > 500000)
-			diff.tv_sec++;
-		if (diff.tv_sec && diff.tv_sec != cycle) {
-			uint64_t next_inc = ((uint64_t)hashes_done * (uint64_t)cycle) / (uint64_t)diff.tv_sec;
-
-			if (next_inc > (uint64_t)nonce_inc / 2 * 3)
-				next_inc = nonce_inc / 2 * 3;
-			nonce_inc = next_inc;
-		} else if (!diff.tv_sec)
-			nonce_inc = hashes_done * 2;
-		if (nonce_inc < 4)
-			nonce_inc = 0xffffff;
-		max64 = work->blk.nonce + nonce_inc;
-		if (max64 > 0xfffffffaULL)
-			max64 = 0xfffffffaULL;
-		max_nonce = max64;
-
-		/* if nonce found, submit work */
-		if (unlikely(rc)) {
-			if (opt_debug)
-				applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
-			if (unlikely(!submit_work_sync(mythr, work))) {
-				applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
-				break;
+			timeval_subtract(&diff, &tv_end, &tv_lastupdate);
+			if (diff.tv_sec >= opt_log_interval) {
+				hashmeter(thr_id, &diff, hashes_done);
+				hashes_done = 0;
+				tv_lastupdate = tv_end;
 			}
-			work->blk.nonce += 4;
-		}
-
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested && (diff.tv_sec >= request_interval)) {
-			thread_reportout(mythr);
-			if (unlikely(!queue_request(mythr, false))) {
-				applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
-				goto out;
+			if (unlikely(mythr->pause || !cgpu->enabled)) {
+				applog(LOG_WARNING, "Thread %d being disabled", thr_id);
+				mythr->rolling = mythr->cgpu->rolling = 0;
+				if (opt_debug)
+					applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+				thread_reportout(mythr);
+				tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
+				thread_reportin(mythr);
+				applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
 			}
-			thread_reportin(mythr);
-			requested = true;
-		}
-		if (diff.tv_sec > opt_scantime) {
-			decay_time(&hash_divfloat , (double)((MAXTHREADS / total_hashes) ? : 1));
-			hash_div = hash_divfloat;
-			needs_work = true;
-		} else if (work_restart[thr_id].restart || stale_work(work, false) ||
-			work->blk.nonce >= MAXTHREADS - hashes_done)
-			needs_work = true;
-
-		if (unlikely(mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+			sdiff.tv_sec = sdiff.tv_usec = 0;
 
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
+			if (can_roll(work) && should_roll(work))
+				roll_work(work);
+		} while (!abandon_work(thr_id, work, &wdiff, hashes));
 	}

out:
+	if (api->thread_shutdown)
+		api->thread_shutdown(mythr);
+
 	thread_reportin(mythr);
 	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
 	tq_freeze(mythr->q);
@@ -4679,224 +4550,6 @@ static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 	*globalThreads = *threads;
 	*hashes = *threads * vectors;
 }
-
-static void *gpuminer_thread(void *userdata)
-{
-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
-
-	const unsigned long cycle = opt_log_interval / 5 ? : 1;
-	struct timeval tv_start, tv_end, diff, tv_workstart;
-	struct thr_info *mythr = userdata;
-	const int thr_id = mythr->id;
-	uint32_t *res, *blank_res;
-	double gpu_ms_average = 7;
-	int gpu = dev_from_id(thr_id);
-
-	size_t globalThreads[1];
-	size_t localThreads[1];
-
-	cl_int status;
-
-	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
-
-	struct work *work = make_work();
-	unsigned int threads;
-	unsigned const int vectors = clState->preferred_vwidth;
-	unsigned int hashes;
-	unsigned int hashes_done = 0;
-
-	/* Request the next work item at 2/3 of the scantime */
-	unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1;
-	unsigned const long request_nonce = MAXTHREADS / 3 * 2;
-	bool requested = false;
-	uint32_t total_hashes = 0, hash_div = 1;
-
-	switch (chosen_kernel) {
-		case KL_POCLBM:
-			queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-		default:
-			queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-	}
-
-	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
-
-	res = calloc(BUFFERSIZE, 1);
-	blank_res = calloc(BUFFERSIZE, 1);
-
-	if (!res || !blank_res) {
-		applog(LOG_ERR, "Failed to calloc in gpuminer_thread");
-		goto out;
-	}
-
-	gettimeofday(&tv_start, NULL);
-	localThreads[0] = clState->work_size;
-	set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
-			   localThreads[0], gpus[gpu].intensity);
-
-	diff.tv_sec = 0;
-	gettimeofday(&tv_end, NULL);
-
-	work->pool = NULL;
-
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-			BUFFERSIZE, blank_res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-
-	mythr->cgpu->status = LIFE_WELL;
-	if (opt_debug)
-		applog(LOG_DEBUG, "Popping ping in gpuminer thread");
-
-	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
-	gettimeofday(&tv_workstart, NULL);
-	/* obtain new work from internal workio thread */
-	if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-		applog(LOG_ERR, "work retrieval failed, exiting "
-			"gpu mining thread %d", thr_id);
-		goto out;
-	}
-	requested = false;
-	precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-	work->blk.nonce = 0;
-
-	while (1) {
-		struct timeval tv_gpustart, tv_gpuend;
-		suseconds_t gpu_us;
-
-		gettimeofday(&tv_gpustart, NULL);
-		timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
-		/* This finish flushes the readbuffer set with CL_FALSE later */
-		clFinish(clState->commandQueue);
-		gettimeofday(&tv_gpuend, NULL);
-		timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
-		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
-		decay_time(&gpu_ms_average, gpu_us / 1000);
-		if (gpus[gpu].dynamic) {
-			/* Try to not let the GPU be out for longer than 6ms, but
-			 * increase intensity when the system is idle, unless
-			 * dynamic is disabled. */
-			if (gpu_ms_average > 7) {
-				if (gpus[gpu].intensity > -10)
-					gpus[gpu].intensity--;
-			} else if (gpu_ms_average < 3) {
-				if (gpus[gpu].intensity < 10)
-					gpus[gpu].intensity++;
-			}
-		}
-		set_threads_hashes(vectors, &threads, &hashes, globalThreads,
-				   localThreads[0], gpus[gpu].intensity);
-
-		if (diff.tv_sec > opt_scantime ||
-		    work->blk.nonce >= MAXTHREADS - hashes ||
-		    work_restart[thr_id].restart ||
-		    stale_work(work, false)) {
-			/* Ignore any reads since we're getting new work and queue a clean buffer */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			memset(res, 0, BUFFERSIZE);
-
-			gettimeofday(&tv_workstart, NULL);
-			if (opt_debug)
-				applog(LOG_DEBUG, "getwork thread %d", thr_id);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"gpu mining thread %d", thr_id);
-				goto out;
-			}
-			requested = false;
-
-			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-			work_restart[thr_id].restart = 0;
-
-			/* Flushes the writebuffer set with CL_FALSE above */
-			clFinish(clState->commandQueue);
-		}
-		status = queue_kernel_parameters(clState, &work->blk);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; }
-
-		/* MAXBUFFERS entry is used as a flag to say nonces exist */
-		if (res[FOUND]) {
-			/* Clear the buffer again */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			if (opt_debug)
-				applog(LOG_DEBUG, "GPU %d found something?", gpu);
-			postcalc_hash_async(mythr, work, res);
-			memset(res, 0, BUFFERSIZE);
-			clFinish(clState->commandQueue);
-		}
-
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-				globalThreads, localThreads, 0,  NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
-
-		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				BUFFERSIZE, res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
-
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-		hashes_done += hashes;
-		total_hashes += hashes;
-		work->blk.nonce += hashes;
-		if (diff.tv_sec >= cycle) {
-			hashmeter(thr_id, &diff, hashes_done);
-			gettimeofday(&tv_start, NULL);
-			hashes_done = 0;
-		}
-
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested) {
-#if 0
-			if (diff.tv_sec > request_interval)
-				hash_div = (MAXTHREADS / total_hashes) ? : 1;
-#endif
-			if (diff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
-				thread_reportout(mythr);
-				if (unlikely(!queue_request(mythr, false))) {
-					applog(LOG_ERR, "Failed to queue_request in gpuminer_thread %d", thr_id);
-					goto out;
-				}
-				thread_reportin(mythr);
-				requested = true;
-			}
-		}
-		if (unlikely(!gpu_devices[gpu] || mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
-
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
-	}
-out:
-	clReleaseCommandQueue(clState->commandQueue);
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
-	clReleaseContext(clState->context);
-
-	thread_reportin(mythr);
-	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
-	tq_freeze(mythr->q);
-
-	return NULL;
-}
 #endif /* HAVE_OPENCL */
 
 /* Stage another work item from the work returned in a longpoll */
@@ -5068,7 +4721,7 @@ static void *reinit_cpu(void *userdata)
 	pthread_detach(pthread_self());
#if 0
 	struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
-	int cpu = cgpu->cpu_gpu;
+	int cpu = cgpu->device_id;
 	long thr_id = ....(long)userdata;
 	struct thr_info *thr = &thr_info[thr_id];
 	int cpu = dev_from_id(thr_id);
@@ -5124,10 +4777,14 @@ select_cgpu:
 		goto out;
 	}
 
-	gpu = cgpu->cpu_gpu;
-	gpu_devices[gpu] = false;
+	gpu = cgpu->device_id;
+	cgpu->enabled = false;
 
-	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
+	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
+		thr = &thr_info[thr_id];
+		cgpu = thr->cgpu;
+		if (cgpu->api != &opencl_api)
+			continue;
 		if (dev_from_id(thr_id) != gpu)
 			continue;
 
@@ -5146,14 +4803,16 @@ select_cgpu:
 		applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
 	}
 
-	gpu_devices[gpu] = true;
+	cgpu->enabled = true;
 
-	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
+	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
+		thr = &thr_info[thr_id];
+		cgpu = thr->cgpu;
+		if (cgpu->api != &opencl_api)
+			continue;
 		if (dev_from_id(thr_id) != gpu)
 			continue;
 
-		thr = &thr_info[thr_id];
-
 		/* Lose this ram cause we may get stuck here! */
 		//tq_freeze(thr->q);
@@ -5172,7 +4831,7 @@ select_cgpu:
 	}
 	applog(LOG_INFO, "initCl() finished.  Found %s", name);
Found %s", name); - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) { + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) { applog(LOG_ERR, "thread %d create failed", thr_id); return NULL; } @@ -5182,11 +4841,14 @@ select_cgpu: gettimeofday(&now, NULL); get_datestamp(cgpu->init, &now); - for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { + for (thr_id = 0; thr_id < mining_threads; ++thr_id) { + thr = &thr_info[thr_id]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(thr_id) != gpu) continue; - thr = &thr_info[thr_id]; tq_push(thr->q, &ping); } @@ -5202,24 +4864,16 @@ static void *reinit_gpu(void *userdata) void reinit_device(struct cgpu_info *cgpu) { - if (cgpu->is_gpu) - tq_push(thr_info[gpur_thr_id].q, cgpu); - else - tq_push(thr_info[cpur_thr_id].q, cgpu); + if (cgpu->api->reinit_device) + cgpu->api->reinit_device(cgpu); } /* Determine which are the first threads belonging to a device and if they're * active */ static bool active_device(int thr_id) { - if (thr_id < gpu_threads) { - if (thr_id >= total_devices) - return false; - if (!gpu_devices[dev_from_id(thr_id)]) - return false; - } else if (thr_id > gpu_threads + num_processors) - return false; - return true; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + return cgpu->enabled; } /* Makes sure the hashmeter keeps going even if mining threads stall, updates @@ -5314,25 +4968,30 @@ static void *watchdog_thread(void *userdata) struct thr_info *thr; thr = &thr_info[i]; - /* Don't touch disabled GPUs */ - if (thr->cgpu->is_gpu && !gpu_devices[thr->cgpu->cpu_gpu]) + /* Don't touch disabled devices */ + if (!thr->cgpu->enabled) continue; thr->pause = false; tq_push(thr->q, &ping); } } - for (i = 0; i < gpu_threads; i++) { +#ifdef HAVE_OPENCL + for (i = 0; i < mining_threads; i++) { struct thr_info *thr; bool *enable; + struct cgpu_info *cgpu; int gpu; + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; /* Use only one thread per device to determine if the GPU is healthy */ if (i >= nDevs) break; - thr = &thr_info[i]; - gpu = thr->cgpu->cpu_gpu; - enable = &gpu_devices[gpu]; + gpu = thr->cgpu->device_id; + enable = &cgpu->enabled; #ifdef HAVE_ADL if (adl_active && gpus[gpu].has_adl && *enable) gpu_autotune(gpu, enable); @@ -5384,6 +5043,7 @@ static void *watchdog_thread(void *userdata) reinit_device(thr->cgpu); } } +#endif } return NULL; @@ -5687,29 +5347,494 @@ static void enable_curses(void) { unlock_curses(); } -int main (int argc, char *argv[]) -{ - unsigned int i, pools_active = 0; - struct block *block, *tmpblock; - struct work *work, *tmpwork; - struct sigaction handler; - struct thr_info *thr; - char name[256]; - /* This dangerous functions tramples random dynamically allocated - * variables so do it before anything at all */ - if (unlikely(curl_global_init(CURL_GLOBAL_ALL))) - quit(1, "Failed to curl_global_init"); +struct device_api cpu_api; - if (unlikely(pthread_mutex_init(&hash_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&qd_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&curses_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&control_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_rwlock_init(&blk_lock, NULL))) +static void cpu_detect() +{ + int i; + + // Reckon number of cores in the box + #if defined(WIN32) + { + DWORD system_am; + DWORD process_am; + BOOL ok = 
+			GetCurrentProcess(),
+			&system_am,
+			&process_am
+		);
+		if (!ok) {
+			applog(LOG_ERR, "couldn't figure out number of processors :(");
+			num_processors = 1;
+		} else {
+			size_t n = 32;
+			num_processors = 0;
+			while (n--)
+				if (process_am & (1<<n))
+					num_processors++;
+		}
+	}
+	#else
+	num_processors = sysconf(_SC_NPROCESSORS_ONLN);
+	#endif /* !WIN32 */
+
+	if (opt_n_threads < 0 || !forced_n_threads) {
+		if (total_devices)
+			opt_n_threads = 0;
+		else
+			opt_n_threads = num_processors;
+	}
+	if (!opt_n_threads)
+		return;
+
+	if (opt_n_threads + total_devices > MAX_DEVICES)
+		opt_n_threads = MAX_DEVICES - total_devices;
+	cpus = calloc(opt_n_threads, sizeof(struct cgpu_info));
+	if (unlikely(!cpus))
+		quit(1, "Failed to calloc cpus");
+	for (i = 0; i < opt_n_threads; ++i) {
+		struct cgpu_info *cgpu;
+
+		cgpu = devices[total_devices + i] = &cpus[i];
+		cgpu->api = &cpu_api;
+		cgpu->enabled = true;
+		cgpu->device_id = i;
+		cgpu->threads = 1;
+	}
+	total_devices += opt_n_threads;
+}
+
+static void reinit_cpu_device(struct cgpu_info *cpu)
+{
+	tq_push(thr_info[cpur_thr_id].q, cpu);
+}
+
+static bool cpu_thread_prepare(struct thr_info *thr)
+{
+	thread_reportin(thr);
+
+	return true;
+}
+
+static uint64_t cpu_can_limit_work(struct thr_info *thr)
+{
+	return 0xfffff;
+}
+
+static bool cpu_thread_init(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+
+	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
+	 * and if that fails, then SCHED_BATCH. No need for this to be an
+	 * error if it fails */
+	setpriority(PRIO_PROCESS, 0, 19);
+	drop_policy();
+	/* Cpu affinity only makes sense if the number of threads is a multiple
+	 * of the number of CPUs */
+	if (!(opt_n_threads % num_processors))
+		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
+	return true;
+}
+
+static uint64_t cpu_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce)
+{
+	const int thr_id = thr->id;
+
+	uint32_t first_nonce = work->blk.nonce;
+	uint32_t last_nonce;
+	bool rc;
+
+CPUSearch:
+	last_nonce = first_nonce;
+	rc = false;
+
+	/* scan nonces for a proof-of-work hash */
+	{
+		sha256_func func = sha256_funcs[opt_algo];
+		rc = (*func)(
+			thr_id,
+			work->midstate,
+			work->data,
+			work->hash1,
+			work->hash,
+			work->target,
+			max_nonce,
+			&last_nonce,
+			work->blk.nonce
+		);
+	}
+
+	/* if nonce found, submit work */
+	if (unlikely(rc)) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
+		if (unlikely(!submit_work_sync(thr, work))) {
+			applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
+		}
+		work->blk.nonce = last_nonce + 1;
+		goto CPUSearch;
+	}
+	else
+	if (unlikely(last_nonce == first_nonce))
+		return 0;
+
+	work->blk.nonce = last_nonce + 1;
+	return last_nonce - first_nonce + 1;
+}
+
+struct device_api cpu_api = {
+	.name = "CPU",
+	.api_detect = cpu_detect,
+	.reinit_device = reinit_cpu_device,
+	.thread_prepare = cpu_thread_prepare,
+	.can_limit_work = cpu_can_limit_work,
+	.thread_init = cpu_thread_init,
+	.scanhash = cpu_scanhash,
+};
+
+
+#ifdef HAVE_OPENCL
+struct device_api opencl_api;
+
+static void opencl_detect()
+{
+	int i;
+
+	nDevs = clDevicesNum();
+	if (nDevs < 0) {
+		applog(LOG_ERR, "clDevicesNum returned error, none usable");
+		nDevs = 0;
+	}
+
+	if (MAX_DEVICES - total_devices < nDevs)
+		nDevs = MAX_DEVICES - total_devices;
+
+	if (!nDevs) {
+		return;
+	}
+
+	if (opt_kernel) {
+		if (strcmp(opt_kernel, "poclbm") && strcmp(opt_kernel, "phatk"))
+			quit(1, "Invalid kernel name specified - must be poclbm or phatk");
+		if (!strcmp(opt_kernel, "poclbm"))
+			chosen_kernel = KL_POCLBM;
+		else
+			chosen_kernel = KL_PHATK;
+	} else
+		chosen_kernel = KL_NONE;
+
+	for (i = 0; i < nDevs; ++i) {
+		struct cgpu_info *cgpu;
+
+		cgpu = devices[total_devices++] = &gpus[i];
+		cgpu->enabled = true;
+		cgpu->api = &opencl_api;
+		cgpu->device_id = i;
+		cgpu->threads = opt_g_threads;
+	}
+}
+
+static void reinit_opencl_device(struct cgpu_info *gpu)
+{
+	tq_push(thr_info[gpur_thr_id].q, gpu);
+}
+
+static void get_opencl_statline(char *buf, struct cgpu_info *gpu)
+{
+	tailsprintf(buf, " | I:%2d", gpu->intensity);
+#ifdef HAVE_ADL
+	if (gpu->has_adl) {
+		int gpuid = gpu->device_id;
+		float gt = gpu_temp(gpuid);
+		int gf = gpu_fanspeed(gpuid);
+		int gp;
+
+		if (gt != -1)
+			tailsprintf(buf, " %5.1fC ", gt);
+		else
+			tailsprintf(buf, "        ", gt);
+		if (gf != -1)
+			tailsprintf(buf, " %4dRPM", gf);
+		else if ((gp = gpu_fanpercent(gpuid)) != -1)
+			tailsprintf(buf, " %3d%%", gp);
+	}
+#endif
+}
+
+struct opencl_thread_data {
+	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
+	uint32_t *res;
+	struct work *last_work;
+	struct work _last_work;
+};
+
+static uint32_t *blank_res;
+
+static bool opencl_thread_prepare(struct thr_info *thr)
+{
+	char name[256];
+	struct timeval now;
+	struct cgpu_info *cgpu = thr->cgpu;
+	int gpu = cgpu->device_id;
+	int i = thr->id;
+	static bool failmessage = false;
+
+	if (!blank_res)
+		blank_res = calloc(BUFFERSIZE, 1);
+	if (!blank_res) {
+		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
+		return false;
+	}
+
+	applog(LOG_INFO, "Init GPU thread %i", i);
+	clStates[i] = initCl(gpu, name, sizeof(name));
+	if (!clStates[i]) {
+		enable_curses();
+		applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu);
+		if (!failmessage) {
+			char *buf;
+
+			applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this.");
+			applog(LOG_ERR, "Try stopping other applications using the GPU like afterburner.");
+			applog(LOG_ERR, "Then restart cgminer.");
+			failmessage = true;
+			buf = curses_input("Press enter to continue");
+			if (buf)
+				free(buf);
+		}
+		cgpu->enabled = false;
+		cgpu->status = LIFE_NOSTART;
+		return false;
+	}
+	applog(LOG_INFO, "initCl() finished.  Found %s", name);
Found %s", name); + gettimeofday(&now, NULL); + get_datestamp(cgpu->init, &now); + + have_opencl = true; + + return true; +} + +static bool opencl_thread_init(struct thr_info *thr) +{ + const int thr_id = thr->id; + struct cgpu_info *gpu = thr->cgpu; + + struct opencl_thread_data *thrdata; + thrdata = calloc(1, sizeof(*thrdata)); + thr->cgpu_data = thrdata; + + if (!thrdata) { + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + switch (chosen_kernel) { + case KL_POCLBM: + thrdata->queue_kernel_parameters = &queue_poclbm_kernel; + break; + case KL_PHATK: + default: + thrdata->queue_kernel_parameters = &queue_phatk_kernel; + break; + } + + thrdata->res = calloc(BUFFERSIZE, 1); + + if (!thrdata->res) { + free(thrdata); + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + _clState *clState = clStates[thr_id]; + cl_int status; + + status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, + BUFFERSIZE, blank_res, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); + return false; + } + + gpu->status = LIFE_WELL; + + return true; +} + +static void opencl_free_work(struct thr_info *thr, struct work *work) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + _clState *clState = clStates[thr_id]; + + clFinish(clState->commandQueue); + if (thrdata->res[FOUND]) { + thrdata->last_work = &thrdata->_last_work; + memcpy(thrdata->last_work, work, sizeof(*thrdata->last_work)); + } +} + +static bool opencl_prepare_work(struct thr_info *thr, struct work *work) +{ + precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); + return true; +} + +static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + struct cgpu_info *gpu = thr->cgpu; + _clState *clState = clStates[thr_id]; + const cl_kernel *kernel = &clState->kernel; + + double gpu_ms_average = 7; + cl_int status; + + size_t globalThreads[1]; + size_t localThreads[1] = { clState->work_size }; + unsigned int threads; + unsigned int hashes; + + + struct timeval tv_gpustart, tv_gpuend, diff; + suseconds_t gpu_us; + + gettimeofday(&tv_gpustart, NULL); + timeval_subtract(&diff, &tv_gpustart, &tv_gpuend); + /* This finish flushes the readbuffer set with CL_FALSE later */ + clFinish(clState->commandQueue); + gettimeofday(&tv_gpuend, NULL); + timeval_subtract(&diff, &tv_gpuend, &tv_gpustart); + gpu_us = diff.tv_sec * 1000000 + diff.tv_usec; + decay_time(&gpu_ms_average, gpu_us / 1000); + if (gpu->dynamic) { + /* Try to not let the GPU be out for longer than 6ms, but + * increase intensity when the system is idle, unless + * dynamic is disabled. 
+		if (gpu_ms_average > 7) {
+			if (gpu->intensity > -10)
+				--gpu->intensity;
+		} else if (gpu_ms_average < 3) {
+			if (gpu->intensity < 10)
+				++gpu->intensity;
+		}
+	}
+	set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads,
+			   localThreads[0], gpu->intensity);
+
+	status = thrdata->queue_kernel_parameters(clState, &work->blk);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+		return 0;
+	}
+
+	/* MAXBUFFERS entry is used as a flag to say nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return 0;
+		}
+		if (unlikely(thrdata->last_work)) {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			thrdata->last_work = NULL;
+		} else {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		clFinish(clState->commandQueue);
+	}
+	status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+			globalThreads, localThreads, 0,  NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
+		return 0;
+	}
+
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
+		return 0;
+	}
+
+	work->blk.nonce += hashes;
+
+	return hashes;
+}
+
+static void opencl_thread_shutdown(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+	_clState *clState = clStates[thr_id];
+
+	clReleaseCommandQueue(clState->commandQueue);
+	clReleaseKernel(clState->kernel);
+	clReleaseProgram(clState->program);
+	clReleaseContext(clState->context);
+}
+
+struct device_api opencl_api = {
+	.name = "GPU",
+	.api_detect = opencl_detect,
+	.reinit_device = reinit_opencl_device,
+	.get_statline = get_opencl_statline,
+	.thread_prepare = opencl_thread_prepare,
+	.thread_init = opencl_thread_init,
+	.free_work = opencl_free_work,
+	.prepare_work = opencl_prepare_work,
+	.scanhash = opencl_scanhash,
+	.thread_shutdown = opencl_thread_shutdown,
+};
+#endif
+
+
+static int cgminer_id_count = 0;
+
+void enable_device(struct cgpu_info *cgpu)
+{
+	cgpu->enabled = true;
+	devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
+	mining_threads += cgpu->threads;
+#ifdef HAVE_OPENCL
+	if (cgpu->api == &opencl_api) {
+		gpu_threads += cgpu->threads;
+	}
+#endif
+}
+
+int main (int argc, char *argv[])
+{
+	unsigned int i, pools_active = 0;
+	unsigned int j, k;
+	struct block *block, *tmpblock;
+	struct work *work, *tmpwork;
+	struct sigaction handler;
+	struct thr_info *thr;
+
+	/* This dangerous function tramples random dynamically allocated
+	 * variables so do it before anything at all */
+	if (unlikely(curl_global_init(CURL_GLOBAL_ALL)))
+		quit(1, "Failed to curl_global_init");
+
+	if (unlikely(pthread_mutex_init(&hash_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&qd_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&curses_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&control_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_rwlock_init(&blk_lock, NULL)))
 		quit(1, "Failed to pthread_rwlock_init");
 
 	sprintf(packagename, "%s %s", PACKAGE, VERSION);
@@ -5744,48 +5869,11 @@ int main (int argc, char *argv[])
 	HASH_ADD_STR(blocks, hash, block);
 	strcpy(current_block, block->hash);
 
-	// Reckon number of cores in the box
-	#if defined(WIN32)
-
-	DWORD system_am;
-	DWORD process_am;
-	BOOL ok = GetProcessAffinityMask(
-		GetCurrentProcess(),
-		&system_am,
-		&process_am
-	);
-	if (!ok) {
-		applog(LOG_ERR, "couldn't figure out number of processors :(");
-		num_processors = 1;
-	} else {
-		size_t n = 32;
-		num_processors = 0;
-		while (n--)
-			if (process_am & (1<<n))
-				num_processors++;
-	}
-	#else
-	num_processors = sysconf(_SC_NPROCESSORS_ONLN);
-	#endif /* !WIN32 */
-
 #ifdef HAVE_OPENCL
-	nDevs = clDevicesNum();
-	if (nDevs < 0) {
-		applog(LOG_ERR, "clDevicesNum returned error, none usable");
-		nDevs = 0;
-	}
-
-	if (total_devices) {
-		if (total_devices > nDevs)
-			quit(1, "More devices specified than exist");
-		for (i = 0; i < MAX_GPUDEVICES; i++)
-			if (gpu_devices[i] && i + 1 > nDevs)
-				quit (1, "Command line options set a device that doesn't exist");
-#ifdef HAVE_ADL
-		for (i = 0; i < nDevs; i++) {
-			/* Make sure we do not attempt to adl manage devices
-			 * that we disable */
-			if (!gpu_devices[i])
-				gpus[i].gpu_engine =
-				gpus[i].gpu_memclock =
-				gpus[i].gpu_vddc =
-				gpus[i].gpu_fan =
-				gpus[i].gpu_powertune = 0;
-		}
-#endif
-	} else {
-		for (i = 0; i < nDevs; i++)
-			gpu_devices[i] = true;
-		total_devices = nDevs;
-	}
-#else
-	gpu_threads = 0;
+	if (!opt_nogpu)
+		opencl_api.api_detect();
 #endif
 
-	if (!gpu_threads && !forced_n_threads) {
-		/* Maybe they turned GPU off; restore default CPU threads. */
-		opt_n_threads = num_processors;
-	}
+	cpu_api.api_detect();
+
+	if (devices_enabled == -1) {
+		applog(LOG_ERR, "Devices detected:");
+		for (i = 0; i < total_devices; ++i) {
+			applog(LOG_ERR, " %2d. %s%d", i, devices[i]->api->name, devices[i]->device_id);
%s%d", i, devices[i]->api->name, devices[i]->device_id); + } + quit(0, "%d devices listed", total_devices); + } + + mining_threads = 0; + gpu_threads = 0; + if (devices_enabled) { + for (i = 0; i < (sizeof(devices_enabled) * 8) - 1; ++i) { + if (devices_enabled & (1 << i)) { + if (i >= total_devices) + quit (1, "Command line options set a device that doesn't exist"); + enable_device(devices[i]); + } else if (i < total_devices) { + if (opt_removedisabled) { + if (devices[i]->api == &cpu_api) + --opt_n_threads; + } else { + enable_device(devices[i]); + } + devices[i]->enabled = false; + } + } + total_devices = cgminer_id_count; + } else { + for (i = 0; i < total_devices; ++i) + enable_device(devices[i]); } - if (!opt_n_threads && ! gpu_threads) + if (!total_devices) quit(1, "All devices disabled, cannot mine!"); - logcursor = 8; - gpucursor = logcursor; - cpucursor = gpucursor + nDevs; - logstart = cpucursor + 1; - if (opt_n_threads) { - if (opt_n_threads < num_processors) - logstart += opt_n_threads; - else - logstart += num_processors; - } + devcursor = 8; + logstart = devcursor + total_devices + 1; logcursor = logstart + 1; if (opt_realquiet) @@ -5953,8 +6022,6 @@ int main (int argc, char *argv[]) fork_monitor(); #endif // defined(unix) - mining_threads = opt_n_threads + gpu_threads; - total_threads = mining_threads + 8; work_restart = calloc(total_threads, sizeof(*work_restart)); if (!work_restart) @@ -5984,12 +6051,6 @@ int main (int argc, char *argv[]) if (!thr->q) quit(1, "Failed to tq_new"); - if (opt_n_threads ) { - cpus = calloc(num_processors, sizeof(struct cgpu_info)); - if (unlikely(!cpus)) - quit(1, "Failed to calloc cpus"); - } - stage_thr_id = mining_threads + 3; thr = &thr_info[stage_thr_id]; thr->q = tq_new(); @@ -6071,87 +6132,42 @@ retry_pools: #ifdef HAVE_OPENCL if (!opt_noadl) init_adl(nDevs); - bool failmessage = false; - - /* start GPU mining threads */ - for (i = 0; i < nDevs * opt_g_threads; i++) { - int gpu = i % nDevs; - struct cgpu_info *cgpu; - struct timeval now; - - gpus[gpu].is_gpu = 1; - gpus[gpu].cpu_gpu = gpu; - - thr = &thr_info[i]; - thr->id = i; - cgpu = thr->cgpu = &gpus[gpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting gpu mining threads"); - - /* Enable threads for devices set not to mine but disable - * their queue in case we wish to enable them later*/ - if (gpu_devices[gpu]) { - if (opt_debug) - applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - - tq_push(thr->q, &ping); - } - - applog(LOG_INFO, "Init GPU thread %i", i); - clStates[i] = initCl(gpu, name, sizeof(name)); - if (!clStates[i]) { - enable_curses(); - applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu); - if (!failmessage) { - char *buf; - - applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this."); - applog(LOG_ERR, "Try stopping other applications using the GPU like afterburner."); - applog(LOG_ERR, "Then restart cgminer."); - failmessage = true; - buf = curses_input("Press enter to continue"); - if (buf) - free(buf); - } - gpu_devices[gpu] = false; - cgpu->status = LIFE_NOSTART; - continue; - } - applog(LOG_INFO, "initCl() finished. 
Found %s", name); - gettimeofday(&now, NULL); - get_datestamp(cgpu->init, &now); - - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) - quit(1, "thread %d create failed", i); - } - - applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); #else opt_g_threads = 0; #endif - /* start CPU mining threads */ - for (i = gpu_threads; i < mining_threads; i++) { - int cpu = (i - gpu_threads) % num_processors; + // Start threads + k = 0; + for (i = 0; i < total_devices; ++i) { + struct cgpu_info *cgpu = devices[i]; + for (j = 0; j < cgpu->threads; ++j, ++k) { + thr = &thr_info[k]; + thr->id = k; + thr->cgpu = cgpu; + + thr->q = tq_new(); + if (!thr->q) + quit(1, "tq_new failed in starting %s%d mining thread (#%d)", cgpu->api->name, cgpu->device_id, i); + + /* Enable threads for devices set not to mine but disable + * their queue in case we wish to enable them later */ + if (cgpu->enabled) { + if (opt_debug) + applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - thr = &thr_info[i]; - - thr->id = i; - cpus[cpu].cpu_gpu = cpu; - thr->cgpu = &cpus[cpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting cpu mining threads"); + tq_push(thr->q, &ping); + } - thread_reportin(thr); + if (cgpu->api->thread_prepare && !cgpu->api->thread_prepare(thr)) + continue; - if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) - quit(1, "thread %d create failed", i); + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) + quit(1, "thread %d create failed", thr->id); + } } + applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); + applog(LOG_INFO, "%d cpu miner threads started, " "using SHA256 '%s' algorithm.", opt_n_threads, diff --git a/miner.h b/miner.h index bd451401..86352a38 100644 --- a/miner.h +++ b/miner.h @@ -208,9 +208,35 @@ struct gpu_adl { }; #endif +struct cgpu_info; +struct thr_info; +struct work; + +struct device_api { + char*name; + + // API-global functions + void (*api_detect)(); + + // Device-specific functions + void (*reinit_device)(struct cgpu_info*); + void (*get_statline)(char*, struct cgpu_info*); + + // Thread-specific functions + bool (*thread_prepare)(struct thr_info*); + uint64_t (*can_limit_work)(struct thr_info*); + bool (*thread_init)(struct thr_info*); + void (*free_work)(struct thr_info*, struct work*); + bool (*prepare_work)(struct thr_info*, struct work*); + uint64_t (*scanhash)(struct thr_info*, struct work*, uint64_t); + void (*thread_shutdown)(struct thr_info*); +}; + struct cgpu_info { - int is_gpu; - int cpu_gpu; + int cgminer_id; + struct device_api *api; + int device_id; + bool enabled; int accepted; int rejected; int hw_errors; @@ -221,6 +247,8 @@ struct cgpu_info { char init[40]; struct timeval last_message_tv; + int threads; + bool dynamic; int intensity; #ifdef HAVE_ADL @@ -257,6 +285,7 @@ struct thr_info { pthread_t pth; struct thread_q *q; struct cgpu_info *cgpu; + void *cgpu_data; struct timeval last; struct timeval sick; @@ -352,57 +381,62 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, extern char *bin2hex(const unsigned char *p, size_t len); extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); -extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate, - unsigned char *pdata, unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); - -extern unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, +typedef bool 
+typedef bool (*sha256_func)(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce);
+	uint32_t max_nonce,
+	uint32_t *last_nonce,
+	uint32_t nonce);
 
-extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate,
+extern bool ScanHash_4WaySSE2(int, const unsigned char *pmidstate,
 	unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone);
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
 
-extern bool scanhash_via(int, unsigned char *data_inout,
+extern bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
+	unsigned char *pdata,
+	unsigned char *phash1, unsigned char *phash,
+	const unsigned char *ptarget,
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+
+extern bool scanhash_via(int, const unsigned char *pmidstate,
+	unsigned char *pdata,
+	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *target,
-	uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 
 extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t nonce);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
 
-extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
-extern int scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
-extern int scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
 extern int
@@ -431,6 +465,7 @@ extern bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, flo
 extern void api(void);
 
#define MAX_GPUDEVICES 16
+#define MAX_DEVICES 32
#define MAX_POOLS (32)
 
 extern int nDevs;
@@ -444,9 +479,10 @@ extern struct work_restart *work_restart;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
 extern int gpu_threads;
 extern double total_secs;
diff --git a/sha256_4way.c b/sha256_4way.c
index c81e05b9..15e852ac 100644
--- a/sha256_4way.c
+++ b/sha256_4way.c
@@ -101,14 +101,16 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
-unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
+bool ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
+    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
+
+    pdata += 64;
 
     work_restart[thr_id].restart = 0;
 
@@ -132,17 +134,18 @@ unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
 		    ((unsigned int*)phash)[i] = thash[i][j];
 
 		if (fulltest(phash, ptarget)) {
-		    *nHashesDone = nonce;
-		    *nNonce_p = nonce + j;
-		    return nonce + j;
+		    nonce += j;
+		    *last_nonce = nonce;
+		    *nNonce_p = nonce;
+		    return true;
 		}
 	    }
 	}
 
 	if ((nonce >= max_nonce) || work_restart[thr_id].restart) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
     }
 }
diff --git a/sha256_altivec_4way.c b/sha256_altivec_4way.c
index b70e4d03..aa50486f 100644
--- a/sha256_altivec_4way.c
+++ b/sha256_altivec_4way.c
@@ -74,14 +74,16 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
-unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
+bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
+    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
+
+    pdata += 64;
 
     work_restart[thr_id].restart = 0;
 
@@ -104,17 +106,18 @@ unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
 		((unsigned int*)phash)[i] = thash[i][j];
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
 	if ((nonce >= max_nonce) || work_restart[thr_id].restart) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
 
 	nonce += NPAR;
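Both 4-way scanners above make the same pair of coordinated edits: the nonce pointer moves from (pdata + 12) to (pdata + 76), and pdata is then advanced by 64. Callers now hand in the start of the full 128-byte work buffer instead of a pointer to its second half; the nonce occupies bytes 76..79 of the 80-byte block header, which is offset 12 within the second 64-byte SHA-256 block, so the two conventions address the same word. A sketch of the equivalence (work_data stands in for the caller's buffer, not a name from the patch):

    /* Sketch: the same four bytes, old and new addressing. */
    unsigned char *pdata = work_data;                      /* full 128-byte buffer */
    uint32_t *nonce_old = (uint32_t *)((pdata + 64) + 12); /* old convention */
    uint32_t *nonce_new = (uint32_t *)(pdata + 76);        /* new convention */
    pdata += 64;  /* hashing code keeps using block-2-relative offsets */

Capturing nNonce_p before the += 64 is what lets the rest of each routine stay untouched.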
diff --git a/sha256_cryptopp.c b/sha256_cryptopp.c
index c0c1b6fd..11c1c5ca 100644
--- a/sha256_cryptopp.c
+++ b/sha256_cryptopp.c
@@ -97,11 +97,13 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
 		unsigned char *data,
 		unsigned char *hash1, unsigned char *hash,
 		const unsigned char *target,
-		uint32_t max_nonce, unsigned long *hashes_done,
+		uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
+
+	data += 64;
 
 	work_restart[thr_id].restart = 0;
 
@@ -113,12 +115,12 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
 		runhash(hash, hash1, sha256_init_state);
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
@@ -579,11 +581,13 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate,
 		unsigned char *data,
 		unsigned char *hash1, unsigned char *hash,
 		const unsigned char *target,
-		uint32_t max_nonce, unsigned long *hashes_done,
+		uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
+
+	data += 64;
 
 	work_restart[thr_id].restart = 0;
 
@@ -595,12 +599,12 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate,
 		runhash32(hash, hash1, sha256_init_state);
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
diff --git a/sha256_generic.c b/sha256_generic.c
index 5bc4d972..05f4c376 100644
--- a/sha256_generic.c
+++ b/sha256_generic.c
@@ -242,13 +242,15 @@ const uint32_t sha256_init_state[8] = {
 bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data,
 	      unsigned char *hash1, unsigned char *hash,
 	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done,
+	      uint32_t max_nonce, uint32_t *last_nonce,
 	      uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
 	unsigned long stat_ctr = 0;
 
+	data += 64;
+
 	work_restart[thr_id].restart = 0;
 
 	while (1) {
@@ -261,12 +263,12 @@ bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data,
 		stat_ctr++;
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
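The scalar scanners show the other half of the signature change: the return value is now a plain bool (share found or not), and the out-parameter always reports the last nonce examined instead of doubling as a hash counter. A caller that still wants a hash count can derive one; a sketch, with start_nonce standing for the nonce the call began at (not a variable in this patch):

    /* Sketch: recover the old hashes_done value from last_nonce. */
    uint32_t last_nonce = 0;
    bool found = scanhash_c(thr_id, work->midstate, work->data,
                            work->hash1, work->hash, work->target,
                            max_nonce, &last_nonce, start_nonce);
    uint64_t hashes_done = last_nonce - start_nonce + 1;

This also retires the old overloading in the int-typed scanners, where -1 meant "range exhausted" and any other return value was a winning nonce.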
diff --git a/sha256_sse2_amd64.c b/sha256_sse2_amd64.c
index 561aa3a1..5c82314f 100644
--- a/sha256_sse2_amd64.c
+++ b/sha256_sse2_amd64.c
@@ -50,14 +50,14 @@ const uint32_t sha256_init[8]__attribute__((aligned(0x100))) =
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
 
-int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
     __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
@@ -65,6 +65,8 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* For debugging */
@@ -114,19 +116,20 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce + 1;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
-	nonce += 4;
-
 	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
+
+	nonce += 4;
     }
 }
diff --git a/sha256_sse2_i386.c b/sha256_sse2_i386.c
index 72a90c99..321626f0 100644
--- a/sha256_sse2_i386.c
+++ b/sha256_sse2_i386.c
@@ -50,14 +50,14 @@ const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) =
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
 
-int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
     __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
@@ -65,6 +65,8 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* Message expansion */
@@ -105,20 +107,21 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	  }
 	}
 
+	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
+	    *last_nonce = nonce;
+	    return false;
+	}
+
 	nonce += 4;
-	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart))
-	{
-	    *nHashesDone = nonce;
-	    return -1;
-	}
     }
 }
diff --git a/sha256_sse4_amd64.c b/sha256_sse4_amd64.c
index f150e8c6..67e0604b 100644
--- a/sha256_sse4_amd64.c
+++ b/sha256_sse4_amd64.c
@@ -49,19 +49,21 @@ static uint32_t g_sha256_hinit[8] =
 
 __m128i g_4sha256_k[64];
 
-int scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64], m_4hash[64], m_4hash1[64];
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* For debugging */
@@ -113,19 +115,20 @@ int scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
-	nonce += 4;
-
 	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
+
+	nonce += 4;
     }
 }
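One subtlety in the three SSE rewrites: the exit test now runs before nonce += 4 rather than after it, so on a restart or max_nonce exit *last_nonce names a nonce that was actually hashed instead of one vector-width beyond it. Schematically (restart stands for the work_restart check):

    /* Old tail: advance, then test; last_nonce overshot by 4. */
    nonce += 4;
    if (nonce >= max_nonce || restart) { *last_nonce = nonce; return false; }

    /* New tail: test, then advance; last_nonce is a hashed nonce. */
    if (nonce >= max_nonce || restart) { *last_nonce = nonce; return false; }
    nonce += 4;

The amd64 version is the one asymmetry here: on the success path it stores nonce + 1 where the other scanners store nonce.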
diff --git a/sha256_via.c b/sha256_via.c
index fdf0045e..550807cb 100644
--- a/sha256_via.c
+++ b/sha256_via.c
@@ -19,9 +19,11 @@ static void via_sha256(void *hash, void *buf, unsigned len)
 		:"memory");
 }
 
-bool scanhash_via(int thr_id, unsigned char *data_inout,
-		  const unsigned char *target,
-		  uint32_t max_nonce, unsigned long *hashes_done,
+bool scanhash_via(int thr_id, const unsigned char *pmidstate,
+		  unsigned char *data_inout,
+		  unsigned char *phash1, unsigned char *phash,
+		  const unsigned char *target,
+		  uint32_t max_nonce, uint32_t *last_nonce,
 		  uint32_t n)
 {
 	unsigned char data[128] __attribute__((aligned(128)));
@@ -70,12 +72,12 @@ bool scanhash_via(int thr_id, unsigned char *data_inout,
 			dout32[i] = swab32(data32[i]);
 		}
 
-		*hashes_done = n;
+		*last_nonce = n;
 		return true;
 	}
 
 	if ((n >= max_nonce) || work_restart[thr_id].restart) {
-		*hashes_done = n;
+		*last_nonce = n;
 		return false;
 	}
 }
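scanhash_via gains pmidstate/phash1/phash parameters it never reads; the point is purely signature uniformity, so the VIA PadLock path now matches the sha256_func typedef from miner.h and can be dispatched through the same function-pointer type as every other scanner. A sketch of the resulting uniform call, with the extra arguments passed but ignored (thr/work fields as in the earlier examples above):

    /* Sketch: VIA is called exactly like any other scanner; the
     * midstate/hash1/hash arguments are accepted and ignored. */
    uint32_t last_nonce = 0;
    bool found = scanhash_via(thr->id, work->midstate, work->data,
                              work->hash1, work->hash, work->target,
                              max_nonce, &last_nonce, work->blk.nonce);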