From 845961af66bfb8c2ff57e9a1b32f260b2c4c9372 Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Thu, 5 Jan 2012 19:26:01 -0500 Subject: [PATCH 1/3] Refactor to abstract device-specific code --- adl.c | 9 + api.c | 12 +- main.c | 698 +++++++++++++++++++++++++++++++------------------------- miner.h | 29 ++- 4 files changed, 425 insertions(+), 323 deletions(-) diff --git a/adl.c b/adl.c index 6f973ff4..baa1076b 100644 --- a/adl.c +++ b/adl.c @@ -212,6 +212,15 @@ void init_adl(int nDevs) continue; } + if (!gpus[gpu].enabled) { + gpus[i].gpu_engine = + gpus[i].gpu_memclock = + gpus[i].gpu_vddc = + gpus[i].gpu_fan = + gpus[i].gpu_powertune = 0; + continue; + } + gpus[gpu].has_adl = true; /* Flag adl as active if any card is successfully activated */ adl_active = true; diff --git a/api.c b/api.c index fe29b947..86c7c2d7 100644 --- a/api.c +++ b/api.c @@ -387,7 +387,7 @@ void gpustatus(int gpu, bool isjson) #endif gt = gv = gm = gc = ga = gf = gp = pt = 0; - if (gpu_devices[gpu]) + if (cgpu->enabled) enabled = (char *)YES; else enabled = (char *)NO; @@ -662,13 +662,13 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson) return; } - if (gpu_devices[id]) { + if (gpus[id].enabled) { strcpy(io_buffer, message(MSG_ALRENA, id, isjson)); return; } for (i = 0; i < gpu_threads; i++) { - gpu = thr_info[i].cgpu->cpu_gpu; + gpu = thr_info[i].cgpu->device_id; if (gpu == id) { thr = &thr_info[i]; if (thr->cgpu->status != LIFE_WELL) { @@ -676,7 +676,7 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson) return; } - gpu_devices[id] = true; + gpus[id].enabled = true; tq_push(thr->q, &ping); } @@ -705,12 +705,12 @@ void gpudisable(SOCKETTYPE c, char *param, bool isjson) return; } - if (!gpu_devices[id]) { + if (!gpus[id].enabled) { strcpy(io_buffer, message(MSG_ALRDIS, id, isjson)); return; } - gpu_devices[id] = false; + gpus[id].enabled = false; strcpy(io_buffer, message(MSG_GPUDIS, id, isjson)); } diff --git a/main.c b/main.c index a12a0ee9..31c25ef0 100644 --- a/main.c +++ 
b/main.c @@ -1,6 +1,7 @@ /* * Copyright 2011 Con Kolivas + * Copyright 2011 Luke Dashjr * Copyright 2010 Jeff Garzik * * This program is free software; you can redistribute it and/or modify it @@ -205,12 +206,14 @@ enum sha256_algos opt_algo = ALGO_C; #endif int nDevs; static int opt_g_threads = 2; -static int opt_device; -static int total_devices; -bool gpu_devices[MAX_GPUDEVICES]; +static signed int devices_enabled = 0; +static bool opt_removedisabled = false; +int total_devices = 0; +struct cgpu_info *devices[MAX_DEVICES]; +bool have_opencl = false; int gpu_threads; static bool forced_n_threads; -int opt_n_threads; +int opt_n_threads = -1; int mining_threads; int num_processors; bool use_curses = true; @@ -1002,17 +1005,20 @@ static char *set_float_0_to_99(const char *arg, float *f) return NULL; } -static char *set_devices(const char *arg, int *i) +static char *set_devices(char *arg) { - char *err = opt_set_intval(arg, i); - - if (err) - return err; + int i = strtol(arg, &arg, 0); + if (*arg) { + if (*arg == '?') { + devices_enabled = -1; + return NULL; + } + return "Invalid device number"; + } - if (*i < 0 || *i > 15) - return "Invalid GPU device number"; - total_devices++; - gpu_devices[*i] = true; + if (i < 0 || i >= (sizeof(devices_enabled) * 8) - 1) + return "Invalid device number"; + devices_enabled |= 1 << i; return NULL; } @@ -1545,10 +1551,10 @@ static struct opt_table opt_config_table[] = { OPT_WITHOUT_ARG("--debug|-D", enable_debug, &opt_debug, "Enable debug output"), -#ifdef HAVE_OPENCL OPT_WITH_ARG("--device|-d", - set_devices, NULL, &opt_device, + set_devices, NULL, NULL, "Select device to use, (Use repeat -d for multiple devices, default: all)"), +#ifdef HAVE_OPENCL OPT_WITHOUT_ARG("--disable-gpu|-G", opt_set_bool, &opt_nogpu, "Disable GPU mining even if suitable devices exist"), @@ -1643,6 +1649,9 @@ static struct opt_table opt_config_table[] = { OPT_WITHOUT_ARG("--real-quiet", opt_set_bool, &opt_realquiet, "Disable all output"), + 
OPT_WITHOUT_ARG("--remove-disabled", + opt_set_bool, &opt_removedisabled, + "Remove disabled devices entirely, as if they didn't exist"), OPT_WITH_ARG("--retries|-r", opt_set_intval, opt_show_intval, &opt_retries, "Number of times to retry before giving up, if JSON-RPC call fails (-1 means never)"), @@ -1929,7 +1938,7 @@ err_out: static inline int dev_from_id(int thr_id) { - return thr_info[thr_id].cgpu->cpu_gpu; + return thr_info[thr_id].cgpu->device_id; } /* Make the change in the recent value adjust dynamically when the difference @@ -1965,7 +1974,7 @@ static int requests_staged(void) static WINDOW *mainwin, *statuswin, *logwin; double total_secs = 0.1; static char statusline[256]; -static int cpucursor, gpucursor, logstart, logcursor; +static int devcursor, logstart, logcursor; struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */ struct cgpu_info *cpus; @@ -2001,24 +2010,7 @@ static void tailsprintf(char *f, const char *fmt, ...) static void get_statline(char *buf, struct cgpu_info *cgpu) { - sprintf(buf, "%sPU%d ", cgpu->is_gpu ? 
"G" : "C", cgpu->cpu_gpu); -#ifdef HAVE_ADL - if (cgpu->has_adl) { - int gpu = cgpu->cpu_gpu; - float gt = gpu_temp(gpu); - int gf = gpu_fanspeed(gpu); - int gp = gpu_fanpercent(gpu); - - if (gt != -1) - tailsprintf(buf, "%.1fC ", gt); - if (gf != -1) - tailsprintf(buf, "%dRPM ", gf); - else if (gp != -1) - tailsprintf(buf, "%d%% ", gp); - if (gt > -1 || gf > -1 || gp > -1) - tailsprintf(buf, "| "); - } -#endif + sprintf(buf, "%s%d ", cgpu->api->name, cgpu->device_id); tailsprintf(buf, "(%ds):%.1f (avg):%.1f Mh/s | A:%d R:%d HW:%d U:%.2f/m", opt_log_interval, cgpu->rolling, @@ -2027,8 +2019,8 @@ static void get_statline(char *buf, struct cgpu_info *cgpu) cgpu->rejected, cgpu->hw_errors, cgpu->utility); - if (cgpu->is_gpu) - tailsprintf(buf, " I:%d", cgpu->intensity); + if (cgpu->api->get_statline) + cgpu->api->get_statline(buf, cgpu); } static void text_print_status(int thr_id) @@ -2069,8 +2061,8 @@ static void curses_print_status(void) mvwprintw(statuswin, 5, 0, " Block: %s... Started: %s", current_hash, blocktime); mvwhline(statuswin, 6, 0, '-', 80); mvwhline(statuswin, logstart - 1, 0, '-', 80); - mvwprintw(statuswin, gpucursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", - gpu_threads ? "[G]PU management " : ""); + mvwprintw(statuswin, devcursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", + have_opencl ? "[G]PU management " : ""); /* The window will be updated once we're done with all the devices */ wnoutrefresh(statuswin); } @@ -2084,38 +2076,17 @@ static void adj_width(int var, int *length) static void curses_print_devstatus(int thr_id) { static int awidth = 1, rwidth = 1, hwwidth = 1, uwidth = 1; - - if (thr_id >= 0 && thr_id < gpu_threads) { - int gpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &gpus[gpu]; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + char logline[255]; cgpu->utility = cgpu->accepted / ( total_secs ? 
total_secs : 1 ) * 60; - mvwprintw(statuswin, gpucursor + gpu, 0, " GPU %d: ", gpu); -#ifdef HAVE_ADL - if (cgpu->has_adl) { - float gt = gpu_temp(gpu); - int gf = gpu_fanspeed(gpu); - int gp = gpu_fanpercent(gpu); - - if (gt != -1) - wprintw(statuswin, "%5.1fC ", gt); - else - wprintw(statuswin, " "); - if (gf != -1) - wprintw(statuswin, "%4dRPM ", gf); - else if (gp != -1) - wprintw(statuswin, "%3d%% ", gp); - else - wprintw(statuswin, " "); - wprintw(statuswin, "| "); - } -#endif + mvwprintw(statuswin, devcursor + cgpu->cgminer_id, 0, " %s %d: ", cgpu->api->name, cgpu->device_id); if (cgpu->status == LIFE_DEAD) wprintw(statuswin, "DEAD "); else if (cgpu->status == LIFE_SICK) wprintw(statuswin, "SICK "); - else if (!gpu_devices[gpu]) + else if (!cgpu->enabled) wprintw(statuswin, "OFF "); else wprintw(statuswin, "%5.1f", cgpu->rolling); @@ -2123,30 +2094,20 @@ static void curses_print_devstatus(int thr_id) adj_width(cgpu->rejected, &rwidth); adj_width(cgpu->hw_errors, &hwwidth); adj_width(cgpu->utility, &uwidth); - wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m I:%2d", + wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m", cgpu->total_mhashes / total_secs, awidth, cgpu->accepted, rwidth, cgpu->rejected, hwwidth, cgpu->hw_errors, - uwidth + 3, cgpu->utility, - gpus[gpu].intensity); - wclrtoeol(statuswin); - } else if (thr_id >= gpu_threads) { - int cpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &cpus[cpu]; + uwidth + 3, cgpu->utility); - cgpu->utility = cgpu->accepted / ( total_secs ? 
total_secs : 1 ) * 60; + if (cgpu->api->get_statline) { + logline[0] = '\0'; + cgpu->api->get_statline(logline, cgpu); + wprintw(statuswin, "%s", logline); + } - adj_width(cgpu->accepted, &awidth); - adj_width(cgpu->rejected, &rwidth); - adj_width(cgpu->utility, &uwidth); - mvwprintw(statuswin, cpucursor + cpu, 0, " CPU %d: %5.2f/%5.2fMh/s | A:%*d R:%*d U:%*.2f/m", - cpu, cgpu->rolling, cgpu->total_mhashes / total_secs, - awidth, cgpu->accepted, - rwidth, cgpu->rejected, - uwidth + 3, cgpu->utility); wclrtoeol(statuswin); - } wnoutrefresh(statuswin); } @@ -2365,14 +2326,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: true (yay!!!)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } if (opt_shares && total_accepted >= opt_shares) { applog(LOG_WARNING, "Successfully mined %d accepted shares as requested and exiting.", opt_shares); @@ -2387,14 +2348,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: false (booooo)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? 
"G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } } @@ -3228,11 +3189,11 @@ static void write_config(FILE *fcfg) if (schedstop.enable) fprintf(fcfg, ",\n\"stop-time\" : \"%d:%d\"", schedstop.tm.tm_hour, schedstop.tm.tm_min); for(i = 0; i < nDevs; i++) - if (!gpu_devices[i]) + if (!gpus[i].enabled) break; if (i < nDevs) for(i = 0; i < nDevs; i++) - if (gpu_devices[i]) + if (gpus[i].enabled) fprintf(fcfg, ",\n\"device\" : \"%d\"", i); if (strcmp(opt_api_description, PACKAGE_STRING) != 0) fprintf(fcfg, ",\n\"api-description\" : \"%s\"", opt_api_description); @@ -3558,6 +3519,7 @@ retry: #ifdef HAVE_OPENCL void reinit_device(struct cgpu_info *cgpu); +struct device_api opencl_api; static void manage_gpu(void) { @@ -3626,7 +3588,7 @@ retry: if (thr->cgpu != cgpu) continue; get_datestamp(checkin, &thr->last); - wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, gpu_devices[gpu] ? "Enabled" : "Disabled"); + wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? 
"Enabled" : "Disabled"); switch (cgpu->status) { default: case LIFE_WELL: @@ -3657,24 +3619,29 @@ retry: else selected = -1; if (!strncasecmp(&input, "e", 1)) { + struct cgpu_info *cgpu; + if (selected) selected = curses_int("Select GPU to enable"); if (selected < 0 || selected >= nDevs) { wlogprint("Invalid selection\n"); goto retry; } - if (gpu_devices[selected]) { + if (gpus[selected].enabled) { wlogprint("Device already enabled\n"); goto retry; } - gpu_devices[selected] = true; - for (i = 0; i < gpu_threads; i++) { + gpus[selected].enabled = true; + for (i = 0; i < mining_threads; ++i) { + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(i) != selected) continue; - thr = &thr_info[i]; - if (thr->cgpu->status != LIFE_WELL) { + if (cgpu->status != LIFE_WELL) { wlogprint("Must restart device before enabling it"); - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } if (opt_debug) @@ -3690,11 +3657,11 @@ retry: wlogprint("Invalid selection\n"); goto retry; } - if (!gpu_devices[selected]) { + if (!gpus[selected].enabled) { wlogprint("Device already disabled\n"); goto retry; } - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } else if (!strncasecmp(&input, "i", 1)) { int intensity; @@ -3778,7 +3745,7 @@ static void *input_thread(void *userdata) display_pools(); else if (!strncasecmp(&input, "s", 1)) set_options(); - else if (gpu_threads && !strncasecmp(&input, "g", 1)) + else if (have_opencl && !strncasecmp(&input, "g", 1)) manage_gpu(); if (opt_realquiet) { disable_curses(); @@ -4382,7 +4349,7 @@ static void *miner_thread(void *userdata) /* Cpu affinity only makes sense if the number of threads is a multiple * of the number of CPUs */ if (!(opt_n_threads % num_processors)) - affine_to_cpu(thr_id - gpu_threads, dev_from_id(thr_id)); + affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors); /* Invalidate pool so it fails can_roll() test */ 
work->pool = NULL; @@ -4881,7 +4848,7 @@ static void *gpuminer_thread(void *userdata) requested = true; } } - if (unlikely(!gpu_devices[gpu] || mythr->pause)) { + if (unlikely(!gpus[gpu].enabled || mythr->pause)) { applog(LOG_WARNING, "Thread %d being disabled", thr_id); mythr->rolling = mythr->cgpu->rolling = 0; if (opt_debug) @@ -5076,7 +5043,7 @@ static void *reinit_cpu(void *userdata) pthread_detach(pthread_self()); #if 0 struct cgpu_info *cgpu = (struct cgpu_info *)userdata; - int cpu = cgpu->cpu_gpu; + int cpu = cgpu->device_id; long thr_id = ....(long)userdata; struct thr_info *thr = &thr_info[thr_id]; int cpu = dev_from_id(thr_id); @@ -5132,10 +5099,14 @@ select_cgpu: goto out; } - gpu = cgpu->cpu_gpu; - gpu_devices[gpu] = false; + gpu = cgpu->device_id; + cgpu->enabled = false; - for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { + for (thr_id = 0; thr_id < mining_threads; ++thr_id) { + thr = &thr_info[thr_id]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(thr_id) != gpu) continue; @@ -5154,14 +5125,16 @@ select_cgpu: applog(LOG_WARNING, "Thread %d no longer exists", thr_id); } - gpu_devices[gpu] = true; + cgpu->enabled = true; - for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { + for (thr_id = 0; thr_id < mining_threads; ++thr_id) { + thr = &thr_info[thr_id]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(thr_id) != gpu) continue; - thr = &thr_info[thr_id]; - /* Lose this ram cause we may get stuck here! 
*/ //tq_freeze(thr->q); @@ -5190,11 +5163,14 @@ select_cgpu: gettimeofday(&now, NULL); get_datestamp(cgpu->init, &now); - for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { + for (thr_id = 0; thr_id < mining_threads; ++thr_id) { + thr = &thr_info[thr_id]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(thr_id) != gpu) continue; - thr = &thr_info[thr_id]; tq_push(thr->q, &ping); } @@ -5210,24 +5186,16 @@ static void *reinit_gpu(void *userdata) void reinit_device(struct cgpu_info *cgpu) { - if (cgpu->is_gpu) - tq_push(thr_info[gpur_thr_id].q, cgpu); - else - tq_push(thr_info[cpur_thr_id].q, cgpu); + if (cgpu->api->reinit_device) + cgpu->api->reinit_device(cgpu); } /* Determine which are the first threads belonging to a device and if they're * active */ static bool active_device(int thr_id) { - if (thr_id < gpu_threads) { - if (thr_id >= total_devices) - return false; - if (!gpu_devices[dev_from_id(thr_id)]) - return false; - } else if (thr_id > gpu_threads + num_processors) - return false; - return true; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + return cgpu->enabled; } /* Makes sure the hashmeter keeps going even if mining threads stall, updates @@ -5322,25 +5290,30 @@ static void *watchdog_thread(void *userdata) struct thr_info *thr; thr = &thr_info[i]; - /* Don't touch disabled GPUs */ - if (thr->cgpu->is_gpu && !gpu_devices[thr->cgpu->cpu_gpu]) + /* Don't touch disabled devices */ + if (!thr->cgpu->enabled) continue; thr->pause = false; tq_push(thr->q, &ping); } } - for (i = 0; i < gpu_threads; i++) { +#ifdef HAVE_OPENCL + for (i = 0; i < mining_threads; i++) { struct thr_info *thr; bool *enable; + struct cgpu_info *cgpu; int gpu; + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; /* Use only one thread per device to determine if the GPU is healthy */ if (i >= nDevs) break; - thr = &thr_info[i]; - gpu = thr->cgpu->cpu_gpu; - enable = &gpu_devices[gpu]; + gpu = thr->cgpu->device_id; + 
enable = &cgpu->enabled; #ifdef HAVE_ADL if (adl_active && gpus[gpu].has_adl && *enable) gpu_autotune(gpu, enable); @@ -5392,6 +5365,7 @@ static void *watchdog_thread(void *userdata) reinit_device(thr->cgpu); } } +#endif } return NULL; @@ -5695,14 +5669,232 @@ static void enable_curses(void) { unlock_curses(); } + +struct device_api cpu_api; + +static void cpu_detect() +{ + int i; + + // Reckon number of cores in the box + #if defined(WIN32) + { + DWORD system_am; + DWORD process_am; + BOOL ok = GetProcessAffinityMask( + GetCurrentProcess(), + &system_am, + &process_am + ); + if (!ok) { + applog(LOG_ERR, "couldn't figure out number of processors :("); + num_processors = 1; + } else { + size_t n = 32; + num_processors = 0; + while (n--) + if (process_am & (1< MAX_DEVICES) + opt_n_threads = MAX_DEVICES - total_devices; + cpus = calloc(opt_n_threads, sizeof(struct cgpu_info)); + if (unlikely(!cpus)) + quit(1, "Failed to calloc cpus"); + for (i = 0; i < opt_n_threads; ++i) { + struct cgpu_info *cgpu; + + cgpu = devices[total_devices + i] = &cpus[i]; + cgpu->api = &cpu_api; + cgpu->enabled = true; + cgpu->device_id = i; + cgpu->threads = 1; + } + total_devices += opt_n_threads; +} + +static void reinit_cpu_device(struct cgpu_info *cpu) +{ + tq_push(thr_info[cpur_thr_id].q, cpu); +} + +static void cpu_thread_start(struct thr_info *thr) +{ + thread_reportin(thr); + + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) + quit(1, "thread %d create failed", thr->id); +} + +struct device_api cpu_api = { + .name = "CPU", + .api_detect = cpu_detect, + .reinit_device = reinit_cpu_device, + .thread_start = cpu_thread_start, +}; + + +#ifdef HAVE_OPENCL +struct device_api opencl_api; + +static void opencl_detect() +{ + int i; + + nDevs = clDevicesNum(); + if (nDevs < 0) { + applog(LOG_ERR, "clDevicesNum returned error, none usable"); + nDevs = 0; + } + + if (MAX_DEVICES - total_devices < nDevs) + nDevs = MAX_DEVICES - total_devices; + + if (!nDevs) { + return; + } + + if 
(opt_kernel) { + if (strcmp(opt_kernel, "poclbm") && strcmp(opt_kernel, "phatk")) + quit(1, "Invalid kernel name specified - must be poclbm or phatk"); + if (!strcmp(opt_kernel, "poclbm")) + chosen_kernel = KL_POCLBM; + else + chosen_kernel = KL_PHATK; + } else + chosen_kernel = KL_NONE; + + for (i = 0; i < nDevs; ++i) { + struct cgpu_info *cgpu; + cgpu = devices[total_devices++] = &gpus[i]; + cgpu->enabled = true; + cgpu->api = &opencl_api; + cgpu->device_id = i; + cgpu->threads = opt_g_threads; + } +} + +static void reinit_opencl_device(struct cgpu_info *gpu) +{ + tq_push(thr_info[gpur_thr_id].q, gpu); +} + +static void get_opencl_statline(char *buf, struct cgpu_info *gpu) +{ + tailsprintf(buf, " | I:%2d", gpu->intensity); +#ifdef HAVE_ADL + if (gpu->has_adl) { + int gpuid = gpu->device_id; + float gt = gpu_temp(gpuid); + int gf = gpu_fanspeed(gpuid); + int gp; + + if (gt != -1) + tailsprintf(buf, " %5.1fC ", gt); + else + tailsprintf(buf, " ", gt); + if (gf != -1) + tailsprintf(buf, " %4dRPM", gf); + else if ((gp = gpu_fanpercent(gpuid)) != -1) + tailsprintf(buf, " %3d%%", gp); + } +#endif +} + +static void opencl_thread_start(struct thr_info *thr) +{ + char name[256]; + struct timeval now; + struct cgpu_info *cgpu = thr->cgpu; + int gpu = cgpu->device_id; + int i = thr->id; + static bool failmessage = false; + + /* Enable threads for devices set not to mine but disable + * their queue in case we wish to enable them later*/ + if (cgpu->enabled) { + if (opt_debug) + applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); + + tq_push(thr->q, &ping); + } + + applog(LOG_INFO, "Init GPU thread %i", i); + clStates[i] = initCl(gpu, name, sizeof(name)); + if (!clStates[i]) { + enable_curses(); + applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu); + if (!failmessage) { + char *buf; + + applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this."); + applog(LOG_ERR, "Try stopping other applications using the GPU like 
afterburner."); + applog(LOG_ERR, "Then restart cgminer."); + failmessage = true; + buf = curses_input("Press enter to continue"); + if (buf) + free(buf); + } + cgpu->enabled = false; + cgpu->status = LIFE_NOSTART; + return; + } + applog(LOG_INFO, "initCl() finished. Found %s", name); + gettimeofday(&now, NULL); + get_datestamp(cgpu->init, &now); + + have_opencl = true; + + if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) + quit(1, "thread %d create failed", i); +} + +struct device_api opencl_api = { + .name = "GPU", + .api_detect = opencl_detect, + .reinit_device = reinit_opencl_device, + .get_statline = get_opencl_statline, + .thread_start = opencl_thread_start, +}; +#endif + + +static int cgminer_id_count = 0; + +void enable_device(struct cgpu_info *cgpu) +{ + cgpu->enabled = true; + devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu; + mining_threads += cgpu->threads; +#ifdef OPENCL + if (cgpu->api == &opencl_api) { + gpu_threads += cgpu->threads; + } +#endif +} + int main (int argc, char *argv[]) { unsigned int i, pools_active = 0; + unsigned int j, k; struct block *block, *tmpblock; struct work *work, *tmpwork; struct sigaction handler; struct thr_info *thr; - char name[256]; /* This dangerous functions tramples random dynamically allocated * variables so do it before anything at all */ @@ -5752,48 +5944,11 @@ int main (int argc, char *argv[]) HASH_ADD_STR(blocks, hash, block); strcpy(current_block, block->hash); - // Reckon number of cores in the box - #if defined(WIN32) - - DWORD system_am; - DWORD process_am; - BOOL ok = GetProcessAffinityMask( - GetCurrentProcess(), - &system_am, - &process_am - ); - if (!ok) { - applog(LOG_ERR, "couldn't figure out number of processors :("); - num_processors = 1; - } else { - size_t n = 32; - num_processors = 0; - while (n--) - if (process_am & (1< nDevs) - quit(1, "More devices specified than exist"); - for (i = 0; i < MAX_GPUDEVICES; i++) - if (gpu_devices[i] && i + 1 > nDevs) - quit (1, "Command line 
options set a device that doesn't exist"); -#ifdef HAVE_ADL - for (i = 0; i < nDevs; i++) { - /* Make sure we do not attempt to adl manage devices - * that we disable */ - if (!gpu_devices[i]) - gpus[i].gpu_engine = - gpus[i].gpu_memclock = - gpus[i].gpu_vddc = - gpus[i].gpu_fan = - gpus[i].gpu_powertune = 0; - } -#endif - } else { - for (i = 0; i < nDevs; i++) - gpu_devices[i] = true; - total_devices = nDevs; - } -#else - gpu_threads = 0; + if (!opt_nogpu) + opencl_api.api_detect(); #endif - if (!gpu_threads && !forced_n_threads) { - /* Maybe they turned GPU off; restore default CPU threads. */ - opt_n_threads = num_processors; + cpu_api.api_detect(); + + if (devices_enabled == -1) { + applog(LOG_ERR, "Devices detected:"); + for (i = 0; i < total_devices; ++i) { + applog(LOG_ERR, " %2d. %s%d", i, devices[i]->api->name, devices[i]->device_id); + } + quit(0, "%d devices listed", total_devices); + } + + mining_threads = 0; + gpu_threads = 0; + if (devices_enabled) { + for (i = 0; i < (sizeof(devices_enabled) * 8) - 1; ++i) { + if (devices_enabled & (1 << i)) { + if (i >= total_devices) + quit (1, "Command line options set a device that doesn't exist"); + enable_device(devices[i]); + } else if (i < total_devices) { + if (opt_removedisabled) { + if (devices[i]->api == &cpu_api) + --opt_n_threads; + } else { + enable_device(devices[i]); + } + devices[i]->enabled = false; + } + } + total_devices = cgminer_id_count; + } else { + for (i = 0; i < total_devices; ++i) + enable_device(devices[i]); } - if (!opt_n_threads && ! 
gpu_threads) + if (!total_devices) quit(1, "All devices disabled, cannot mine!"); - logcursor = 8; - gpucursor = logcursor; - cpucursor = gpucursor + nDevs; - logstart = cpucursor + 1; - if (opt_n_threads) { - if (opt_n_threads < num_processors) - logstart += opt_n_threads; - else - logstart += num_processors; - } + devcursor = 8; + logstart = devcursor + total_devices + 1; logcursor = logstart + 1; if (opt_realquiet) @@ -5961,8 +6097,6 @@ int main (int argc, char *argv[]) fork_monitor(); #endif // defined(unix) - mining_threads = opt_n_threads + gpu_threads; - total_threads = mining_threads + 8; work_restart = calloc(total_threads, sizeof(*work_restart)); if (!work_restart) @@ -5992,12 +6126,6 @@ int main (int argc, char *argv[]) if (!thr->q) quit(1, "Failed to tq_new"); - if (opt_n_threads ) { - cpus = calloc(num_processors, sizeof(struct cgpu_info)); - if (unlikely(!cpus)) - quit(1, "Failed to calloc cpus"); - } - stage_thr_id = mining_threads + 3; thr = &thr_info[stage_thr_id]; thr->q = tq_new(); @@ -6079,87 +6207,29 @@ retry_pools: #ifdef HAVE_OPENCL if (!opt_noadl) init_adl(nDevs); - bool failmessage = false; - - /* start GPU mining threads */ - for (i = 0; i < nDevs * opt_g_threads; i++) { - int gpu = i % nDevs; - struct cgpu_info *cgpu; - struct timeval now; - - gpus[gpu].is_gpu = 1; - gpus[gpu].cpu_gpu = gpu; - - thr = &thr_info[i]; - thr->id = i; - cgpu = thr->cgpu = &gpus[gpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting gpu mining threads"); - - /* Enable threads for devices set not to mine but disable - * their queue in case we wish to enable them later*/ - if (gpu_devices[gpu]) { - if (opt_debug) - applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - - tq_push(thr->q, &ping); - } - - applog(LOG_INFO, "Init GPU thread %i", i); - clStates[i] = initCl(gpu, name, sizeof(name)); - if (!clStates[i]) { - enable_curses(); - applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu); - if (!failmessage) { 
- char *buf; - - applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this."); - applog(LOG_ERR, "Try stopping other applications using the GPU like afterburner."); - applog(LOG_ERR, "Then restart cgminer."); - failmessage = true; - buf = curses_input("Press enter to continue"); - if (buf) - free(buf); - } - gpu_devices[gpu] = false; - cgpu->status = LIFE_NOSTART; - continue; - } - applog(LOG_INFO, "initCl() finished. Found %s", name); - gettimeofday(&now, NULL); - get_datestamp(cgpu->init, &now); - - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) - quit(1, "thread %d create failed", i); - } - - applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); #else opt_g_threads = 0; #endif - /* start CPU mining threads */ - for (i = gpu_threads; i < mining_threads; i++) { - int cpu = (i - gpu_threads) % num_processors; + // Start threads + k = 0; + for (i = 0; i < total_devices; ++i) { + struct cgpu_info *cgpu = devices[i]; + for (j = 0; j < cgpu->threads; ++j, ++k) { + thr = &thr_info[k]; + thr->id = k; + thr->cgpu = cgpu; - thr = &thr_info[i]; + thr->q = tq_new(); + if (!thr->q) + quit(1, "tq_new failed in starting %s%d mining thread (#%d)", cgpu->api->name, cgpu->device_id, i); - thr->id = i; - cpus[cpu].cpu_gpu = cpu; - thr->cgpu = &cpus[cpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting cpu mining threads"); - - thread_reportin(thr); - - if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) - quit(1, "thread %d create failed", i); + cgpu->api->thread_start(thr); + } } + applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); + applog(LOG_INFO, "%d cpu miner threads started, " "using SHA256 '%s' algorithm.", opt_n_threads, diff --git a/miner.h b/miner.h index bd451401..cb92d99b 100644 --- a/miner.h +++ b/miner.h @@ -208,9 +208,28 @@ struct gpu_adl { }; #endif +struct cgpu_info; +struct thr_info; + +struct device_api { + char*name; + + // API-global functions + void (*api_detect)(); + + 
// Device-specific functions + void (*reinit_device)(struct cgpu_info*); + void (*get_statline)(char*, struct cgpu_info*); + + // Thread-specific functions + void (*thread_start)(struct thr_info*); +}; + struct cgpu_info { - int is_gpu; - int cpu_gpu; + int cgminer_id; + struct device_api *api; + int device_id; + bool enabled; int accepted; int rejected; int hw_errors; @@ -221,6 +240,8 @@ struct cgpu_info { char init[40]; struct timeval last_message_tv; + int threads; + bool dynamic; int intensity; #ifdef HAVE_ADL @@ -431,6 +452,7 @@ extern bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, flo extern void api(void); #define MAX_GPUDEVICES 16 +#define MAX_DEVICES 32 #define MAX_POOLS (32) extern int nDevs; @@ -444,9 +466,10 @@ extern struct work_restart *work_restart; extern struct cgpu_info gpus[MAX_GPUDEVICES]; extern int gpu_threads; extern double total_secs; -extern bool gpu_devices[MAX_GPUDEVICES]; extern int mining_threads; extern struct cgpu_info *cpus; +extern int total_devices; +extern struct cgpu_info *devices[]; extern int total_pools; extern struct pool *pools[MAX_POOLS]; extern const char *algo_names[]; From a4d1fe1e5d116851d45624496327445db9660ff0 Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Sat, 7 Jan 2012 02:56:27 -0500 Subject: [PATCH 2/3] Refactor miner_thread to be common code for any kind of device This expands on the device API, by taking the actual main thread out of the device's code, and calling the API only for specific tasks. This brings various changes that were made to the GPU thread code into the CPU miner. It also fixes a bug where shares found on old GPU work were discarded when it switched to a new work. 
--- main.c | 886 +++++++++++++++++++++++++++++--------------------------- miner.h | 10 +- 2 files changed, 467 insertions(+), 429 deletions(-) diff --git a/main.c b/main.c index 31c25ef0..bfd4c15e 100644 --- a/main.c +++ b/main.c @@ -4323,230 +4323,149 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce) return submit_work_sync(thr, work); } +static inline bool abandon_work(int thr_id, struct work *work, struct timeval *wdiff, uint64_t hashes) +{ + if (wdiff->tv_sec > opt_scantime || + work->blk.nonce >= MAXTHREADS - hashes || + stale_work(work, false)) + return true; + return false; +} + static void *miner_thread(void *userdata) { - struct work *work = make_work(); struct thr_info *mythr = userdata; const int thr_id = mythr->id; - uint32_t max_nonce = 0xffffff, total_hashes = 0; - unsigned long hashes_done = max_nonce; - bool needs_work = true; + struct cgpu_info *cgpu = mythr->cgpu; + struct device_api *api = cgpu->api; + /* Try to cycle approximately 5 times before each log update */ - const unsigned long cycle = opt_log_interval / 5 ? : 1; + const unsigned long def_cycle = opt_log_interval / 5 ? : 1; + unsigned long cycle; + struct timeval tv_start, tv_end, tv_workstart, tv_lastupdate; + struct timeval diff, sdiff, wdiff; + uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff; + uint32_t hashes_done = 0; + uint32_t hashes; + struct work *work = make_work(); unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1; + unsigned const long request_nonce = MAXTHREADS / 3 * 2; bool requested = false; - uint32_t nonce_inc = max_nonce, hash_div = 1; - double hash_divfloat = 1.0; - + uint32_t hash_div = 1; pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE - * and if that fails, then SCHED_BATCH. 
No need for this to be an - * error if it fails */ - setpriority(PRIO_PROCESS, 0, 19); - drop_policy(); + if (api->thread_init && !api->thread_init(mythr)) + goto out; - /* Cpu affinity only makes sense if the number of threads is a multiple - * of the number of CPUs */ - if (!(opt_n_threads % num_processors)) - affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors); + if (opt_debug) + applog(LOG_DEBUG, "Popping ping in miner thread"); + tq_pop(mythr->q, NULL); /* Wait for a ping to start */ - /* Invalidate pool so it fails can_roll() test */ - work->pool = NULL; + sdiff.tv_sec = sdiff.tv_usec = 0; + gettimeofday(&tv_lastupdate, NULL); while (1) { - struct timeval tv_workstart, tv_start, tv_end, diff; - uint64_t max64; - bool rc; - - if (needs_work) { - gettimeofday(&tv_workstart, NULL); - /* obtain new work from internal workio thread */ - if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) { - applog(LOG_ERR, "work retrieval failed, exiting " - "mining thread %d", thr_id); - goto out; - } - needs_work = requested = false; - total_hashes = 0; - max_nonce = work->blk.nonce + hashes_done; + work_restart[thr_id].restart = 0; + if (api->free_work && likely(work->pool)) + api->free_work(mythr, work); + if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) { + applog(LOG_ERR, "work retrieval failed, exiting " + "mining thread %d", thr_id); + break; } - hashes_done = 0; - gettimeofday(&tv_start, NULL); - - /* scan nonces for a proof-of-work hash */ - switch (opt_algo) { - case ALGO_C: - rc = scanhash_c(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); + requested = false; + cycle = (can_roll(work) && should_roll(work)) ? 
1 : def_cycle; + gettimeofday(&tv_workstart, NULL); + work->blk.nonce = 0; + if (api->prepare_work && !api->prepare_work(mythr, work)) { + applog(LOG_ERR, "work prepare failed, exiting " + "mining thread %d", thr_id); break; + } -#ifdef WANT_X8632_SSE2 - case ALGO_SSE2_32: { - unsigned int rc5 = - scanhash_sse2_32(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; - } - break; -#endif + do { + gettimeofday(&tv_start, NULL); -#ifdef WANT_X8664_SSE2 - case ALGO_SSE2_64: { - unsigned int rc5 = - scanhash_sse2_64(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; + hashes = api->scanhash(mythr, work, work->blk.nonce + max_nonce); + if (unlikely(work_restart[thr_id].restart)) + break; + if (unlikely(!hashes)) + goto out; + hashes_done += hashes; + + gettimeofday(&tv_end, NULL); + timeval_subtract(&diff, &tv_end, &tv_start); + sdiff.tv_sec += diff.tv_sec; + sdiff.tv_usec += diff.tv_usec; + if (sdiff.tv_usec > 1000000) { + ++sdiff.tv_sec; + sdiff.tv_usec -= 1000000; } - break; -#endif -#ifdef WANT_X8664_SSE4 - case ALGO_SSE4_64: { - unsigned int rc5 = - scanhash_sse4_64(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; - } - break; + timeval_subtract(&wdiff, &tv_end, &tv_workstart); + if (!requested) { +#if 0 + if (wdiff.tv_sec > request_interval) + hash_div = (MAXTHREADS / total_hashes) ? : 1; #endif - -#ifdef WANT_SSE2_4WAY - case ALGO_4WAY: { - unsigned int rc4 = - ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc4 == -1) ? 
false : true; + if (wdiff.tv_sec > request_interval || work->blk.nonce > request_nonce) { + thread_reportout(mythr); + if (unlikely(!queue_request(mythr, false))) { + applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id); + goto out; + } + thread_reportin(mythr); + requested = true; + } } - break; -#endif - -#ifdef WANT_ALTIVEC_4WAY - case ALGO_ALTIVEC_4WAY: - { - unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc4 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_VIA_PADLOCK - case ALGO_VIA: - rc = scanhash_via(thr_id, work->data, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#endif - case ALGO_CRYPTOPP: - rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; - -#ifdef WANT_CRYPTOPP_ASM32 - case ALGO_CRYPTOPP_ASM32: - rc = scanhash_asm32(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#endif + if (sdiff.tv_sec < cycle) { + if (likely(!api->can_limit_work || max_nonce == 0xffffffff)) + continue; - default: - /* should never happen */ - goto out; - } + { + int mult = 1000000 / ((sdiff.tv_usec + 0x400) / 0x400) + 0x10; + mult *= cycle; + if (max_nonce > (0xffffffff * 0x400) / mult) + max_nonce = 0xffffffff; + else + max_nonce = (max_nonce * mult) / 0x400; + } + } else if (unlikely(sdiff.tv_sec > cycle) && api->can_limit_work) { + max_nonce = max_nonce * cycle / sdiff.tv_sec; + } else if (unlikely(sdiff.tv_usec > 100000) && api->can_limit_work) { + max_nonce = max_nonce * 0x400 / (((cycle * 1000000) + sdiff.tv_usec) / (cycle * 1000000 / 0x400)); + } - /* record scanhash elapsed time */ - gettimeofday(&tv_end, NULL); - timeval_subtract(&diff, &tv_end, &tv_start); - - hashes_done -= work->blk.nonce; 
- hashmeter(thr_id, &diff, hashes_done); - total_hashes += hashes_done; - work->blk.nonce += hashes_done; - - /* adjust max_nonce to meet target cycle time */ - if (diff.tv_usec > 500000) - diff.tv_sec++; - if (diff.tv_sec && diff.tv_sec != cycle) { - uint64_t next_inc = ((uint64_t)hashes_done * (uint64_t)cycle) / (uint64_t)diff.tv_sec; - - if (next_inc > (uint64_t)nonce_inc / 2 * 3) - next_inc = nonce_inc / 2 * 3; - nonce_inc = next_inc; - } else if (!diff.tv_sec) - nonce_inc = hashes_done * 2; - if (nonce_inc < 4) - nonce_inc = 0xffffff; - max64 = work->blk.nonce + nonce_inc; - if (max64 > 0xfffffffaULL) - max64 = 0xfffffffaULL; - max_nonce = max64; - - /* if nonce found, submit work */ - if (unlikely(rc)) { - if (opt_debug) - applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id)); - if (unlikely(!submit_work_sync(mythr, work))) { - applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id); - break; + timeval_subtract(&diff, &tv_end, &tv_lastupdate); + if (diff.tv_sec >= opt_log_interval) { + hashmeter(thr_id, &diff, hashes_done); + hashes_done = 0; + tv_lastupdate = tv_end; } - work->blk.nonce += 4; - } - timeval_subtract(&diff, &tv_end, &tv_workstart); - if (!requested && (diff.tv_sec >= request_interval)) { - thread_reportout(mythr); - if (unlikely(!queue_request(mythr, false))) { - applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id); - goto out; + if (unlikely(mythr->pause || !cgpu->enabled)) { + applog(LOG_WARNING, "Thread %d being disabled", thr_id); + mythr->rolling = mythr->cgpu->rolling = 0; + if (opt_debug) + applog(LOG_DEBUG, "Popping wakeup ping in miner thread"); + thread_reportout(mythr); + tq_pop(mythr->q, NULL); /* Ignore ping that's popped */ + thread_reportin(mythr); + applog(LOG_WARNING, "Thread %d being re-enabled", thr_id); } - thread_reportin(mythr); - requested = true; - } - if (diff.tv_sec > opt_scantime) { - decay_time(&hash_divfloat , (double)((MAXTHREADS / total_hashes) ? 
: 1)); - hash_div = hash_divfloat; - needs_work = true; - } else if (work_restart[thr_id].restart || stale_work(work, false) || - work->blk.nonce >= MAXTHREADS - hashes_done) - needs_work = true; - - if (unlikely(mythr->pause)) { - applog(LOG_WARNING, "Thread %d being disabled", thr_id); - mythr->rolling = mythr->cgpu->rolling = 0; - if (opt_debug) - applog(LOG_DEBUG, "Popping wakeup ping in miner thread"); + sdiff.tv_sec = sdiff.tv_usec = 0; - thread_reportout(mythr); - tq_pop(mythr->q, NULL); /* Ignore ping that's popped */ - thread_reportin(mythr); - applog(LOG_WARNING, "Thread %d being re-enabled", thr_id); - } + if (can_roll(work) && should_roll(work)) + roll_work(work); + } while (!abandon_work(thr_id, work, &wdiff, hashes)); } out: + if (api->thread_shutdown) + api->thread_shutdown(mythr); + thread_reportin(mythr); applog(LOG_ERR, "Thread %d failure, exiting", thr_id); tq_freeze(mythr->q); @@ -4654,224 +4573,6 @@ static void set_threads_hashes(unsigned int vectors, unsigned int *threads, *globalThreads = *threads; *hashes = *threads * vectors; } - -static void *gpuminer_thread(void *userdata) -{ - cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *); - - const unsigned long cycle = opt_log_interval / 5 ? : 1; - struct timeval tv_start, tv_end, diff, tv_workstart; - struct thr_info *mythr = userdata; - const int thr_id = mythr->id; - uint32_t *res, *blank_res; - double gpu_ms_average = 7; - int gpu = dev_from_id(thr_id); - - size_t globalThreads[1]; - size_t localThreads[1]; - - cl_int status; - - _clState *clState = clStates[thr_id]; - const cl_kernel *kernel = &clState->kernel; - - struct work *work = make_work(); - unsigned int threads; - unsigned const int vectors = clState->preferred_vwidth; - unsigned int hashes; - unsigned int hashes_done = 0; - - /* Request the next work item at 2/3 of the scantime */ - unsigned const int request_interval = opt_scantime * 2 / 3 ? 
: 1; - unsigned const long request_nonce = MAXTHREADS / 3 * 2; - bool requested = false; - uint32_t total_hashes = 0, hash_div = 1; - - switch (chosen_kernel) { - case KL_POCLBM: - queue_kernel_parameters = &queue_poclbm_kernel; - break; - case KL_PHATK: - default: - queue_kernel_parameters = &queue_phatk_kernel; - break; - } - - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - res = calloc(BUFFERSIZE, 1); - blank_res = calloc(BUFFERSIZE, 1); - - if (!res || !blank_res) { - applog(LOG_ERR, "Failed to calloc in gpuminer_thread"); - goto out; - } - - gettimeofday(&tv_start, NULL); - localThreads[0] = clState->work_size; - set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0], - localThreads[0], gpus[gpu].intensity); - - diff.tv_sec = 0; - gettimeofday(&tv_end, NULL); - - work->pool = NULL; - - status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, - BUFFERSIZE, blank_res, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - - mythr->cgpu->status = LIFE_WELL; - if (opt_debug) - applog(LOG_DEBUG, "Popping ping in gpuminer thread"); - - tq_pop(mythr->q, NULL); /* Wait for a ping to start */ - gettimeofday(&tv_workstart, NULL); - /* obtain new work from internal workio thread */ - if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) { - applog(LOG_ERR, "work retrieval failed, exiting " - "gpu mining thread %d", thr_id); - goto out; - } - requested = false; - precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); - work->blk.nonce = 0; - - while (1) { - struct timeval tv_gpustart, tv_gpuend; - suseconds_t gpu_us; - - gettimeofday(&tv_gpustart, NULL); - timeval_subtract(&diff, &tv_gpustart, &tv_gpuend); - /* This finish flushes the readbuffer set with CL_FALSE later */ - clFinish(clState->commandQueue); - gettimeofday(&tv_gpuend, NULL); - timeval_subtract(&diff, &tv_gpuend, &tv_gpustart); - gpu_us 
= diff.tv_sec * 1000000 + diff.tv_usec; - decay_time(&gpu_ms_average, gpu_us / 1000); - if (gpus[gpu].dynamic) { - /* Try to not let the GPU be out for longer than 6ms, but - * increase intensity when the system is idle, unless - * dynamic is disabled. */ - if (gpu_ms_average > 7) { - if (gpus[gpu].intensity > -10) - gpus[gpu].intensity--; - } else if (gpu_ms_average < 3) { - if (gpus[gpu].intensity < 10) - gpus[gpu].intensity++; - } - } - set_threads_hashes(vectors, &threads, &hashes, globalThreads, - localThreads[0], gpus[gpu].intensity); - - if (diff.tv_sec > opt_scantime || - work->blk.nonce >= MAXTHREADS - hashes || - work_restart[thr_id].restart || - stale_work(work, false)) { - /* Ignore any reads since we're getting new work and queue a clean buffer */ - status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - BUFFERSIZE, blank_res, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - memset(res, 0, BUFFERSIZE); - - gettimeofday(&tv_workstart, NULL); - if (opt_debug) - applog(LOG_DEBUG, "getwork thread %d", thr_id); - /* obtain new work from internal workio thread */ - if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) { - applog(LOG_ERR, "work retrieval failed, exiting " - "gpu mining thread %d", thr_id); - goto out; - } - requested = false; - - precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); - work_restart[thr_id].restart = 0; - - /* Flushes the writebuffer set with CL_FALSE above */ - clFinish(clState->commandQueue); - } - status = queue_kernel_parameters(clState, &work->blk); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; } - - /* MAXBUFFERS entry is used as a flag to say nonces exist */ - if (res[FOUND]) { - /* Clear the buffer again */ - status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 
0, - BUFFERSIZE, blank_res, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; } - if (opt_debug) - applog(LOG_DEBUG, "GPU %d found something?", gpu); - postcalc_hash_async(mythr, work, res); - memset(res, 0, BUFFERSIZE); - clFinish(clState->commandQueue); - } - - status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL, - globalThreads, localThreads, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; } - - status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, - BUFFERSIZE, res, 0, NULL, NULL); - if (unlikely(status != CL_SUCCESS)) - { applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;} - - gettimeofday(&tv_end, NULL); - timeval_subtract(&diff, &tv_end, &tv_start); - hashes_done += hashes; - total_hashes += hashes; - work->blk.nonce += hashes; - if (diff.tv_sec >= cycle) { - hashmeter(thr_id, &diff, hashes_done); - gettimeofday(&tv_start, NULL); - hashes_done = 0; - } - - timeval_subtract(&diff, &tv_end, &tv_workstart); - if (!requested) { -#if 0 - if (diff.tv_sec > request_interval) - hash_div = (MAXTHREADS / total_hashes) ? 
: 1; -#endif - if (diff.tv_sec > request_interval || work->blk.nonce > request_nonce) { - thread_reportout(mythr); - if (unlikely(!queue_request(mythr, false))) { - applog(LOG_ERR, "Failed to queue_request in gpuminer_thread %d", thr_id); - goto out; - } - thread_reportin(mythr); - requested = true; - } - } - if (unlikely(!gpus[gpu].enabled || mythr->pause)) { - applog(LOG_WARNING, "Thread %d being disabled", thr_id); - mythr->rolling = mythr->cgpu->rolling = 0; - if (opt_debug) - applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread"); - - thread_reportout(mythr); - tq_pop(mythr->q, NULL); /* Ignore ping that's popped */ - thread_reportin(mythr); - applog(LOG_WARNING, "Thread %d being re-enabled", thr_id); - } - } -out: - clReleaseCommandQueue(clState->commandQueue); - clReleaseKernel(clState->kernel); - clReleaseProgram(clState->program); - clReleaseContext(clState->context); - - thread_reportin(mythr); - applog(LOG_ERR, "Thread %d failure, exiting", thr_id); - tq_freeze(mythr->q); - - return NULL; -} #endif /* HAVE_OPENCL */ /* Stage another work item from the work returned in a longpoll */ @@ -5153,7 +4854,7 @@ select_cgpu: } applog(LOG_INFO, "initCl() finished. 
Found %s", name); - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) { + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) { applog(LOG_ERR, "thread %d create failed", thr_id); return NULL; } @@ -5732,19 +5433,157 @@ static void reinit_cpu_device(struct cgpu_info *cpu) tq_push(thr_info[cpur_thr_id].q, cpu); } -static void cpu_thread_start(struct thr_info *thr) +static bool cpu_thread_prepare(struct thr_info *thr) { thread_reportin(thr); - if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) - quit(1, "thread %d create failed", thr->id); + return true; +} + +static uint64_t cpu_can_limit_work(struct thr_info *thr) +{ + return 0xfffff; +} + +static bool cpu_thread_init(struct thr_info *thr) +{ + const int thr_id = thr->id; + + /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE + * and if that fails, then SCHED_BATCH. No need for this to be an + * error if it fails */ + setpriority(PRIO_PROCESS, 0, 19); + drop_policy(); + /* Cpu affinity only makes sense if the number of threads is a multiple + * of the number of CPUs */ + if (!(opt_n_threads % num_processors)) + affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors); + return true; +} + +static uint64_t cpu_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce) +{ + const int thr_id = thr->id; + + long unsigned int hashes_done = 0; + uint32_t first_nonce = work->blk.nonce; + bool rc = false; + + /* scan nonces for a proof-of-work hash */ + switch (opt_algo) { + case ALGO_C: + rc = scanhash_c(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, work->target, + max_nonce, &hashes_done, + work->blk.nonce); + break; +#ifdef WANT_X8632_SSE2 + case ALGO_SSE2_32: { + unsigned int rc5 = + scanhash_sse2_32(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc5 == -1) ? 
false : true; + } + break; +#endif +#ifdef WANT_X8664_SSE2 + case ALGO_SSE2_64: { + unsigned int rc5 = + scanhash_sse2_64(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc5 == -1) ? false : true; + } + break; +#endif +#ifdef WANT_X8664_SSE4 + case ALGO_SSE4_64: { + unsigned int rc5 = + scanhash_sse4_64(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc5 == -1) ? false : true; + } + break; +#endif +#ifdef WANT_SSE2_4WAY + case ALGO_4WAY: { + unsigned int rc4 = + ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc4 == -1) ? false : true; + } + break; +#endif +#ifdef WANT_ALTIVEC_4WAY + case ALGO_ALTIVEC_4WAY: + { + unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, + work->target, + max_nonce, &hashes_done, + work->blk.nonce); + rc = (rc4 == -1) ? false : true; + } + break; +#endif +#ifdef WANT_VIA_PADLOCK + case ALGO_VIA: + rc = scanhash_via(thr_id, work->data, work->target, + max_nonce, &hashes_done, + work->blk.nonce); + break; +#endif + case ALGO_CRYPTOPP: + rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, work->target, + max_nonce, &hashes_done, + work->blk.nonce); + break; +#ifdef WANT_CRYPTOPP_ASM32 + case ALGO_CRYPTOPP_ASM32: + rc = scanhash_asm32(thr_id, work->midstate, work->data + 64, + work->hash1, work->hash, work->target, + max_nonce, &hashes_done, + work->blk.nonce); + break; +#endif + default: + /* should never happen */ + applog(LOG_ERR, "Unrecognized hash algorithm! 
This should be impossible!"); + } + + /* if nonce found, submit work */ + if (unlikely(rc)) { + if (opt_debug) + applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id)); + if (unlikely(!submit_work_sync(thr, work))) { + applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id); + } + } + + work->blk.nonce = hashes_done; + return (uint64_t)hashes_done - first_nonce; } struct device_api cpu_api = { .name = "CPU", .api_detect = cpu_detect, .reinit_device = reinit_cpu_device, - .thread_start = cpu_thread_start, + .thread_prepare = cpu_thread_prepare, + .can_limit_work = cpu_can_limit_work, + .thread_init = cpu_thread_init, + .scanhash = cpu_scanhash, }; @@ -5815,7 +5654,16 @@ static void get_opencl_statline(char *buf, struct cgpu_info *gpu) #endif } -static void opencl_thread_start(struct thr_info *thr) +struct opencl_thread_data { + cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *); + uint32_t *res; + struct work *last_work; + struct work _last_work; +}; + +static uint32_t *blank_res; + +static bool opencl_thread_prepare(struct thr_info *thr) { char name[256]; struct timeval now; @@ -5824,13 +5672,11 @@ static void opencl_thread_start(struct thr_info *thr) int i = thr->id; static bool failmessage = false; - /* Enable threads for devices set not to mine but disable - * their queue in case we wish to enable them later*/ - if (cgpu->enabled) { - if (opt_debug) - applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - - tq_push(thr->q, &ping); + if (!blank_res) + blank_res = calloc(BUFFERSIZE, 1); + if (!blank_res) { + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; } applog(LOG_INFO, "Init GPU thread %i", i); @@ -5851,7 +5697,7 @@ static void opencl_thread_start(struct thr_info *thr) } cgpu->enabled = false; cgpu->status = LIFE_NOSTART; - return; + return false; } applog(LOG_INFO, "initCl() finished. 
Found %s", name); gettimeofday(&now, NULL); @@ -5859,8 +5705,174 @@ static void opencl_thread_start(struct thr_info *thr) have_opencl = true; - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) - quit(1, "thread %d create failed", i); + return true; +} + +static bool opencl_thread_init(struct thr_info *thr) +{ + const int thr_id = thr->id; + struct cgpu_info *gpu = thr->cgpu; + + struct opencl_thread_data *thrdata; + thrdata = calloc(1, sizeof(*thrdata)); + thr->cgpu_data = thrdata; + + if (!thrdata) { + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + switch (chosen_kernel) { + case KL_POCLBM: + thrdata->queue_kernel_parameters = &queue_poclbm_kernel; + break; + case KL_PHATK: + default: + thrdata->queue_kernel_parameters = &queue_phatk_kernel; + break; + } + + thrdata->res = calloc(BUFFERSIZE, 1); + + if (!thrdata->res) { + free(thrdata); + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + _clState *clState = clStates[thr_id]; + cl_int status; + + status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, + BUFFERSIZE, blank_res, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); + return false; + } + + gpu->status = LIFE_WELL; + + return true; +} + +static void opencl_free_work(struct thr_info *thr, struct work *work) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + _clState *clState = clStates[thr_id]; + + clFinish(clState->commandQueue); + if (thrdata->res[FOUND]) { + thrdata->last_work = &thrdata->_last_work; + memcpy(thrdata->last_work, work, sizeof(*thrdata->last_work)); + } +} + +static bool opencl_prepare_work(struct thr_info *thr, struct work *work) +{ + precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); + return true; +} + +static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, uint64_t 
max_nonce) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + struct cgpu_info *gpu = thr->cgpu; + _clState *clState = clStates[thr_id]; + const cl_kernel *kernel = &clState->kernel; + + double gpu_ms_average = 7; + cl_int status; + + size_t globalThreads[1]; + size_t localThreads[1] = { clState->work_size }; + unsigned int threads; + unsigned int hashes; + + + struct timeval tv_gpustart, tv_gpuend, diff; + suseconds_t gpu_us; + + gettimeofday(&tv_gpustart, NULL); + timeval_subtract(&diff, &tv_gpustart, &tv_gpuend); + /* This finish flushes the readbuffer set with CL_FALSE later */ + clFinish(clState->commandQueue); + gettimeofday(&tv_gpuend, NULL); + timeval_subtract(&diff, &tv_gpuend, &tv_gpustart); + gpu_us = diff.tv_sec * 1000000 + diff.tv_usec; + decay_time(&gpu_ms_average, gpu_us / 1000); + if (gpu->dynamic) { + /* Try to not let the GPU be out for longer than 6ms, but + * increase intensity when the system is idle, unless + * dynamic is disabled. 
*/ + if (gpu_ms_average > 7) { + if (gpu->intensity > -10) + --gpu->intensity; + } else if (gpu_ms_average < 3) { + if (gpu->intensity < 10) + ++gpu->intensity; + } + } + set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads, + localThreads[0], gpu->intensity); + + status = thrdata->queue_kernel_parameters(clState, &work->blk); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); + return 0; + } + + /* MAXBUFFERS entry is used as a flag to say nonces exist */ + if (thrdata->res[FOUND]) { + /* Clear the buffer again */ + status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, + BUFFERSIZE, blank_res, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); + return 0; + } + if (unlikely(thrdata->last_work)) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id); + postcalc_hash_async(thr, thrdata->last_work, thrdata->res); + thrdata->last_work = NULL; + } else { + if (opt_debug) + applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id); + postcalc_hash_async(thr, work, thrdata->res); + } + memset(thrdata->res, 0, BUFFERSIZE); + clFinish(clState->commandQueue); + } + status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL, + globalThreads, localThreads, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); + return 0; + } + + status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, + BUFFERSIZE, thrdata->res, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. 
(clEnqueueReadBuffer)"); + return 0; + } + + work->blk.nonce += hashes; + + return hashes; +} + +static void opencl_thread_shutdown(struct thr_info *thr) +{ + const int thr_id = thr->id; + _clState *clState = clStates[thr_id]; + + clReleaseCommandQueue(clState->commandQueue); + clReleaseKernel(clState->kernel); + clReleaseProgram(clState->program); + clReleaseContext(clState->context); } struct device_api opencl_api = { @@ -5868,7 +5880,12 @@ struct device_api opencl_api = { .api_detect = opencl_detect, .reinit_device = reinit_opencl_device, .get_statline = get_opencl_statline, - .thread_start = opencl_thread_start, + .thread_prepare = opencl_thread_prepare, + .thread_init = opencl_thread_init, + .free_work = opencl_free_work, + .prepare_work = opencl_prepare_work, + .scanhash = opencl_scanhash, + .thread_shutdown = opencl_thread_shutdown, }; #endif @@ -6224,7 +6241,20 @@ retry_pools: if (!thr->q) quit(1, "tq_new failed in starting %s%d mining thread (#%d)", cgpu->api->name, cgpu->device_id, i); - cgpu->api->thread_start(thr); + /* Enable threads for devices set not to mine but disable + * their queue in case we wish to enable them later */ + if (cgpu->enabled) { + if (opt_debug) + applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); + + tq_push(thr->q, &ping); + } + + if (cgpu->api->thread_prepare && !cgpu->api->thread_prepare(thr)) + continue; + + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) + quit(1, "thread %d create failed", thr->id); } } diff --git a/miner.h b/miner.h index cb92d99b..33518b14 100644 --- a/miner.h +++ b/miner.h @@ -210,6 +210,7 @@ struct gpu_adl { struct cgpu_info; struct thr_info; +struct work; struct device_api { char*name; @@ -222,7 +223,13 @@ struct device_api { void (*get_statline)(char*, struct cgpu_info*); // Thread-specific functions - void (*thread_start)(struct thr_info*); + bool (*thread_prepare)(struct thr_info*); + uint64_t (*can_limit_work)(struct thr_info*); + bool (*thread_init)(struct thr_info*); + void 
(*free_work)(struct thr_info*, struct work*); + bool (*prepare_work)(struct thr_info*, struct work*); + uint64_t (*scanhash)(struct thr_info*, struct work*, uint64_t); + void (*thread_shutdown)(struct thr_info*); }; struct cgpu_info { @@ -278,6 +285,7 @@ struct thr_info { pthread_t pth; struct thread_q *q; struct cgpu_info *cgpu; + void *cgpu_data; struct timeval last; struct timeval sick; From b9d197dee8eb96c48004b5b3b546e9e322a271ba Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Tue, 17 Jan 2012 16:29:01 -0500 Subject: [PATCH 3/3] Refactor the CPU scanhash_* functions to use a common API. Fixes bugs. - Before, some returned bool, and others returned int (which was then turned into a bool with a comparison); now, everything returns a bool - Before, some set hashes_done to nonce - 1 when a share was found and others set it to nonce + 1 or 2. This caused some algorithms to scan/submit shares twice with the new cpu_scanhash function. Now, it has all been replaced with last_nonce, which is set to the final nonce checked by the scanhash_* func. - VIA needs the full data, and cannot use midstate. All the others were expecting midstate and data+64 for their parameters. Now, we pass midstate and the full data pointer, and let the scanhash_* function choose which to use. 
--- main.c | 144 +++++++++--------------------------------- miner.h | 45 +++++++------ sha256_4way.c | 19 +++--- sha256_altivec_4way.c | 19 +++--- sha256_cryptopp.c | 20 +++--- sha256_generic.c | 10 +-- sha256_sse2_amd64.c | 23 ++++--- sha256_sse2_i386.c | 25 ++++---- sha256_sse4_amd64.c | 23 ++++--- sha256_via.c | 12 ++-- 10 files changed, 142 insertions(+), 198 deletions(-) diff --git a/main.c b/main.c index bfd4c15e..2ba19409 100644 --- a/main.c +++ b/main.c @@ -146,7 +146,6 @@ const char *algo_names[] = { #endif }; -typedef void (*sha256_func)(); static const sha256_func sha256_funcs[] = { [ALGO_C] = (sha256_func)scanhash_c, #ifdef WANT_SSE2_4WAY @@ -488,34 +487,20 @@ static double bench_algo_stage3( struct timeval end; struct timeval start; uint32_t max_nonce = (1<<22); - unsigned long hashes_done = 0; + uint32_t last_nonce = 0; gettimeofday(&start, 0); - #if defined(WANT_VIA_PADLOCK) - - // For some reason, the VIA padlock hasher has a different API ... - if (ALGO_VIA==algo) { - (void)scanhash_via( - 0, - work.data, - work.target, - max_nonce, - &hashes_done, - work.blk.nonce - ); - } else - #endif { sha256_func func = sha256_funcs[algo]; (*func)( 0, work.midstate, - work.data + 64, + work.data, work.hash1, work.hash, work.target, max_nonce, - &hashes_done, + &last_nonce, work.blk.nonce ); } @@ -528,7 +513,7 @@ static double bench_algo_stage3( double rate = -1.0; if (0id; - long unsigned int hashes_done = 0; uint32_t first_nonce = work->blk.nonce; - bool rc = false; + uint32_t last_nonce; + bool rc; + +CPUSearch: + last_nonce = first_nonce; + rc = false; /* scan nonces for a proof-of-work hash */ - switch (opt_algo) { - case ALGO_C: - rc = scanhash_c(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#ifdef WANT_X8632_SSE2 - case ALGO_SSE2_32: { - unsigned int rc5 = - scanhash_sse2_32(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - 
max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_X8664_SSE2 - case ALGO_SSE2_64: { - unsigned int rc5 = - scanhash_sse2_64(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_X8664_SSE4 - case ALGO_SSE4_64: { - unsigned int rc5 = - scanhash_sse4_64(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc5 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_SSE2_4WAY - case ALGO_4WAY: { - unsigned int rc4 = - ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc4 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_ALTIVEC_4WAY - case ALGO_ALTIVEC_4WAY: - { - unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, - work->target, - max_nonce, &hashes_done, - work->blk.nonce); - rc = (rc4 == -1) ? false : true; - } - break; -#endif -#ifdef WANT_VIA_PADLOCK - case ALGO_VIA: - rc = scanhash_via(thr_id, work->data, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#endif - case ALGO_CRYPTOPP: - rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#ifdef WANT_CRYPTOPP_ASM32 - case ALGO_CRYPTOPP_ASM32: - rc = scanhash_asm32(thr_id, work->midstate, work->data + 64, - work->hash1, work->hash, work->target, - max_nonce, &hashes_done, - work->blk.nonce); - break; -#endif - default: - /* should never happen */ - applog(LOG_ERR, "Unrecognized hash algorithm! 
This should be impossible!"); + { + sha256_func func = sha256_funcs[opt_algo]; + rc = (*func)( + thr_id, + work->midstate, + work->data, + work->hash1, + work->hash, + work->target, + max_nonce, + &last_nonce, + work->blk.nonce + ); } /* if nonce found, submit work */ @@ -5570,10 +5481,15 @@ static uint64_t cpu_scanhash(struct thr_info *thr, struct work *work, uint64_t m if (unlikely(!submit_work_sync(thr, work))) { applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id); } + work->blk.nonce = last_nonce + 1; + goto CPUSearch; } + else + if (unlikely(last_nonce == first_nonce)) + return 0; - work->blk.nonce = hashes_done; - return (uint64_t)hashes_done - first_nonce; + work->blk.nonce = last_nonce + 1; + return last_nonce - first_nonce + 1; } struct device_api cpu_api = { diff --git a/miner.h b/miner.h index 33518b14..86352a38 100644 --- a/miner.h +++ b/miner.h @@ -381,57 +381,62 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, extern char *bin2hex(const unsigned char *p, size_t len); extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); -extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate, - unsigned char *pdata, unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); - -extern unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, +typedef bool (*sha256_func)(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); + uint32_t max_nonce, + uint32_t *last_nonce, + uint32_t nonce); -extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate, +extern bool ScanHash_4WaySSE2(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, 
- uint32_t max_nonce, unsigned long *nHashesDone); + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); + +extern bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, + unsigned char *pdata, + unsigned char *phash1, unsigned char *phash, + const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); -extern bool scanhash_via(int, unsigned char *data_inout, +extern bool scanhash_via(int, const unsigned char *pmidstate, + unsigned char *pdata, + unsigned char *phash1, unsigned char *phash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, uint32_t n); + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, uint32_t n); + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, uint32_t n); + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, uint32_t nonce); + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); -extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata, +extern bool scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); -extern int scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata, +extern bool 
scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); -extern int scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata, +extern bool scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); extern int diff --git a/sha256_4way.c b/sha256_4way.c index c81e05b9..15e852ac 100644 --- a/sha256_4way.c +++ b/sha256_4way.c @@ -101,14 +101,16 @@ static const unsigned int pSHA256InitState[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; -unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate, +bool ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce) { - unsigned int *nNonce_p = (unsigned int*)(pdata + 12); + unsigned int *nNonce_p = (unsigned int*)(pdata + 76); + + pdata += 64; work_restart[thr_id].restart = 0; @@ -132,17 +134,18 @@ unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate, ((unsigned int*)phash)[i] = thash[i][j]; if (fulltest(phash, ptarget)) { - *nHashesDone = nonce; - *nNonce_p = nonce + j; - return nonce + j; + nonce += j; + *last_nonce = nonce; + *nNonce_p = nonce; + return true; } } } if ((nonce >= max_nonce) || work_restart[thr_id].restart) { - *nHashesDone = nonce; - return -1; + *last_nonce = nonce; + return false; } } } diff --git a/sha256_altivec_4way.c b/sha256_altivec_4way.c index b70e4d03..aa50486f 100644 --- 
a/sha256_altivec_4way.c +++ b/sha256_altivec_4way.c @@ -74,14 +74,16 @@ static const unsigned int pSHA256InitState[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; -unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, +bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce) { - unsigned int *nNonce_p = (unsigned int*)(pdata + 12); + unsigned int *nNonce_p = (unsigned int*)(pdata + 76); + + pdata += 64; work_restart[thr_id].restart = 0; @@ -104,17 +106,18 @@ unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, ((unsigned int*)phash)[i] = thash[i][j]; if (fulltest(phash, ptarget)) { - *nHashesDone = nonce; - *nNonce_p = nonce + j; - return nonce + j; + nonce += j; + *last_nonce = nonce; + *nNonce_p = nonce; + return true; } } } if ((nonce >= max_nonce) || work_restart[thr_id].restart) { - *nHashesDone = nonce; - return -1; + *last_nonce = nonce; + return false; } nonce += NPAR; diff --git a/sha256_cryptopp.c b/sha256_cryptopp.c index c0c1b6fd..11c1c5ca 100644 --- a/sha256_cryptopp.c +++ b/sha256_cryptopp.c @@ -97,11 +97,13 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate, unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) { uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 12); + uint32_t *nonce = (uint32_t *)(data + 76); + + data += 64; work_restart[thr_id].restart = 0; @@ -113,12 +115,12 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate, runhash(hash, hash1, sha256_init_state); if (unlikely((hash32[7] == 0) && fulltest(hash, target))) 
{ - *hashes_done = n; + *last_nonce = n; return true; } if ((n >= max_nonce) || work_restart[thr_id].restart) { - *hashes_done = n; + *last_nonce = n; return false; } } @@ -579,11 +581,13 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate, unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) { uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 12); + uint32_t *nonce = (uint32_t *)(data + 76); + + data += 64; work_restart[thr_id].restart = 0; @@ -595,12 +599,12 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate, runhash32(hash, hash1, sha256_init_state); if (unlikely((hash32[7] == 0) && fulltest(hash, target))) { - *hashes_done = n; + *last_nonce = n; return true; } if ((n >= max_nonce) || work_restart[thr_id].restart) { - *hashes_done = n; + *last_nonce = n; return false; } } diff --git a/sha256_generic.c b/sha256_generic.c index 5bc4d972..05f4c376 100644 --- a/sha256_generic.c +++ b/sha256_generic.c @@ -242,13 +242,15 @@ const uint32_t sha256_init_state[8] = { bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data, unsigned char *hash1, unsigned char *hash, const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) { uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 12); + uint32_t *nonce = (uint32_t *)(data + 76); unsigned long stat_ctr = 0; + data += 64; + work_restart[thr_id].restart = 0; while (1) { @@ -261,12 +263,12 @@ bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data, stat_ctr++; if (unlikely((hash32[7] == 0) && fulltest(hash, target))) { - *hashes_done = n; + *last_nonce = n; return true; } if ((n >= max_nonce) || work_restart[thr_id].restart) { - *hashes_done = n; + *last_nonce = n; return false; } } 
diff --git a/sha256_sse2_amd64.c b/sha256_sse2_amd64.c index 561aa3a1..5c82314f 100644 --- a/sha256_sse2_amd64.c +++ b/sha256_sse2_amd64.c @@ -50,14 +50,14 @@ const uint32_t sha256_init[8]__attribute__((aligned(0x100))) = __m128i g_4sha256_k[64]; __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000))); -int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate, +bool scanhash_sse2_64(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce) { - uint32_t *nNonce_p = (uint32_t *)(pdata + 12); + uint32_t *nNonce_p = (uint32_t *)(pdata + 76); uint32_t m_midstate[8], m_w[16], m_w1[16]; __m128i m_4w[64] __attribute__ ((aligned (0x100))); __m128i m_4hash[64] __attribute__ ((aligned (0x100))); @@ -65,6 +65,8 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate, __m128i offset; int i; + pdata += 64; + work_restart[thr_id].restart = 0; /* For debugging */ @@ -114,19 +116,20 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate, } if (fulltest(phash, ptarget)) { - *nHashesDone = nonce; - *nNonce_p = nonce + j; - return nonce + j; + nonce += j; + *last_nonce = nonce; + *nNonce_p = nonce; + return true; } } - nonce += 4; - if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) { - *nHashesDone = nonce; - return -1; + *last_nonce = nonce; + return false; } + + nonce += 4; } } diff --git a/sha256_sse2_i386.c b/sha256_sse2_i386.c index ef3f0ee5..09350951 100644 --- a/sha256_sse2_i386.c +++ b/sha256_sse2_i386.c @@ -50,14 +50,14 @@ const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) = __m128i g_4sha256_k[64]; __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000))); -int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate, +bool scanhash_sse2_32(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, 
unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce) { - uint32_t *nNonce_p = (uint32_t *)(pdata + 12); + uint32_t *nNonce_p = (uint32_t *)(pdata + 76); uint32_t m_midstate[8], m_w[16], m_w1[16]; __m128i m_4w[64] __attribute__ ((aligned (0x100))); __m128i m_4hash[64] __attribute__ ((aligned (0x100))); @@ -65,6 +65,8 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate, __m128i offset; int i; + pdata += 64; + work_restart[thr_id].restart = 0; /* For debugging */ @@ -116,19 +118,20 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate, } if (fulltest(phash, ptarget)) { - *nHashesDone = nonce; - *nNonce_p = nonce + j; - return nonce + j; + nonce += j; + *last_nonce = nonce; + *nNonce_p = nonce; + return true; } } + if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) { + *last_nonce = nonce; + return false; + } + nonce += 4; - if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) - { - *nHashesDone = nonce; - return -1; - } } } diff --git a/sha256_sse4_amd64.c b/sha256_sse4_amd64.c index f150e8c6..67e0604b 100644 --- a/sha256_sse4_amd64.c +++ b/sha256_sse4_amd64.c @@ -49,19 +49,21 @@ static uint32_t g_sha256_hinit[8] = __m128i g_4sha256_k[64]; -int scanhash_sse4_64(int thr_id, const unsigned char *pmidstate, +bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char *phash, const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce) { - uint32_t *nNonce_p = (uint32_t *)(pdata + 12); + uint32_t *nNonce_p = (uint32_t *)(pdata + 76); uint32_t m_midstate[8], m_w[16], m_w1[16]; __m128i m_4w[64], m_4hash[64], m_4hash1[64]; __m128i offset; int i; + pdata += 64; + work_restart[thr_id].restart = 0; /* For debugging */ @@ -113,19 +115,20 @@ int 
scanhash_sse4_64(int thr_id, const unsigned char *pmidstate, } if (fulltest(phash, ptarget)) { - *nHashesDone = nonce; - *nNonce_p = nonce + j; - return nonce + j; + nonce += j; + *last_nonce = nonce; + *nNonce_p = nonce; + return true; } } - nonce += 4; - if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) { - *nHashesDone = nonce; - return -1; + *last_nonce = nonce; + return false; } + + nonce += 4; } } diff --git a/sha256_via.c b/sha256_via.c index fdf0045e..550807cb 100644 --- a/sha256_via.c +++ b/sha256_via.c @@ -19,9 +19,11 @@ static void via_sha256(void *hash, void *buf, unsigned len) :"memory"); } -bool scanhash_via(int thr_id, unsigned char *data_inout, - const unsigned char *target, - uint32_t max_nonce, unsigned long *hashes_done, +bool scanhash_via(int thr_id, const unsigned char *pmidstate, + unsigned char *data_inout, + unsigned char *phash1, unsigned char *phash, + const unsigned char *target, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) { unsigned char data[128] __attribute__((aligned(128))); @@ -70,12 +72,12 @@ bool scanhash_via(int thr_id, unsigned char *data_inout, dout32[i] = swab32(data32[i]); } - *hashes_done = n; + *last_nonce = n; return true; } if ((n >= max_nonce) || work_restart[thr_id].restart) { - *hashes_done = n; + *last_nonce = n; return false; } }