Merge branch 'master' into build-msvs2010-upd

2025-03-13 06:01:03 +00:00 · 2014-02-07 21:45:26 +02:00 · 2014-02-07 21:45:26 +02:00 · dff7876b7a
commit dff7876b7a
parent feac311305 d6e46f2113
18 changed files with 1479 additions and 496 deletions
--- a/AUTHORS.md
+++ b/AUTHORS.md
@ -2,11 +2,12 @@

 ## Core

-* Scrypt-only refactor and maintenance: Noel Maersk <veox@wemakethings.net> LfxRFgXzA13TSTxgFGUFEtumv5ShGzAmLw
-* Core: Martin Danielsen <kalroth@gmail.com> 1DNBcSEENBwDKrcTyTW61ezWhzsPy5imkn
-* Core: Con Kolivas <kernel@kolivas.org> 15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ
-* Core: Luke Dashjr <luke-jr+cgminer@utopios.org> 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh
+* Scrypt-only refactor and maintenance: Noel Maersk <veox at wemakethings dot net> LfxRFgXzA13TSTxgFGUFEtumv5ShGzAmLw
+* Core: Martin Danielsen <kalroth {at} gmail <dot> com> 1DNBcSEENBwDKrcTyTW61ezWhzsPy5imkn
+* Core: Con Kolivas <kernel [at] kolivas {dot} org> 15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ
+* Core: Luke Dashjr <luke-jr+cgminer @at@ utopios .dot. org> 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh
 * API: Andrew Smith <kan0i {at} kano-kun [dot] net> 1Jjk2LmktEQKnv8r2cZ9MvLiZwZ9gxabKm
+* MSVS2010 build: troky <troky2001 at yahoo dot com> Letoqz4yEnJxjWw9B7Ysn8h9VDQTMDnT29


 ## OpenCL kernels
@ -14,19 +15,23 @@
 All current kernels are based on `scrypt`, originally by Colin Percival,
 updated by many others.

-* ckolivas: Con Kolivas <kernel@kolivas.org> 15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ
-* zuikkis: Zuikkis LeXck7EYgxyjw13zNDxZFmmgmWffFvhmSh
 * alexkarnew/alexkarold: Alexey Karimov LMqRcHdwnZtTMH6c2kWoxSoKM5KySfaP5C
+* ckolivas: Con Kolivas <kernel @at@ kolivas }dot{ org> 15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ
+* psw: Pavel Semjanov LP6GRFvgoMxKA6AW4TVF668cNezEGZvEtr
+* zuikkis: Zuikkis LeXck7EYgxyjw13zNDxZFmmgmWffFvhmSh


 ## Testing, bug fixes, improvements

 * Gabriel Devenyi <gdevenyi>
 * Benjamin Herrenschmidt <ozbenh>
+* Drogean <Drogean>
+* Wolf` <wolf9466>
 * Joe4782 <Joe4782>
 * gacheson <gacheson>
 * tonobitc <tonobitc>
 * Perry Huang <perryh>
+* Joseph Bruggeman <jbruggeman>

 ...and many others. See:

--- a/Makefile.am
+++ b/Makefile.am
@ -26,6 +26,11 @@ endif

 sgminer_CPPFLAGS += $(ADL_CPPFLAGS)

+# Embed the output of `git describe` in the version string. --always makes
+# git fall back to the abbreviated commit hash when no tag is reachable
+# (shallow clone, untagged fork) instead of failing with an empty result.
+if USE_GIT_VERSION
+GIT_VERSION      := $(shell sh -c 'git describe --abbrev=4 --dirty --always')
+sgminer_CPPFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
+endif
+
 sgminer_SOURCES := sgminer.c
 sgminer_SOURCES	+= api.c
 sgminer_SOURCES	+= elist.h miner.h compat.h bench_block.h
--- a/NEWS.md
+++ b/NEWS.md
@ -1,19 +1,63 @@
-Version 4.0.0 - 15th January 2014
+# Release news
+
+## Version 4.1.0 - 7th February 2014
+
+* Writing configuration file from NCurses interface is broken!
+* Commandline option parsing may be broken!
+* MinGW building may be broken!
+* Bug reporting documentation: `doc/BUGS.md`.
+* Kernel selection and integration: `doc/KERNEL.md`.
+* Several updates to other documentation files.
+* Warn if `GPU_*` environment variables are not set.
+* Maximum `intensity` lowered to 31 (anything above that gives an overflow
+  anyway).
+* Experimental `xintensity` setting (by _Kalroth_), see
+  [commit message](https://github.com/veox/sgminer/commit/7aeae40af22e6108aab8b68a229eea25a639d650).
+* Experimental `rawintensity` setting (by _Kalroth_), see
+  [commit message](https://github.com/veox/sgminer/commit/d11df698d141988491494aa1f29c7d3595e9712b).
+* `-v` is now a shorthand for `--verbose`, not `--vectors`.
+* Default `scantime` and `expiry` changed to 7 and 28 (tests run by
+  _MissedOutOnBTC_).
+* Increased precision of `API_MHS`; added `API_KHS`.
+* Pulled in kernels: `alexkarold`, `alexkarnew`, `psw`, `zuikkis`; renamed
+  `scrypt` to `ckolivas`; all kernels now reside in directory `kernel`.
+  Kernels can be chosen at startup only, by specifying `kernel`.
+* Small optimisation to `ckolivas` kernel (by _gdevenyi_).
+* Named pools via `poolname` (by _Kalroth_).
+* Failover pool switching back delay is configurable via
+  `failover-switch-delay` (by _Kalroth_).
+* Pool `state`: `enabled`, `disabled`, and `hidden` (by _Joe4782_).
+* Allow all pools to be set `disabled`.
+* Use RPM in ADL `get-fanspeed` requests (from `bfgminer`, by _luke-jr_).
+* Verbose ADL failure messages (by _Joe4782_ and _deba12_).
+* Use `git` version string if available.
+* Allow bypassing ADL checks during build with `--disable-adl-checks`.
+* MinGW build checks (by _tonobitc_).
+* Experimental Microsoft Visual Studio 2010 building support in branch
+  `build-msvs2010-upd` (by _troky_).
+
+
+## Version 4.0.0 - 15th January 2014

 * Fork `veox/sgminer` from `ckolivas/cgminer` version 3.7.2.
-* Remove code referencing SHA256d mining, FPGAs and ASICS. Leftovers most probably still remain.
+* Remove code referencing SHA256d mining, FPGAs and ASICS. Leftovers most
+  probably still remain.
 * AMD ADL crash fix on R9 chipsets by Benjamin Herrenschmidt.
 * Maximum allowed intensity increased to 42.
 * Move documentation to directory `doc`.
-* `--gpu-threads` support for comma-separated values by Martin "Kalroth" Danielsen.
-* AMD ADL SDK 5 mandatory, preparation for ADL Overdrive 6 support by Martin.
+* `--gpu-threads` support for comma-separated values by Martin Danielsen
+  (_Kalroth_).
+* AMD ADL SDK 5 mandatory, preparation for ADL Overdrive 6 support by
+  _Kalroth_.
 * Allow changing TCP keepalive packet idle time using `--tcp-keepalive`.
 * Automatic library presence detection by `configure`.
 * `--scrypt` option removed (no other choice now).
 * `--vectors` option removed (current kernel only supports 1).
-* Display per-GPU reject percentage instead of absolute values by Martin.
+* Display per-GPU reject percentage instead of absolute values by _Kalroth_.
 * Do not show date in log by default (switch with `--log-show-date`).
-* Fix network difficulty display to resemble that of cgminer 3.1.1.
-* Forward-port relevant bugfixes form `ckolivas/cgminer`, up to cgminer version 3.10.0.
+* Fix network difficulty display to resemble that of `cgminer` 3.1.1.
+* Forward-port relevant bugfixes from `ckolivas/cgminer`, up to `cgminer`
+  version 3.10.0.
+

 Previous NEWS file available [here](https://github.com/veox/sgminer/blob/829f0687bfd0ddb0cf12a9a8588ae2478dfe8d99/NEWS).
--- a/README.md
+++ b/README.md
@ -24,10 +24,10 @@ License: GPLv3.  See `COPYING` for details.
 Documentation is available in directory `doc`. For details on several topics, see:

 * `API` for the RPC API specification;
-* `FAQ` for frequently asked questions;
+* `FAQ.md` for frequently asked questions;
 * `GPU` for semi-obsolete information on GPU configuration options and mining SHA256d-based coins;
 * `KERNEL.md` for OpenCL kernel-related information;
-* `MINING` for how to find the right balance in GPU configuration to mine Scrypt-based coins effectively;
+* `MINING.md` for how to find the right balance in GPU configuration to mine Scrypt-based coins effectively;
 * `windows-build.txt` for information on how to build on Windows.

 Note that **most of the documentation is outdated**. If you want to contribute, fork this repository, update as needed, and submit a pull request.
@ -39,30 +39,25 @@ Note that **most of the documentation is outdated**. If you want to contribute,

 Mandatory:

-    curl dev library    http://curl.haxx.se/libcurl/
-    (libcurl4-openssl-dev)
-
-    pkg-config          http://www.freedesktop.org/wiki/Software/pkg-config
-    libtool             hhttp://www.gnu.org/software/libtool/
-
-    AMD APP SDK         http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads/
+* [curl dev library](http://curl.haxx.se/libcurl/) - `libcurl4-openssl-dev` on Debian
+* [pkg-config](http://www.freedesktop.org/wiki/Software/pkg-config)
+* [libtool](http://www.gnu.org/software/libtool/)
+* [AMD APP SDK](http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads/)	- available under various names as a package on different GNU/Linux distributions

 Optional:

-    curses dev library
-    (libncurses5-dev or libpdcurses on WIN32 for text user interface)
-
-    AMD ADL SDK         http://developer.amd.com/tools-and-sdks/graphics-development/display-library-adl-sdk/
-    (Version 5 or 6, required for ATI GPU monitoring & clocking)
+* curses dev library - `libncurses5-dev` on Debian or `libpdcurses` on WIN32, for text user interface
+* [AMD ADL SDK](http://developer.amd.com/tools-and-sdks/graphics-development/display-library-adl-sdk/) - version 6, required for ATI GPU monitoring & clocking

 If building from git:

-    autoconf
-    automake
+* autoconf
+* automake

 sgminer-specific configuration options:

    --disable-adl           Override detection and disable building with adl
+   --disable-adl-checks    Bypass ADL checks during build
    --without-curses        Do not compile support for curses TUI

 ### *nix build instructions
@ -75,6 +70,9 @@ Then:
    CFLAGS="-O2 -Wall -march=native" ./configure <options>
    make

+To compile a version that can be used across machines, remove
+`-march=native`.
+
 To compile a debug version, replace `-O2` with `-ggdb`.

 Systemwide installation is optional. You may run `sgminer` from the build
--- a/adl.c
+++ b/adl.c
@ -126,6 +126,60 @@ static LPADLDisplayInfo lpAdlDisplayInfo = NULL;
 int set_fanspeed(int gpu, int iFanSpeed);
 static float __gpu_temp(struct gpu_adl *ga);

+/*
+ * Map an ADL status code to a short human-readable description.
+ *
+ * error: status code as returned by an ADL call.
+ *
+ * Returns a pointer to a string literal, so the caller must neither modify
+ * nor free it. Codes without a dedicated entry yield "Unhandled error".
+ */
+char *adl_error_desc(int error)
+{
+	switch (error) {
+	case ADL_ERR:
+		return "Generic error (escape call failed?)";
+	case ADL_ERR_NOT_INIT:
+		return "ADL not initialized";
+	case ADL_ERR_INVALID_PARAM:
+		return "Invalid parameter";
+	case ADL_ERR_INVALID_PARAM_SIZE:
+		return "Invalid parameter size";
+	case ADL_ERR_INVALID_ADL_IDX:
+		return "Invalid ADL index";
+	case ADL_ERR_INVALID_CONTROLLER_IDX:
+		return "Invalid controller index";
+	case ADL_ERR_INVALID_DIPLAY_IDX:
+		return "Invalid display index";
+	case ADL_ERR_NOT_SUPPORTED:
+		return "Function not supported by the driver";
+	case ADL_ERR_NULL_POINTER:
+		return "Null Pointer error";
+	case ADL_ERR_DISABLED_ADAPTER:
+		return "Disabled adapter, can't make call";
+	case ADL_ERR_INVALID_CALLBACK:
+		return "Invalid callback";
+	case ADL_ERR_RESOURCE_CONFLICT:
+		return "Display resource conflict";
+	case ADL_ERR_SET_INCOMPLETE:
+		return "Failed to update some of the values";
+	case ADL_ERR_NO_XDISPLAY:
+		return "No Linux XDisplay in Linux Console environment";
+	default:
+		/* Any code not listed above. */
+		return "Unhandled error";
+	}
+}
+
 static inline void lock_adl(void)
 {
 	mutex_lock(&adl_lock);
@ -250,13 +304,13 @@ static bool prepare_adl(void)
 	// retrieve adapter information only for adapters that are physically present and enabled in the system
 	result = ADL_Main_Control_Create(ADL_Main_Memory_Alloc, 1);
 	if (result != ADL_OK) {
-		applog(LOG_INFO, "ADL initialisation error: %d!", result);
+		applog(LOG_INFO, "ADL initialisation error: %d (%s)", result, adl_error_desc(result));
 		return false;
 	}

 	result = ADL_Main_Control_Refresh();
 	if (result != ADL_OK) {
-		applog(LOG_INFO, "ADL refresh error: %d!", result);
+		applog(LOG_INFO, "ADL refresh error: %d (%s)", result, adl_error_desc(result));
 		return false;
 	}

--- a/api.c
+++ b/api.c
@ -274,7 +274,6 @@ static const char *JSON_PARAMETER = "parameter";
 #define MSG_DISPOOL 48
 #define MSG_ALRENAP 49
 #define MSG_ALRDISP 50
-#define MSG_DISLASTP 51
 #define MSG_MISPDP 52
 #define MSG_INVPDP 53
 #define MSG_TOOMANYP 54
@ -399,7 +398,6 @@ struct CODES {
 { SEVERITY_SUCC,  MSG_DISPOOL,	PARAM_POOL,	"Disabling pool %d:'%s'" },
 { SEVERITY_INFO,  MSG_ALRENAP,	PARAM_POOL,	"Pool %d:'%s' already enabled" },
 { SEVERITY_INFO,  MSG_ALRDISP,	PARAM_POOL,	"Pool %d:'%s' already disabled" },
- { SEVERITY_ERR,   MSG_DISLASTP,PARAM_POOL,	"Cannot disable last active pool %d:'%s'" },
 { SEVERITY_ERR,   MSG_MISPDP,	PARAM_NONE,	"Missing addpool details" },
 { SEVERITY_ERR,   MSG_INVPDP,	PARAM_STR,	"Invalid addpool details '%s'" },
 { SEVERITY_ERR,   MSG_TOOMANYP,PARAM_NONE,	"Reached maximum number of pools (%d)" },
@ -2351,11 +2349,6 @@ static void disablepool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, ch
 		return;
 	}

-	if (enabled_pools <= 1) {
-		message(io_data, MSG_DISLASTP, id, NULL, isjson);
-		return;
-	}
-
 	pool->state = POOL_DISABLED;
 	if (pool == current_pool())
 		switch_pools(NULL);
--- a/configure.ac
+++ b/configure.ac
@ -1,7 +1,7 @@
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_maj], [4])
-m4_define([v_min], [0])
+m4_define([v_min], [1])
 m4_define([v_mic], [0])
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_ver], [v_maj.v_min.v_mic])
@ -11,7 +11,7 @@ m4_define([lt_age], v_min)
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##

-AC_INIT([sgminer], [v_ver], [https://github.com/veox/sgminer/issues])
+AC_INIT([sgminer], [v_ver], [veox@wemakethings.net], [sgminer], [https://github.com/veox/sgminer])

 AC_PREREQ(2.59)
 AC_CANONICAL_SYSTEM
@ -135,20 +135,13 @@ if test -n "$SGMINER_SDK"; then
 	LDFLAGS="-L$SGMINER_SDK/lib/$target $LDFLAGS"
 fi

-opencl="yes"
-
-AC_ARG_ENABLE([opencl],
-	[AC_HELP_STRING([--enable-opencl],[Enable support for GPU mining with opencl])],
-	[opencl=$enableval]
-	)
-if test "x$opencl" != xno; then
-	# Check for OpenCL (the long way needed on mingw32 due to calling conventions)
-	AC_MSG_CHECKING([for OpenCL])
-	SAVED_LIBS=$LIBS
-	SAVED_CFLAGS=$CFLAGS
-	LIBS="$LIBS $OPENCL_LIBS"
-	CFLAGS="$CFLAGS $OPENCL_FLAGS"
-	AC_LINK_IFELSE(
+# Check for OpenCL (the long way needed on mingw32 due to calling conventions)
+AC_MSG_CHECKING([for OpenCL])
+SAVED_LIBS=$LIBS
+SAVED_CFLAGS=$CFLAGS
+LIBS="$LIBS $OPENCL_LIBS"
+CFLAGS="$CFLAGS $OPENCL_FLAGS"
+AC_LINK_IFELSE(
 	[AC_LANG_PROGRAM([[
 		#ifdef __APPLE_CC__
 		#include <OpenCL/opencl.h>
@ -158,19 +151,13 @@ if test "x$opencl" != xno; then
 	]],
 	[[return clSetKernelArg(0, 0, 0, 0); ]])],
 	[AC_MSG_RESULT(yes)
-	found_opencl=1
-	],
+		found_opencl=1],
 	[AC_MSG_RESULT(no)
-	OPENCL_FLAGS=
-	OPENCL_LIBS=
-	found_opencl=0])
-	LIBS=$SAVED_LIBS
-	CFLAGS=$SAVED_CFLAGS
-else
-	OPENCL_FLAGS=""
-	OPENCL_LIBS=""
-fi
-AM_CONDITIONAL([HAS_OPENCL], [test x$opencl = xyes])
+		OPENCL_FLAGS=
+		OPENCL_LIBS=
+		found_opencl=0])
+LIBS=$SAVED_LIBS
+CFLAGS=$SAVED_CFLAGS

 has_winpthread=false
 if test "x$have_win32" = xtrue; then
@ -225,6 +212,11 @@ else
 	DLOPEN_FLAGS=""
 fi

+AC_ARG_ENABLE([git-version],
+	AS_HELP_STRING([--disable-git-version],
+		[Disable use of git version in version string even if available]),
+		[wantgitver=$enableval], [wantgitver=yes])
+
 curses="auto"

 AC_ARG_WITH([curses],
@ -287,6 +279,20 @@ else
 fi
 AC_SUBST(LIBCURL_LIBS)

+# Enable or disable use of git version in version string.
+# usegitver becomes "yes" only when the feature was not disabled, a git
+# executable was found and the build directory is a git checkout, so the
+# USE_GIT_VERSION define, the automake conditional and the summary line
+# always agree with each other.
+usegitver=no
+if test "x$wantgitver" = "xyes" ; then
+	AC_CHECK_PROGS([GIT], [git])
+	# Announce this check only after the program lookup has printed its
+	# own "checking for git" line, so the two messages do not interleave.
+	AC_MSG_CHECKING([whether to use git version if available])
+	# Two separate, quoted tests: "test A -a B" is not portable and
+	# misparses when the git variable is empty. A plain directory test also
+	# keeps working when cross-compiling.
+	if test -n "$GIT" && test -d .git; then
+		usegitver=yes
+		AC_DEFINE([USE_GIT_VERSION], [1], [Use GIT version in version string])
+	fi
+	AC_MSG_RESULT([$usegitver])
+else
+	AC_MSG_CHECKING([whether to use git version if available])
+	AC_MSG_RESULT([no, disabled by configure])
+fi
+AM_CONDITIONAL([USE_GIT_VERSION], [test "x$usegitver" = xyes])

 #check execv signature
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([
@ -338,6 +344,7 @@ AC_DEFINE_UNQUOTED([ALEXKARNEW_KERNNAME], ["alexkarnew"], [Filename for Alexey K
 AC_DEFINE_UNQUOTED([ALEXKAROLD_KERNNAME], ["alexkarold"], [Filename for Alexey Karimov's optimised kernel for Catalyst <13.4])
 AC_DEFINE_UNQUOTED([CKOLIVAS_KERNNAME], ["ckolivas"], [Filename for original scrypt kernel])
 AC_DEFINE_UNQUOTED([ZUIKKIS_KERNNAME], ["zuikkis"], [Filename for Zuikkis' optimised kernel])
+AC_DEFINE_UNQUOTED([PSW_KERNNAME], ["psw"], [Filename for psw's experimental kernel])

 AC_SUBST(OPENCL_LIBS)
 AC_SUBST(OPENCL_FLAGS)
@ -373,6 +380,8 @@ echo
 echo "Configuration Options Summary:"
 echo

+echo "  Use git version......: $usegitver"
+
 if test "x$libcurl" != xno; then
 	echo "  libcurl(GBT+getwork).: Enabled: $LIBCURL_LIBS"
 else
@ -381,17 +390,11 @@ fi

 echo "  curses.TUI...........: $cursesmsg"

-
-if test "x$opencl" != xno; then
-	if test $found_opencl = 1; then
-		echo "  OpenCL...............: FOUND. GPU mining support enabled"
-	else
-		echo "  OpenCL...............: NOT FOUND. GPU mining support DISABLED"
-		AC_MSG_ERROR([No mining configured in])
-	fi
+if test $found_opencl = 1; then
+	echo "  OpenCL...............: FOUND. GPU mining support enabled"
 else
-	echo "  OpenCL...............: Detection overrided. GPU mining support DISABLED"
-	AC_MSG_ERROR([No mining configured in])
+	echo "  OpenCL...............: NOT FOUND. GPU mining support DISABLED"
+	AC_MSG_ERROR([OpenCL not found])
 fi

 if test "x$adl" != xno; then
--- a/doc/BUGS.md
+++ b/doc/BUGS.md
@ -1,22 +1,23 @@
 # Bug reporting

 First and foremost, see `README.md` and other documentation in `doc`.
-Common issues and solutions are also often discussed online.
+Although the documentation might be outdated, a lot of it is still
+relevant.

 The [issue tracker](https://github.com/veox/sgminer/issues) is there
 specifically for reporting bugs, issues and proposed improvements. Other
-communication channels are not regularly monitored.
+communication channels are not necessarily monitored.

 Search the issue list to see if it has already been reported.

-Information that is most often relevant:
+Information that may be relevant, depending on the nature of your issue:

 * OS version;
 * Catalyst driver version;
 * AMD APP SDK version;
 * AMD ADL version;
 * GPUs used (`sgminer --ndevs`);
-* whether you're using a pre-compiled binary or bult from source;
+* whether you're using a pre-compiled binary or built from source;
 * `sgminer` version (`sgminer --version` and/or `git describe`);
 * contents of the configuration file;
 * launch procedure (manual or via script);
@ -25,4 +26,6 @@ Information that is most often relevant:
 * actual result;
 * debug output (`sgminer --text-only --debug --verbose`).

-It is preferred that large amounts of data be posted on external sites.
+If there is a need to provide more than a screenful of log
+data, it is preferred that a link is given instead. Try
+[gist](https://gist.github.com).
--- a/doc/FAQ
+++ b/doc/FAQ
@ -1,154 +0,0 @@
-FAQ
-
-Q: Can I mine on servers from different networks (eg smartcoin and bitcoin) at
-the same time?
-A: No, sgminer keeps a database of the block it's working on to ensure it does
-not work on stale blocks, and having different blocks from two networks would
-make it invalidate the work from each other.
-
-Q: Can I configure sgminer to mine with different login credentials or pools
-for each separate device?
-A: No.
-
-Q: Can I put multiple pools in the config file?
-A: Yes, check the example.conf file. Alternatively, set up everything either on
-the command line or via the menu after startup and choose settings->write
-config file and the file will be loaded one each startup.
-
-Q: The build fails with gcc is unable to build a binary.
-A: Remove the "-march=native" component of your CFLAGS as your version of gcc
-does not support it.
-
-Q: Can you implement feature X?
-A: I can, but time is limited, and people who donate are more likely to get
-their feature requests implemented.
-
-Q: Work keeps going to my backup pool even though my primary pool hasn't
-failed?
-A: sgminer checks for conditions where the primary pool is lagging and will
-pass some work to the backup servers under those conditions. The reason for
-doing this is to try its absolute best to keep the GPUs working on something
-useful and not risk idle periods. You can disable this behaviour with the
-option --failover-only.
-
-Q: Is this a virus?
-A: sgminer is being packaged with other trojan scripts and some antivirus
-software is falsely accusing sgminer.exe as being the actual virus, rather
-than whatever it is being packaged with. If you installed sgminer yourself,
-then you do not have a virus on your computer. Complain to your antivirus
-software company. They seem to be flagging even source code now from sgminer
-as viruses, even though text source files can't do anything by themself.
-
-Q: Can you modify the display to include more of one thing in the output and
-less of another, or can you change the quiet mode or can you add yet another
-output mode?
-A: Everyone will always have their own view of what's important to monitor.
-The defaults are very sane and I have very little interest in changing this
-any further.
-
-Q: What are the best parameters to pass for X pool/hardware/device.
-A: Virtually always, the DEFAULT parameters give the best results. Most user
-defined settings lead to worse performance. The ONLY thing most users should
-need to set is the Intensity for GPUs.
-
-Q: What happened to CPU mining?
-A: Being increasingly irrelevant for most users, and a maintenance issue, it is
-no longer under active development and will not be supported. No binary builds
-supporting CPU mining will be released. Virtually all remaining users of CPU
-mining are as back ends for illegal botnets. The main reason sgminer is being
-inappopriately tagged as a virus by antivirus software is due to the trojans
-packaging a CPU mining capable version of it. There is no longer ANY CPU mining
-code in sgminer. If you are mining bitcoin with CPU today, you are spending
-1000x more in electricity costs than you are earning in bitcoin.
-
-Q: GUI version?
-A: No. The RPC interface makes it possible for someone else to write one
-though.
-
-Q: I'm having an issue. What debugging information should I provide in the
-bug report?
-A: Start sgminer with your regular commands and add -D -T --verbose and provide
-the full startup output and a summary of your hardware, operating system, AMD
-driver version and AMD APP SDK version (if installed separately).
-
-Q: Why don't you provide win64 builds?
-A: Win32 builds work everywhere and there is precisely zero advantage to a
-64 bit build on windows.
-
-Q: Is it faster to mine on windows or linux?
-A: It makes no difference. It comes down to choice of operating system for
-their various features. Linux offers much better long term stability and
-remote monitoring and security, while windows offers you overclocking tools
-that can achieve much more than sgminer can do on linux.
-
-Q: Can I mine with sgminer on a MAC?
-A: sgminer will compile on OSX, but the performance of GPU mining is
-compromised due to the opencl implementation on OSX, there is no temperature
-or fanspeed monitoring, and the cooling design of most MACs, despite having
-powerful GPUs, will usually not cope with constant usage leading to a high
-risk of thermal damage. It is highly recommended not to mine on a MAC.
-
-Q: I switch users on windows and my mining stops working?
-A: That's correct, it does. It's a permissions issue that there is no known
-fix for due to monitoring of GPU fanspeeds and temperatures. If you disable
-the monitoring with --no-adl it should switch okay.
-
-Q: My network gets slower and slower and then dies for a minute?
-A; Try the --net-delay option.
-
-Q: How do I tune for p2pool?
-A: p2pool has very rapid expiration of work and new blocks, it is suggested you
-decrease intensity by 1 from your optimal value, and decrease GPU threads to 1
-with -g 1. It is also recommended to use --failover-only since the work is
-effectively like a different block chain.
-
-Q: Are OpenCL kernels from other mining software useable in sgminer?
-A: No, the APIs are slightly different between the different software and they
-will not work.
-
-Q: I run PHP on windows to access the API with the example miner.php. Why does
-it fail when php is installed properly but I only get errors about Sockets not
-working in the logs?
-A: http://us.php.net/manual/en/sockets.installation.php
-
-Q: Can I mine scrypt with FPGAs or ASICs?
-A: As of Jan 15 2014, no.
-
-Q: What is stratum and how do I use it?
-A: Stratum is a protocol designed for pooled mining in such a way as to
-minimise the amount of network communications, yet scale to hardware of any
-speed. With versions of sgminer 2.8.0+, if a pool has stratum support, sgminer
-will automatically detect it and switch to the support as advertised if it can.
-If you input the stratum port directly into your configuration, or use the
-special prefix "stratum+tcp://" instead of "http://", sgminer will ONLY try to
-use stratum protocol mining. The advantages of stratum to the miner are no
-delays in getting more work for the miner, less rejects across block changes,
-and far less network communications for the same amount of mining hashrate. If
-you do NOT wish sgminer to automatically switch to stratum protocol even if it
-is detected, add the --fix-protocol option.
-
-Q: Why don't the statistics add up: Accepted, Rejected, Stale, Hardware Errors,
-Diff1 Work, etc. when mining greater than 1 difficulty shares?
-A: As an example, if you look at 'Difficulty Accepted' in the RPC API, the number
-of difficulty shares accepted does not usually exactly equal the amount of work
-done to find them. If you are mining at 8 difficulty, then you would expect on
-average to find one 8 difficulty share, per 8 single difficulty shares found.
-However, the number is actually random and converges over time, it is an average,
-not an exact value, thus you may find more or less than the expected average.
-
-Q: Why do the scrypt diffs not match with the current difficulty target?
-A: The current scrypt block difficulty is expressed in terms of how many
-multiples of the BTC difficulty it currently is (eg 28) whereas the shares of
-"difficulty 1" are actually 65536 times smaller than the BTC ones. The diff
-expressed by sgminer is as multiples of difficulty 1 shares.
-
-Q: Can I make a donation?
-A: Yes, see AUTHORS.md for authors' donation addresses.
-
-Q: What should my Work Utility (WU) be?
-A: Work utility is the product of hashrate * luck and only stabilises over a
-very long period of time. Assuming all your work is valid work, bitcoin mining
-should produce a work utility of approximately 1 per 71.6MH. This means at
-5GH you should have a WU of 5000 / 71.6 or ~ 69. You cannot make your machine
-do "better WU" than this - it is luck related. However you can make it much
-worse if your machine produces a lot of hardware errors producing invalid work.
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@ -0,0 +1,170 @@
+# FAQ
+
+Q: Why is the network difficulty wrong?
+A: It is not wrong. Sharediff of 1 (historically) corresponds to a
+network difficulty of 1/65536. Throughout the interface, share difficulty
+is displayed as whole numbers, not fractionals. Pools use the same
+convention (with the notable exception of P2Pool). Until pools start
+using true network difficulty to display share difficulty, there is no
+reason to display difficulty differently. This is a vicious cycle and a
+remnant of Bitcoin mining on GPUs.
+
+Q: Can I mine on servers from different networks (eg litecoin and
+dogecoin) at the same time?
+A: No. `sgminer` keeps a database of the block it's working on to ensure
+it does not work on stale blocks, and having different blocks from two
+networks would make it invalidate the work from each other.
+
+Q: Can I configure sgminer to mine with different login credentials or
+pools for each separate device?
+A: No.
+
+Q: Can I put multiple pools in the config file?
+A: Yes, check the `example.conf` file. Alternatively, set up everything
+either on the command line or via the menu after startup and choose
+`Settings -> Write config file`.
+
+Q: The build fails with `gcc is unable to build a binary`.
+A: Remove the "-march=native" component of your `CFLAGS` as your version
+of gcc does not support it.
+
+Q: Can you implement feature X?
+A: I can, but time is limited, and people who donate are more likely to
+get their feature requests implemented.
+
+Q: Work keeps going to my backup pool even though my primary pool
+hasn't failed?
+A: sgminer checks for conditions where the primary pool is lagging and
+will pass some work to the backup servers under those conditions. The
+reason for doing this is to try its absolute best to keep the GPUs
+working on something useful and not risk idle periods. You can disable
+this behaviour with the option --failover-only.
+
+Q: Is this a virus?
+A: sgminer is being packaged with other trojan
+scripts and some antivirus software is falsely accusing sgminer.exe as
+being the actual virus, rather than whatever it is being packaged with.
+If you had built sgminer yourself, then you do not have a virus on your
+computer. Complain to your antivirus software company.
+
+Q: Can you modify the display to include more of one thing in the output
+and less of another, or can you change the quiet mode or can you add
+yet another output mode?
+A: Everyone will always have their own view of what's important to
+monitor. The shipped NCurses TUI is intentionally ascetic, and is only
+provided as a fallback. It is recommended to use an API client if you
+want to customise the display.
+
+Q: GUI version?
+A: No. The API makes it possible for someone else to write one though.
+
+Q: What are the best parameters to pass for pool / hardware / device?
+A: See `doc/MINING.md` in your source distribution directory, or
+[doc/MINING.md](https://github.com/veox/sgminer/blob/master/doc/MINING.md)
+for an online version. Note that the latter is for the latest
+development version, and arguments listed there are not necessarily
+available in your local version.
+
+Q: Is CPU mining supported?
+A: No. Consider using [cpuminer](https://github.com/pooler/cpuminer).
+
+Q: I'm having an issue. What debugging information should I provide in
+the bug report?
+A: See `doc/BUGS.md` in your source distribution directory, or
+[doc/BUGS.md](https://github.com/veox/sgminer/blob/master/doc/BUGS.md)
+for an online version.
+
+Q: Why don't you provide binaries?
+A: Binaries are a hassle to maintain. On Linux, they should be provided
+by your distribution's package manager anyway. Running an unoptimised
+binary gives a minor performance penalty. Running binaries from
+untrusted providers is a security risk. There has not been sufficient
+community interest to provide distributed deterministic builds.
+
+Q: Is it better to mine on Linux or Windows?
+A: It comes down to choice of operating system for their various
+features. Linux offers specialised mining distributions, much better
+long term stability, remote monitoring and security, while Windows
+offers overclocking tools that can achieve much more than sgminer can do
+on Linux. YMMV.
+
+Q: Can I mine with sgminer on a Mac?
+A: sgminer will compile on OSX, but the performance of GPU mining
+is compromised due to the OpenCL implementation on OSX, there is no
+temperature or fanspeed monitoring, and the cooling design will usually
+not cope with constant usage leading to a high risk of thermal damage.
+It is highly recommended not to mine on a Mac.
+
+Q: I switch users on Windows and my mining stops working?
+A: That's correct, it does. It's a permissions issue that there is no
+known fix for due to monitoring of GPU fanspeeds and temperatures. If
+you disable the monitoring with `--no-adl` it should switch okay.
+
+Q: My network gets slower and slower and then dies for a minute?
+A: Try the `--net-delay` option.
+
+Q: How do I tune for P2Pool?
+A: P2Pool has very rapid expiration of work and new blocks, it is
+suggested you decrease intensity, decrease `scantime` and `expiry`,
+and/or decrease GPU threads to 1 with `-g 1`. It is also recommended to
+use `--failover-only` since the work is effectively a separate
+blockchain.
+
+Q: Are OpenCL kernels from other mining software usable in sgminer?
+A: Most often no.
+
+Q: How do I add my own kernel?
+A: See `doc/KERNEL.md` in your source distribution directory, or
+[doc/KERNEL.md](https://github.com/veox/sgminer/blob/master/doc/KERNEL.md)
+for an online version.
+
+Q: I run PHP on Windows to access the API with the example
+`miner.php`. Why does it fail when PHP is installed properly but
+I only get errors about Sockets not working in the logs?
+A: http://us.php.net/manual/en/sockets.installation.php
+
+Q: Will sgminer support FPGAs or ASICs?
+A: No. sgminer will only support GPUs. It is bad software design
+practice to try and support every gadget out there. Developers
+for dedicated hardware products are better off creating standalone
+software.
+
+Q: What is stratum and how do I use it?
+A: Stratum is a protocol designed for pooled mining in such a way as to
+minimise the amount of network communications, yet scale to hardware
+of any speed. If a pool has stratum support (and most public ones do),
+sgminer will automatically detect it and switch to the support as
+advertised if it can. If you input the stratum port directly into your
+configuration, or use the special prefix `stratum+tcp://` instead of
+`http://`, sgminer will ONLY try to use stratum protocol mining. The
+advantages of stratum to the miner are no delays in getting more work
+for the miner, less rejects across block changes, and far less network
+communications for the same amount of mining hashrate. If you do not
+wish sgminer to automatically switch to stratum protocol even if it is
+detected, add the `--fix-protocol` option.
+
+Q: Why don't the statistics add up: Accepted, Rejected, Stale, Hardware
+Errors, Diff1 Work, etc. when mining greater than 1 difficulty shares?
+A: As an example, if you look at 'Difficulty Accepted' in the RPC API,
+the number of difficulty shares accepted does not usually exactly equal
+the amount of work done to find them. If you are mining at 8 difficulty,
+then you would expect on average to find one 8 difficulty share, per 8
+single difficulty shares found. However, the number is actually random
+and converges over time, it is an average, not an exact value, thus you
+may find more or less than the expected average.
+
+Q: Why do the scrypt diffs not match with the current difficulty target?
+A: The current scrypt block difficulty is expressed in terms of how
+many multiples of the BTC difficulty it currently is (eg 28) whereas
+the shares of "difficulty 1" are actually 65536 times smaller than the
+BTC ones. The diff expressed by sgminer is as multiples of difficulty 1
+shares.
+
+Q: Can I make a donation?
+A: Yes, see AUTHORS.md for authors' donation addresses.
+
+Q: What is Work Utility (WU)?
+A: Work utility is the product of hashrate * luck and only stabilises
+over a very long period of time. Luck includes hardware error rate,
+share reject rate and other parameters. Therefore, it is often a better
+indicator of hardware or software misconfiguration.
--- a/doc/KERNEL.md
+++ b/doc/KERNEL.md
@ -42,6 +42,13 @@ Con Kolivas in `cgminer` and renamed to reflect the fact.
 Only supports `vectors=1`.


+### psw
+
+Pavel Semjanov's optimised kernel, with SHA256 speedups.
+
+[Announcement](https://bitcointalk.org/index.php?topic=369858.0).
+
+
 ### zuikkis

 Zuikkis' optimised kernel, based on `ckolivas`.
@ -57,30 +64,36 @@ Only supports `vectors=1`, `lookup-gap=2` and `worksize=256`.

 * OpenCL source code only, licenced under GPLv3 (or later).
 * Not hard-coded for a specific GPU model or manufacturer.
-* Known limitations and any specific configuration quirks must be mentioned.
+* Known limitations and any specific configuration quirks must be
+  mentioned.


 ### Procedure

-1. Copy the kernel you wish to modify and commit it verbatim.
+* Copy the kernel you wish to modify, change the file encoding to UTF-8
+and commit it without any further modifications.

 This way, it is easy to verify that there are no hidden changes. Note in
 the commit message which kernel is used as a base.

-2. Make changes to the kernel. Commit them.
+* Make changes to the kernel. Commit them.

 This allows to produce a diff that makes sense.

-3. Search for KL_CKOLIVAS and CKOLIVAS_KERNNAME in the top-level source
+* Search for `KL_CKOLIVAS` and `CKOLIVAS_KERNNAME` in the top-level source
 directory and make additions to the listed files in order to integrate
 the new kernel.

 Now it can be selected when starting via the `--kernel` argument or
 `kernel` configuration option.

-4. Add yourself to the "kernels" section in `AUTHORS.md`. Keep it short.
+* Recompile and test that the kernel actually works.

-5. Submit a pull request on GitHub, or file it at the issue tracker,
-outlining the changes made, known limitations, and tested GPUs. List
+* Add yourself to the "kernels" section in `AUTHORS.md`. Keep it short.
+
+* Submit a pull request on GitHub, file it at the issue tracker, or mail
+it.
+
+Outline the changes made, known limitations, and tested GPUs. List
 your git repository and branch name. The current repository and issue
 tracker links should be in `README.md`.
--- a/doc/MINING
+++ b/doc/MINING
@ -1,237 +0,0 @@
-While BTC donations are preferred, if you wish to donate to the author, Con
-Kolivas, in LTC, please submit your donations to:
-
-Lc8TWMiKM7gRUrG8VB8pPNP1Yvt1SGZnoH
-
-Otherwise, please donate in BTC as per the main README.
-
---
-
-Scrypt mining, AKA litecoin mining, for GPU is completely different to sha256
-used for bitcoin mining. The algorithm was originally developed in a manner
-that it was anticipated would make it suitable for mining on CPU but NOT GPU.
-Thanks to some innovative work by Artforz and mtrlt, this was proven to be
-wrong. However, it has very different requirements to bitcoin mining and is a
-lot more complicated to get working well. Note that it is a ram dependent
-workload, and requires you to have enough system ram as well as fast enough
-GPU ram. If you have less system ram than your GPU has, it may not be possible
-to mine at any reasonable rate.
-
-There are 5 main parameters to tuning scrypt, all of which are optional for
-further fine tuning. When you start mining, sgminer may fail IN RANDOM WAYS.
-They are all due to parameters being outside what the GPU can cope with.
-
-NOTE that if it does not fail at startup, the presence of hardware errors (HW)
-are a sure sign that you have set the parameters too high.
-
-
-DRIVERS AND OPENCL SDK
-
-The choice of driver version for your GPU is critical, as some are known to
-break scrypt mining entirely while others give poor hashrates. As for the
-OpenCL SDK installed, for AMD it must be version 2.6 or later.
-
-
-Step 1 on Linux:
-export GPU_MAX_ALLOC_PERCENT=100
-If you do not do this, you may find it impossible to scrypt mine. You may find
-a value of 40 is enough and increasing this further has little effect.
-
-export GPU_USE_SYNC_OBJECTS=1
-may help CPU usage a little as well.
-
-On windows the same commands can be passed via a batch file if the following
-lines are in the .bat before starting sgminer:
-setx GPU_MAX_ALLOC_PERCENT 100
-setx GPU_USE_SYNC_OBJECTS 1
-
--intensity XX (-I XX)
-
-The scale goes from 0 to 42. The reason this is crucial is that too
-high an intensity can actually be disastrous with scrypt because it CAN
-run out of ram. High intensities start writing over the same ram and it
-is highly dependent on the GPU, but they can start actually DECREASING
-your hashrate, or even worse, start producing garbage with HW errors
-skyrocketing, or locking up the system altogether. Note that if you do
-NOT specify an intensity, sgminer uses dynamic mode which is designed
-to minimise the harm to a running desktop and performance WILL be poor.
-The lower limit to intensity with scrypt is usually 8 and sgminer will
-prevent it going too low.
-SUMMARY: Setting this for reasonable hashrates is mandatory.
-
--shaders XXX
-
-is an option where you tell sgminer how many shaders your GPU has. This
-helps sgminer try to choose some meaningful baseline parameters. Use
-this table below to determine how many shaders your GPU has, and note
-that there are some variants of these cards, and nvidia shaders are much
-much lower and virtually pointless trying to mine on. If this is not
-set, sgminer will query the device for how much memory it supports and
-will try to set a value based on that.
-SUMMARY: This will get you started but fine tuning for optimal performance is
-required. Using --thread-concurrency is recommended instead.
-
-GPU  Shaders
-7750 512
-7770 640
-7850 1024
-7870 1280
-7950 1792
-7970 2048
-
-6850 960
-6870 1120
-6950 1408
-6970 1536
-6990 (6970x2)
-
-6570 480
-6670 480
-6790 800
-
-6450 160
-
-5670 400
-5750 720
-5770 800
-5830 1120
-5850 1440
-5870 1600
-5970 (5870x2)
-
-These are only used as a rough guide for sgminer, and it is rare that this is
-all you will need to set.
-
-
-Optional parameters to tune:
-g, --thread-concurrency, --lookup-gap
-
--thread-concurrency:
-This tunes the optimal size of work that scrypt can do. It is internally tuned
-by sgminer to be the highest reasonable multiple of shaders that it can
-allocate on your GPU. Ideally it should be a multiple of your shader count.
-vliw5 architecture (R5XXX) would be best at 5x shaders, while VLIW4 (R6xxx and
-R7xxx) are best at 4x. Setting thread concurrency overrides anything you put
-into --shaders and is ultimately a BETTER way to tune performance.
-SUMMARY: Spend lots of time finding the highest value that your device likes
-and increases hashrate.
-
-g:
-Once you have found the optimal shaders and intensity, you can start increasing
-the -g value till sgminer fails to start. This is really only of value if you
-want to run low intensities as you will be unable to run more than 1.
-SUMMARY: Don't touch this.
-
--lookup-gap
-This tunes a compromise between ram usage and performance. Performance peaks
-at a gap of 2, but increasing the gap can save you some GPU ram, but almost
-always at the cost of significant loss of hashrate. Setting lookup gap
-overrides the default of 2, but sgminer will use the --shaders value to choose
-a thread-concurrency if you haven't chosen one.
-SUMMARY: Don't touch this.
-
-
-Related parameters:
--worksize XX (-w XX)
-Has a minor effect, should be a multiple of 64 up to 256 maximum.
-SUMMARY: Worth playing with once everything else has been tried but will
-probably do nothing.
-
-
-Overclocking for scrypt mining:
-First of all, do not underclock your memory initially. Scrypt mining requires
-memory speed and on most, but not all, GPUs, lowering memory speed lowers
-mining performance.
-
-Second, absolute engine clock speeds do NOT correlate with hashrate. The ratio
-of engine clock speed to memory matters, so if you set your memory to the
-default value, and then start overclocking as you are running it, you should
-find a sweet spot where the hashrate peaks and then it might actually drop if
-you increase the engine clock speed further.
-
-Third, the combination of motherboard, CPU and system ram ALSO makes a
-difference, so values that work for a GPU on one system may not work for the
-same GPU on a different system. A decent amount of system ram is actually
-required for scrypt mining, and 4GB is suggested.
-
-Finally, the power consumption while mining at high engine clocks, very high
-memory clocks can be far in excess of what you might imagine.
-For example, a 7970 running with the following settings:
--thread-concurrency 22392 --gpu-engine 1135 --gpu-memclock 1890
-was using 305W!
-
---
-TUNING AN AMD RADEON 7970
-Example tuning a 7970 for Scrypt mining:
-
-On linux run this command:
-export GPU_MAX_ALLOC_PERCENT=100
-or on windows this:
-setx GPU_MAX_ALLOC_PERCENT 100
-in the same console/bash/dos prompt/bat file/whatever you want to call it,
-before running sgminer.
-
-First, find the highest thread concurrency that you can start it at. They should
-all start at 8192 but some will go up to 3 times that. Don't go too high on the
-intensity while testing and don't change gpu threads. If you cannot go above
-8192, don't fret as you can still get a high hashrate.
-
-Delete any .bin files so you're starting from scratch and see what bins get
-generated.
-
-First try without any thread concurrency or even shaders, as sgminer will try to
-find an optimal value
-sgminer -I 13
-
-If that starts mining, see what bin was generated, it is likely the largest
-meaningful TC you can set.
-Starting it on mine I get:
-scrypt130302Tahitiglg2tc22392w64l8.bin
-
-See tc22392 that's telling you what thread concurrency it was. It should start
-without TC parameters, but you never know. So if it doesn't, start with
--thread-concurrency 8192 and add 2048 to it at a time till you find the highest
-value it will start successfully at.
-
-Then start overclocking the eyeballs off your memory, as 7970s are exquisitely
-sensitive to memory speed and amazingly overclockable but please make sure it
-keeps adequately cooled with --auto-fan! Do it while it's running from the GPU
-menu. Go up by 25 at a time every 30 seconds or so until your GPU crashes. Then
-reboot and start it 25 lower as a rough start. Mine runs stable at 1900 memory
-without overvolting. Overvolting is the only thing that can actually damage your
-GPU so I wouldn't recommend it at all.
-
-Then once you find the maximum memory clock speed, you need to find the sweet
-spot engine clock speed that matches it. It's a fine line where one more MHz
-will make the hashrate drop by 20%. It's somewhere in the .57 - 0.6 ratio range.
-Start your engine clock speed at half your memory clock speed and then increase
-it by 5 at a time. The hashrate should climb a little each rise in engine speed
-and then suddenly drop above a certain value. Decrease it by 1 then until you
-find it climbs dramatically. If your engine clock speed cannot get that high
-without crashing the GPU, you will have to use a lower memclock.
-
-Then, and only then, bother trying to increase intensity further.
-
-My final settings were:
--gpu-engine 1141  --gpu-memclock 1875 -I 20
-for a hashrate of 745kH.
-
-Note I did not bother setting a thread concurrency. Once you have the magic
-endpoint, look at what tc was chosen by the bin file generated and then hard
-code that in next time (eg --thread-concurrency 22392) as slight changes in
-thread concurrency will happen every time if you don't specify one, and the tc
-to clock ratios are critical!
-
-Good luck, and if this doesn't work for you, well same old magic discussion
-applies, I cannot debug every hardware combo out there.
-
-Your numbers will be your numbers depending on your hardware combination and OS,
-so don't expect to get exactly the same results!
-
---
-While BTC donations are preferred, if you wish to donate to the author, Con
-Kolivas, in LTC, please submit your donations to:
-
-Lc8TWMiKM7gRUrG8VB8pPNP1Yvt1SGZnoH
-
-Otherwise, please donate in BTC as per the main README.
--- a/doc/MINING.md
+++ b/doc/MINING.md
@ -0,0 +1,263 @@
+# Mining scrypt
+
+## Introduction
+
+Mining scrypt-based cryptocurrencies using GPUs is completely different
+to mining SHA256d (used in Bitcoin). The former was intentionally
+developed in a manner that (it was hoped) would make it suitable
+for mining on CPUs, but not GPUs. Thanks to some innovative work by
+_Artforz_ and _mtrlt_, this was proven to be wrong.
+
+However, it has very different requirements compared to SHA256d and
+is a lot more complicated to get working well. It is a RAM-dependent
+workload, and requires you to have enough system RAM as well as fast
+enough GPU RAM. What is "enough" depends on setup specifics.
+
+
+## Catalyst drivers and OpenCL SDK
+
+The choice of driver version for your GPU is critical, as some are known
+to break scrypt mining entirely while others give poor hashrates. It is
+recommended that you first try with the latest stable version available.
+
+Latest driver distribution versions may already include the AMD APP
+SDK, therefore presenting an OpenCL vendor conflict when building or
+running. Systems with NVidia cards and NVidia drivers may have a similar
+conflict. If this is the case, check which OpenCL vendor is used, and
+consider removing unneeded ones.
+
+
+## Runtime environment
+
+Environment variables must be set to allow access from console /
+terminal / screen.
+
+On Linux:
+
+    export DISPLAY=:0
+    export GPU_MAX_ALLOC_PERCENT=100
+    export GPU_USE_SYNC_OBJECTS=1
+
+On Windows:
+
+    setx GPU_MAX_ALLOC_PERCENT 100
+    setx GPU_USE_SYNC_OBJECTS 1
+
+
+## Tuning
+
+When mining is started, sgminer may fail in various ways. This is often
+not a bug in the software, but rather misconfiguration. The failures may
+occur due to parameters being outside what the GPU can cope with (both
+too high and too low).
+
+All parameters are optional for fine tuning.
+
+**WARNING**: documentation below has not been reviewed to be up-to-date.
+
+
+--intensity XX (-I XX)
+
+The scale goes from 0 to 31. The reason this is crucial is that too
+high an intensity can actually be disastrous with scrypt because it CAN
+run out of ram. High intensities start writing over the same ram and it
+is highly dependent on the GPU, but they can start actually DECREASING
+your hashrate, or even worse, start producing garbage with HW errors
+skyrocketing, or locking up the system altogether. Note that if you do
+NOT specify an intensity, sgminer uses dynamic mode which is designed
+to minimise the harm to a running desktop and performance WILL be poor.
+The lower limit to intensity with scrypt is usually 8 and sgminer will
+prevent it going too low.
+
+SUMMARY: Setting this for reasonable hashrates is mandatory.
+
+
+--shaders XXX
+
+is an option where you tell sgminer how many shaders your GPU has. This
+helps sgminer try to choose some meaningful baseline parameters. Use
+this table below to determine how many shaders your GPU has, and note
+that there are some variants of these cards, and NVidia shader counts
+are much, much lower, making them virtually pointless to mine on. If
+this is not set, sgminer will query the device for how much memory it
+supports and will try to set a value based on that.
+
+SUMMARY: This will get you started but fine tuning for optimal
+performance is required. Using --thread-concurrency is recommended
+instead.
+
+GPU  Shaders
+7750 512
+7770 640
+7850 1024
+7870 1280
+7950 1792
+7970 2048
+
+6850 960
+6870 1120
+6950 1408
+6970 1536
+6990 (6970x2)
+
+6570 480
+6670 480
+6790 800
+
+6450 160
+
+5670 400
+5750 720
+5770 800
+5830 1120
+5850 1440
+5870 1600
+5970 (5870x2)
+
+These are only used as a rough guide for sgminer, and it is rare that
+this is all you will need to set.
+
+
+--thread-concurrency
+
+This tunes the optimal size of work that scrypt can do. It is internally
+tuned by sgminer to be the highest reasonable multiple of shaders that
+it can allocate on your GPU. Ideally it should be a multiple of your
+shader count. VLIW5 architecture (R5xxx) would be best at 5x shaders,
+while VLIW4 (R6xxx and R7xxx) are best at 4x. Setting thread concurrency
+overrides anything you put into --shaders and is ultimately a BETTER way
+to tune performance.
+
+SUMMARY: Spend lots of time finding the highest value that your device
+likes and increases hashrate.
+
+
+-g
+
+Once you have found the optimal shaders and intensity, you can start
+increasing the -g value till sgminer fails to start. This is really only
+of value if you want to run low intensities as you will be unable to run
+more than 1.
+
+SUMMARY: Don't touch this.
+
+
+--lookup-gap
+
+This tunes a compromise between ram usage and performance. Performance
+peaks at a gap of 2; increasing the gap can save you some GPU
+ram, but almost always at the cost of significant loss of hashrate.
+Setting lookup gap overrides the default of 2, but sgminer will use the
+--shaders value to choose a thread-concurrency if you haven't chosen
+one.
+
+SUMMARY: Don't touch this.
+
+
+Related parameters:
+--worksize XX (-w XX)
+Has a minor effect, should be a multiple of 64 up to 256 maximum.
+SUMMARY: Worth playing with once everything else has been tried but will
+probably do nothing.
+
+
+Overclocking for scrypt mining: First of all, do not underclock your
+memory initially. Scrypt mining requires memory speed and on most, but
+not all, GPUs, lowering memory speed lowers mining performance.
+
+
+Second, absolute engine clock speeds do NOT correlate with hashrate. The
+ratio of engine clock speed to memory matters, so if you set your memory
+to the default value, and then start overclocking as you are running it,
+you should find a sweet spot where the hashrate peaks and then it might
+actually drop if you increase the engine clock speed further.
+
+
+Third, the combination of motherboard, CPU and system ram ALSO makes a
+difference, so values that work for a GPU on one system may not work for
+the same GPU on a different system. A decent amount of system ram is
+actually required for scrypt mining, and 4GB is suggested.
+
+
+Finally, the power consumption while mining at high engine clocks and
+very high memory clocks can be far in excess of what you might
+imagine. For example, a 7970 running with the following settings:
+--thread-concurrency 22392 --gpu-engine 1135 --gpu-memclock 1890 was
+using 305W!
+
+
+## Example: tuning a 7970
+
+On linux run this command:
+
+    export GPU_MAX_ALLOC_PERCENT=100
+
+or on windows this:
+
+    setx GPU_MAX_ALLOC_PERCENT 100
+
+in the same console/bash/dos prompt/bat file/whatever you want to call it,
+before running sgminer.
+
+First, find the highest thread concurrency that you can start it at.
+They should all start at 8192 but some will go up to 3 times that. Don't
+go too high on the intensity while testing and don't change gpu threads.
+If you cannot go above 8192, don't fret as you can still get a high
+hashrate.
+
+Delete any .bin files so you're starting from scratch and see what bins
+get generated.
+
+First try without any thread concurrency or even shaders, as sgminer
+will try to find an optimal value:
+
+    sgminer -I 13
+
+If that starts mining, see what bin was generated, it is likely the
+largest meaningful TC you can set. Starting it on mine I get:
+
+    scrypt130302Tahitiglg2tc22392w64l8.bin
+
+The tc22392 part tells you what thread concurrency was used. It should
+start without TC parameters, but you never know. So if it doesn't, start
+with --thread-concurrency 8192 and add 2048 to it at a time till you
+find the highest value it will start successfully at.
+
+Then start overclocking the eyeballs off your memory, as 7970s are
+exquisitely sensitive to memory speed and amazingly overclockable but
+please make sure it keeps adequately cooled with --auto-fan! Do it
+while it's running from the GPU menu. Go up by 25 at a time every 30
+seconds or so until your GPU crashes. Then reboot and start it 25 lower
+as a rough start. Mine runs stable at 1900 memory without overvolting.
+Overvolting is the only thing that can actually damage your GPU so I
+wouldn't recommend it at all.
+
+Then once you find the maximum memory clock speed, you need to find
+the sweet spot engine clock speed that matches it. It's a fine line
+where one more MHz will make the hashrate drop by 20%. It's somewhere in
+the 0.57 - 0.6 ratio range. Start your engine clock speed at half your
+memory clock speed and then increase it by 5 at a time. The hashrate
+should climb a little each rise in engine speed and then suddenly drop
+above a certain value. Decrease it by 1 then until you find it climbs
+dramatically. If your engine clock speed cannot get that high without
+crashing the GPU, you will have to use a lower memclock.
+
+Then, and only then, bother trying to increase intensity further.
+
+My final settings were:
+
+    --gpu-engine 1141  --gpu-memclock 1875 -I 20
+
+for a hashrate of 745kH.
+
+Note I did not bother setting a thread concurrency. Once you have the
+magic endpoint, look at what tc was chosen by the bin file generated
+and then hard code that in next time (eg --thread-concurrency 22392) as
+slight changes in thread concurrency will happen every time if you don't
+specify one, and the tc to clock ratios are critical!
+
+Good luck, and if this doesn't work for you, well same old magic
+discussion applies, I cannot debug every hardware combo out there.
+
+Your numbers will be your numbers depending on your hardware combination
+and OS, so don't expect to get exactly the same results!
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -204,6 +204,8 @@ static enum cl_kernels select_kernel(char *arg)
 		return KL_CKOLIVAS;
 	if (!strcmp(arg, ZUIKKIS_KERNNAME))
 		return KL_ZUIKKIS;
+	if (!strcmp(arg, PSW_KERNNAME))
+		return KL_PSW;

 	return KL_NONE;
 }
@ -565,6 +567,7 @@ char *set_intensity(char *arg)
 	else {
 		gpus[device].dynamic = false;
 		val = atoi(nextptr);
+		if (val == 0) return "disabled";
 		if (val < MIN_INTENSITY || val > MAX_INTENSITY)
 			return "Invalid value passed to set intensity";
 		tt = &gpus[device].intensity;
@ -581,6 +584,7 @@ char *set_intensity(char *arg)
 		else {
 			gpus[device].dynamic = false;
 			val = atoi(nextptr);
+			if (val == 0) return "disabled";
 			if (val < MIN_INTENSITY || val > MAX_INTENSITY)
 				return "Invalid value passed to set intensity";

@ -612,6 +616,7 @@ char *set_xintensity(char *arg)
 	if (nextptr == NULL)
 		return "Invalid parameters for shader based intensity";
 	val = atoi(nextptr);
+	if (val == 0) return "disabled";
 	if (val < MIN_XINTENSITY || val > MAX_XINTENSITY)
 		return "Invalid value passed to set shader-based intensity";

@ -623,6 +628,7 @@ char *set_xintensity(char *arg)

 	while ((nextptr = strtok(NULL, ",")) != NULL) {
 		val = atoi(nextptr);
+		if (val == 0) return "disabled";
 		if (val < MIN_XINTENSITY || val > MAX_XINTENSITY)
 			return "Invalid value passed to set shader based intensity";
 		gpus[device].dynamic = false; // Disable dynamic intensity
@ -651,6 +657,7 @@ char *set_rawintensity(char *arg)
 	if (nextptr == NULL)
 		return "Invalid parameters for raw intensity";
 	val = atoi(nextptr);
+	if (val == 0) return "disabled";
 	if (val < MIN_RAWINTENSITY || val > MAX_RAWINTENSITY)
 		return "Invalid value passed to set raw intensity";

@ -662,6 +669,7 @@ char *set_rawintensity(char *arg)

 	while ((nextptr = strtok(NULL, ",")) != NULL) {
 		val = atoi(nextptr);
+		if (val == 0) return "disabled";
 		if (val < MIN_RAWINTENSITY || val > MAX_RAWINTENSITY)
 			return "Invalid value passed to set raw intensity";
 		gpus[device].dynamic = false; // Disable dynamic intensity
@ -723,8 +731,10 @@ void manage_gpu(void)
 	char checkin[40];
 	char input;

-	if (!opt_g_threads)
+	if (!opt_g_threads) {
+		applog(LOG_ERR, "opt_g_threads not set in manage_gpu()");
 		return;
+	}

 	opt_loginput = true;
 	immedok(logwin, true);
@ -1309,6 +1319,9 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 			case KL_ZUIKKIS:
 				cgpu->kname = ZUIKKIS_KERNNAME;
 				break;
+			case KL_PSW:
+				cgpu->kname = PSW_KERNNAME;
+				break;
 			default:
 				break;
 		}
@ -1340,6 +1353,7 @@ static bool opencl_thread_init(struct thr_info *thr)
 	case KL_ALEXKARNEW:
 	case KL_ALEXKAROLD:
 	case KL_CKOLIVAS:
+	case KL_PSW:
 	case KL_ZUIKKIS:
 		thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
 		break;
--- a/kernel/psw.cl
+++ b/kernel/psw.cl
@ -0,0 +1,792 @@
+/*-
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
+ * 2013 Optimization by Pavel Semjanov,
+ * 2012-2013 Con Kolivas.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+/* ES: byte-lane masks used by EndianSwap. K: SHA-256 round constants followed by extra fixed words. */
+__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
+__constant uint K[] = {
+	0x428a2f98U, // [0..14]: constants for rounds 0-14
+	0x71374491U,
+	0xb5c0fbcfU,
+	0xe9b5dba5U,
+	0x3956c25bU,
+	0x59f111f1U,
+	0x923f82a4U,
+	0xab1c5ed5U,
+	0xd807aa98U,
+	0x12835b01U,
+	0x243185beU, // 10
+	0x550c7dc3U,
+	0x72be5d74U,
+	0x80deb1feU,
+	0x9bdc06a7U,
+	0xe49b69c1U, // [15..62]: constants for rounds 16-63 (round 15 uses K[76] instead)
+	0xefbe4786U,
+	0x0fc19dc6U,
+	0x240ca1ccU,
+	0x2de92c6fU,
+	0x4a7484aaU, // 20
+	0x5cb0a9dcU,
+	0x76f988daU,
+	0x983e5152U,
+	0xa831c66dU,
+	0xb00327c8U,
+	0xbf597fc7U,
+	0xc6e00bf3U,
+	0xd5a79147U,
+	0x06ca6351U,
+	0x14292967U, // 30
+	0x27b70a85U,
+	0x2e1b2138U,
+	0x4d2c6dfcU,
+	0x53380d13U,
+	0x650a7354U,
+	0x766a0abbU,
+	0x81c2c92eU,
+	0x92722c85U,
+	0xa2bfe8a1U,
+	0xa81a664bU, // 40
+	0xc24b8b70U,
+	0xc76c51a3U,
+	0xd192e819U,
+	0xd6990624U,
+	0xf40e3585U,
+	0x106aa070U,
+	0x19a4c116U,
+	0x1e376c08U,
+	0x2748774cU,
+	0x34b0bcb5U, // 50
+	0x391c0cb3U,
+	0x4ed8aa4aU,
+	0x5b9cca4fU,
+	0x682e6ff3U,
+	0x748f82eeU,
+	0x78a5636fU,
+	0x84c87814U,
+	0x8cc70208U,
+	0x90befffaU,
+	0xa4506cebU, // 60
+	0xbef9a3f7U,
+	0xc67178f2U,
+	0x98c7e2a2U, // [63..75]: consumed by the first four rounds of SHA256_fresh
+	0xfc08884dU,
+	0xcd2a11aeU,
+	0x510e527fU,
+	0x9b05688cU,
+	0xC3910C8EU,
+	0xfb6feee7U,
+	0x2a01a605U, // 70
+	0x0c2e12e0U,
+	0x4498517BU,
+	0x6a09e667U,
+	0xa4ce148bU,
+	0x95F61999U,
+	0xc19bf174U, // [76]: constant for round 15, kept out of sequence
+	0xBB67AE85U, // [77..88]: not read by SHA256; presumably used further down the kernel - verify
+	0x3C6EF372U,
+	0xA54FF53AU,
+	0x1F83D9ABU, // 80
+	0x5BE0CD19U,
+	0x5C5C5C5CU,
+	0x36363636U,
+	0x80000000U,
+	0x000003FFU,
+	0x00000280U,
+	0x000004a0U,
+	0x00000300U
+};
+/* Bit helpers on OpenCL built-ins: rotl rotates left, Ch picks y where x is set else z, Maj is the bitwise majority. */
+#define rotl(x,y) rotate(x,y)
+#define Ch(x,y,z) bitselect(z,y,x)
+#define Maj(x,y,z) Ch((x^z),y,z)
+/* Reverses the byte order of each 32-bit word. NOTE: n is not parenthesised, so pass a simple operand. */
+#define EndianSwap(n) (rotl(n & ES[0], 24U)|rotl(n & ES[1], 8U))
+/* SHA-256 functions written as left rotations: Tr2 = Sigma0, Tr1 = Sigma1, Wr2 = sigma0, Wr1 = sigma1. */
+#define Tr2(x)		(rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U))
+#define Tr1(x)		(rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U))
+#define Wr2(x)		(rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U))
+#define Wr1(x)		(rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U))
+/* One round: k is message word + round constant; only d and h are written. Expands to several statements (no do/while). */
+#define RND(a, b, c, d, e, f, g, h, k)	\
+	h += Tr1(e); 			\
+	h += Ch(e, f, g); 		\
+	h += k;				\
+	d += h;				\
+	h += Tr2(a); 			\
+	h += Maj(a, b, c);
+/* Runs the 64-round SHA-256 compression of the 64-byte block block0..block3 and adds the result into state0/state1. */
+void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
+{
+	uint4 S0 = *state0;
+	uint4 S1 = *state1;
+	/* Rounds run on the local copies S0/S1 so the incoming state can be added back at the end. */
+#define A S0.x
+#define B S0.y
+#define C S0.z
+#define D S0.w
+#define E S1.x
+#define F S1.y
+#define G S1.z
+#define H S1.w
+	/* W is a rolling window of 16 message-schedule words, four per vector. */
+	uint4 W[4];
+	/* Rounds 0-15: schedule words are the input block itself; the A..H arguments rotate one place per round. */
+	W[ 0].x = block0.x;
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[0]);
+	W[ 0].y = block0.y;
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[1]);
+	W[ 0].z = block0.z;
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[2]);
+	W[ 0].w = block0.w;
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[3]);
+
+	W[ 1].x = block1.x;
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
+	W[ 1].y = block1.y;
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
+	W[ 1].z = block1.z;
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
+	W[ 1].w = block1.w;
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
+
+	W[ 2].x = block2.x;
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
+	W[ 2].y = block2.y;
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
+	W[ 2].z = block2.z;
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
+	W[ 2].w = block2.w;
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
+
+	W[ 3].x = block3.x;
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
+	W[ 3].y = block3.y;
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
+	W[ 3].z = block3.z;
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
+	W[ 3].w = block3.w;
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]); // round 15's constant is stored at K[76]; K[15] onwards serve rounds 16-63
+
+	/* WUpdate(i) extends W[i] by four words; lanes z/w need the new x/y, hence the second Wr1 pass (Wr1(0) == 0). */
+#define WUpdate(i) { uint4 tmp1, tmp2, tmp3;						\
+	tmp1 = (uint4) (W[(i+0)%4].y, W[(i+0)%4].z, W[(i+0)%4].w, W[(i+1)%4].x);	\
+	tmp2 = (uint4) (W[(i+2)%4].y, W[(i+2)%4].z, W[(i+2)%4].w, W[(i+3)%4].x);	\
+	tmp3 = (uint4) (W[(i+3)%4].z, W[(i+3)%4].w, 0, 0);				\
+	W[(i+0)%4] += tmp2 + Wr2 (tmp1) + Wr1 (tmp3);					\
+	tmp1 = (uint4) (0, 0, W[(i+0)%4].x, W[(i+0)%4].y);				\
+	W[(i+0)%4] += Wr1 (tmp1);							\
+	}
+	/* Rounds 16-63: one WUpdate per four rounds, cycling through W[0]..W[3]. */
+	WUpdate (0);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
+
+	WUpdate (1);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
+
+	WUpdate (2);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
+
+	WUpdate (3);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
+
+	WUpdate (0);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
+
+	WUpdate (1);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
+
+	WUpdate (2);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
+
+	WUpdate (3);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
+
+	WUpdate (0);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
+
+	WUpdate (1);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
+
+	WUpdate (2);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
+
+	WUpdate (3);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+	/* Feed-forward: add the round output to the state that was passed in. */
+	*state0 += S0;
+	*state1 += S1;
+}
+
+// Runs 64 compression rounds over the 16-word message (block0..block3) and
+// stores the result in *state0 / *state1.
+//
+// The incoming contents of *state0 / *state1 are never read: every state word
+// A..H is assigned (rounds 0-3 below) before its first use, so callers may
+// pass uninitialised vectors. The starting chaining value is instead folded
+// into the K[] entries used by the first four rounds and re-added at the end.
+// NOTE(review): K[] is defined outside this chunk; presumably K[63..81] are
+// values precomputed from the standard SHA-256 initial hash - confirm.
+//
+// state0 receives words A,B,C,D (x,y,z,w); state1 receives E,F,G,H.
+void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
+{
+#define A (*state0).x
+#define B (*state0).y
+#define C (*state0).z
+#define D (*state0).w
+#define E (*state1).x
+#define F (*state1).y
+#define G (*state1).z
+#define H (*state1).w
+
+	uint4 W[4];
+
+	// Rounds 0-3 are written out by hand instead of using RND: only the two
+	// state words each round produces are computed, with the known starting
+	// values already merged into the K[] constants.
+	W[0].x = block0.x;
+	D= K[63] +W[0].x;
+	H= K[64] +W[0].x;
+
+	W[0].y = block0.y;
+	C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y;
+	G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]);
+
+	W[0].z = block0.z;
+	B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z;
+	F= K[72] +B+Tr2(G)+Maj(G,H, K[73]);
+
+	W[0].w = block0.w;
+	A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w;
+	E= K[75] +A+Tr2(F)+Maj(F,G,H);
+
+	// Rounds 4-15: message words are consumed unchanged. The state arguments
+	// to RND rotate by one position per round instead of moving data.
+	W[1].x = block1.x;
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
+	W[1].y = block1.y;
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
+	W[1].z = block1.z;
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
+	W[1].w = block1.w;
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
+	
+	W[2].x = block2.x;
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
+	W[2].y = block2.y;
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
+	W[2].z = block2.z;
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
+	W[2].w = block2.w;
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
+	
+	W[3].x = block3.x;
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
+	W[3].y = block3.y;
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
+	W[3].z = block3.z;
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
+	W[3].w = block3.w;
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]);
+
+	// Rounds 16-63: the 16-word window W[0..3] is updated in place. Each
+	// word t is replaced by W[t-16] + Wr1(W[t-2]) + W[t-7] + Wr2(W[t-15])
+	// immediately before the round that consumes it.
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
+
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
+
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+
+	// Final addition of constant vectors to the round output.
+	// NOTE(review): presumably the same starting state that was folded into
+	// rounds 0-3 (note K[66], K[67], K[73] appear in both places) - confirm.
+	*state0 += (uint4)(K[73], K[77], K[78], K[79]);
+	*state1 += (uint4)(K[66], K[67], K[80], K[81]);
+}
+
+// Per-round input words for SHA256_fixed(): entry i is passed directly as the
+// last argument of the i-th RND step, so no message schedule is computed at
+// run time.
+// NOTE(review): the values look like the standard SHA-256 round constants
+// with the schedule words of one fixed message block already added
+// (e.g. entry 0 is 0x428a2f98 + 1, entry 1 is 0x71374491 + 0x80000000) -
+// confirm against the generator used to produce this table.
+__constant uint fixedW[64] =
+{
+	0x428a2f99,0xf1374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf794,
+	0xf59b89c2,0x73924787,0x23c6886e,0xa42ca65c,0x15ed3627,0x4d6edcbf,0xe28217fc,0xef02488f,
+	0xb707775c,0x0468c23f,0xe7e72b4c,0x49e1f1a2,0x4b99c816,0x926d1570,0xaa0fc072,0xadb36e2c,
+	0xad87a3ea,0xbcb1d3a3,0x7b993186,0x562b9420,0xbff3ca0c,0xda4b0c23,0x6cd8711a,0x8f337caa,
+	0xc91b1417,0xc359dce1,0xa83253a7,0x3b13c12d,0x9d3d725d,0xd9031a84,0xb1a03340,0x16f58012,
+	0xe64fb6a2,0xe84d923a,0xe93a5730,0x09837686,0x078ff753,0x29833341,0xd5de0b7e,0x6948ccf4,
+	0xe0a1adbe,0x7c728e11,0x511c78e4,0x315b45bd,0xfca71413,0xea28f96a,0x79703128,0x4e1ef848,
+};
+
+// Applies 64 RND steps to a copy of the state, feeding each step the
+// corresponding precomputed entry of fixedW[], then adds the result back into
+// *state0 / *state1.
+//
+// There is no message parameter: whatever block is being compressed is baked
+// into fixedW[]. state0 holds words A..D (x,y,z,w) and state1 holds E..H.
+void SHA256_fixed(uint4*restrict state0,uint4*restrict state1)
+{
+	// Work on private copies so the original state is still available for
+	// the final addition.
+	uint4 S0 = *state0;
+	uint4 S1 = *state1;
+
+#define A S0.x
+#define B S0.y
+#define C S0.z
+#define D S0.w
+#define E S1.x
+#define F S1.y
+#define G S1.z
+#define H S1.w
+
+	// The argument list rotates by one position per round (period 8), so the
+	// state words never have to be shuffled between rounds.
+	RND(A,B,C,D,E,F,G,H, fixedW[0]);
+	RND(H,A,B,C,D,E,F,G, fixedW[1]);
+	RND(G,H,A,B,C,D,E,F, fixedW[2]);
+	RND(F,G,H,A,B,C,D,E, fixedW[3]);
+	RND(E,F,G,H,A,B,C,D, fixedW[4]);
+	RND(D,E,F,G,H,A,B,C, fixedW[5]);
+	RND(C,D,E,F,G,H,A,B, fixedW[6]);
+	RND(B,C,D,E,F,G,H,A, fixedW[7]);
+	RND(A,B,C,D,E,F,G,H, fixedW[8]);
+	RND(H,A,B,C,D,E,F,G, fixedW[9]);
+	RND(G,H,A,B,C,D,E,F, fixedW[10]);
+	RND(F,G,H,A,B,C,D,E, fixedW[11]);
+	RND(E,F,G,H,A,B,C,D, fixedW[12]);
+	RND(D,E,F,G,H,A,B,C, fixedW[13]);
+	RND(C,D,E,F,G,H,A,B, fixedW[14]);
+	RND(B,C,D,E,F,G,H,A, fixedW[15]);
+	RND(A,B,C,D,E,F,G,H, fixedW[16]);
+	RND(H,A,B,C,D,E,F,G, fixedW[17]);
+	RND(G,H,A,B,C,D,E,F, fixedW[18]);
+	RND(F,G,H,A,B,C,D,E, fixedW[19]);
+	RND(E,F,G,H,A,B,C,D, fixedW[20]);
+	RND(D,E,F,G,H,A,B,C, fixedW[21]);
+	RND(C,D,E,F,G,H,A,B, fixedW[22]);
+	RND(B,C,D,E,F,G,H,A, fixedW[23]);
+	RND(A,B,C,D,E,F,G,H, fixedW[24]);
+	RND(H,A,B,C,D,E,F,G, fixedW[25]);
+	RND(G,H,A,B,C,D,E,F, fixedW[26]);
+	RND(F,G,H,A,B,C,D,E, fixedW[27]);
+	RND(E,F,G,H,A,B,C,D, fixedW[28]);
+	RND(D,E,F,G,H,A,B,C, fixedW[29]);
+	RND(C,D,E,F,G,H,A,B, fixedW[30]);
+	RND(B,C,D,E,F,G,H,A, fixedW[31]);
+	RND(A,B,C,D,E,F,G,H, fixedW[32]);
+	RND(H,A,B,C,D,E,F,G, fixedW[33]);
+	RND(G,H,A,B,C,D,E,F, fixedW[34]);
+	RND(F,G,H,A,B,C,D,E, fixedW[35]);
+	RND(E,F,G,H,A,B,C,D, fixedW[36]);
+	RND(D,E,F,G,H,A,B,C, fixedW[37]);
+	RND(C,D,E,F,G,H,A,B, fixedW[38]);
+	RND(B,C,D,E,F,G,H,A, fixedW[39]);
+	RND(A,B,C,D,E,F,G,H, fixedW[40]);
+	RND(H,A,B,C,D,E,F,G, fixedW[41]);
+	RND(G,H,A,B,C,D,E,F, fixedW[42]);
+	RND(F,G,H,A,B,C,D,E, fixedW[43]);
+	RND(E,F,G,H,A,B,C,D, fixedW[44]);
+	RND(D,E,F,G,H,A,B,C, fixedW[45]);
+	RND(C,D,E,F,G,H,A,B, fixedW[46]);
+	RND(B,C,D,E,F,G,H,A, fixedW[47]);
+	RND(A,B,C,D,E,F,G,H, fixedW[48]);
+	RND(H,A,B,C,D,E,F,G, fixedW[49]);
+	RND(G,H,A,B,C,D,E,F, fixedW[50]);
+	RND(F,G,H,A,B,C,D,E, fixedW[51]);
+	RND(E,F,G,H,A,B,C,D, fixedW[52]);
+	RND(D,E,F,G,H,A,B,C, fixedW[53]);
+	RND(C,D,E,F,G,H,A,B, fixedW[54]);
+	RND(B,C,D,E,F,G,H,A, fixedW[55]);
+	RND(A,B,C,D,E,F,G,H, fixedW[56]);
+	RND(H,A,B,C,D,E,F,G, fixedW[57]);
+	RND(G,H,A,B,C,D,E,F, fixedW[58]);
+	RND(F,G,H,A,B,C,D,E, fixedW[59]);
+	RND(E,F,G,H,A,B,C,D, fixedW[60]);
+	RND(D,E,F,G,H,A,B,C, fixedW[61]);
+	RND(C,D,E,F,G,H,A,B, fixedW[62]);
+	RND(B,C,D,E,F,G,H,A, fixedW[63]);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+	// Add the round output onto the state the caller passed in.
+	*state0 += S0;
+	*state1 += S1;
+}
+
+// Rearranges the 32 words of B into the layout salsa() operates on and
+// byte-swaps every word. Each half (B[0..3] and B[4..7]) is handled on its
+// own: output vector k takes its x, y, z, w components from four different
+// input vectors of the same half. unshittify() applies the reverse gather.
+void shittify(uint4 B[8])
+{
+	// Gather every output vector first so that no input is overwritten
+	// before it has been read.
+	const uint4 lo0 = (uint4)(B[1].x, B[2].y, B[3].z, B[0].w);
+	const uint4 lo1 = (uint4)(B[2].x, B[3].y, B[0].z, B[1].w);
+	const uint4 lo2 = (uint4)(B[3].x, B[0].y, B[1].z, B[2].w);
+	const uint4 lo3 = (uint4)(B[0].x, B[1].y, B[2].z, B[3].w);
+
+	const uint4 hi0 = (uint4)(B[5].x, B[6].y, B[7].z, B[4].w);
+	const uint4 hi1 = (uint4)(B[6].x, B[7].y, B[4].z, B[5].w);
+	const uint4 hi2 = (uint4)(B[7].x, B[4].y, B[5].z, B[6].w);
+	const uint4 hi3 = (uint4)(B[4].x, B[5].y, B[6].z, B[7].w);
+
+	// Store back with the byte order of each word reversed.
+	B[0] = EndianSwap(lo0);
+	B[1] = EndianSwap(lo1);
+	B[2] = EndianSwap(lo2);
+	B[3] = EndianSwap(lo3);
+	B[4] = EndianSwap(hi0);
+	B[5] = EndianSwap(hi1);
+	B[6] = EndianSwap(hi2);
+	B[7] = EndianSwap(hi3);
+}
+
+// Reverse of shittify(): gathers the components of each half of B back into
+// their original vectors and byte-swaps every word again.
+// NOTE(review): this restores the original data only if EndianSwap() is its
+// own inverse - confirm against its definition.
+void unshittify(uint4 B[8])
+{
+	// Gather every output vector first so that no input is overwritten
+	// before it has been read.
+	const uint4 lo0 = (uint4)(B[3].x, B[2].y, B[1].z, B[0].w);
+	const uint4 lo1 = (uint4)(B[0].x, B[3].y, B[2].z, B[1].w);
+	const uint4 lo2 = (uint4)(B[1].x, B[0].y, B[3].z, B[2].w);
+	const uint4 lo3 = (uint4)(B[2].x, B[1].y, B[0].z, B[3].w);
+
+	const uint4 hi0 = (uint4)(B[7].x, B[6].y, B[5].z, B[4].w);
+	const uint4 hi1 = (uint4)(B[4].x, B[7].y, B[6].z, B[5].w);
+	const uint4 hi2 = (uint4)(B[5].x, B[4].y, B[7].z, B[6].w);
+	const uint4 hi3 = (uint4)(B[6].x, B[5].y, B[4].z, B[7].w);
+
+	// Store back with the byte order of each word reversed.
+	B[0] = EndianSwap(lo0);
+	B[1] = EndianSwap(lo1);
+	B[2] = EndianSwap(lo2);
+	B[3] = EndianSwap(lo3);
+	B[4] = EndianSwap(hi0);
+	B[5] = EndianSwap(hi1);
+	B[6] = EndianSwap(hi2);
+	B[7] = EndianSwap(hi3);
+}
+
+// Mixes the two halves of B in place:
+//   1. B[0..3] ^= B[4..7], run the core permutation on a copy, add it back;
+//   2. B[4..7] ^= B[0..3], run the core permutation on a copy, add it back.
+// The core permutation is four iterations of the eight rotl/xor steps below.
+// NOTE(review): this matches the shape of scrypt's BlockMix with r = 1 built
+// on Salsa20/8, operating on the word layout produced by shittify() - confirm.
+void salsa(uint4 B[8])
+{
+	uint4 w[4];
+
+	// Fold the second half into the first; w is the working copy.
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		w[i] = (B[i]^=B[i+4]);
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+	{
+		// First four steps combine the vectors lane by lane.
+		w[0] ^= rotl(w[3]     +w[2]     , 7U);
+		w[1] ^= rotl(w[0]     +w[3]     , 9U);
+		w[2] ^= rotl(w[1]     +w[0]     ,13U);
+		w[3] ^= rotl(w[2]     +w[1]     ,18U);
+		// Last four steps use swizzles so each lane reads its neighbours'
+		// lanes instead of physically transposing the state.
+		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
+		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
+		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
+		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
+	}
+
+	// Add the permuted copy onto the first half, fold that result into the
+	// second half, and start the second pass from it.
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		w[i] = (B[i+4]^=(B[i]+=w[i]));
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+	{
+		w[0] ^= rotl(w[3]     +w[2]     , 7U);
+		w[1] ^= rotl(w[0]     +w[3]     , 9U);
+		w[2] ^= rotl(w[1]     +w[0]     ,13U);
+		w[3] ^= rotl(w[2]     +w[1]     ,18U);
+		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
+		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
+		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
+		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
+	}
+
+	// Add the second permuted copy onto the second half.
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i+4] += w[i];
+}
+
+// Coord(a,b,c): flat index of element (a,b,c) in a 3-D table whose first
+// argument varies fastest. The extents are taken from variables named after
+// the argument tokens with "SIZE" appended (e.g. passing z and x requires
+// zSIZE and xSIZE to be in scope), so the arguments must be plain identifiers.
+// Arguments and the whole expansion are parenthesized so the macro is safe
+// inside larger expressions.
+#define Coord(x,y,z) ((x)+(y)*(x ## SIZE)+(z)*(y ## SIZE)*(x ## SIZE))
+// CO: index for the current (z, x, y) = z + x*zSIZE + y*xSIZE*zSIZE.
+#define CO Coord(z,x,y)
+
+// Runs the memory-hard part of the hash on X in place, using `lookup` as the
+// scratch table.
+//
+// X      - 8 vectors of working state; reordered by shittify() on entry and
+//          restored by unshittify() on exit.
+// lookup - global scratch table indexed through CO as
+//          z + x*zSIZE + y*xSIZE*zSIZE, where x is this work-item's slot
+//          (global id modulo CONCURRENT_THREADS) and y is the stored row.
+//
+// Only every LOOKUP_GAP-th state is stored (rows 0 .. ySIZE-1); the states in
+// between are recomputed on demand in the second phase by re-running salsa().
+void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
+{
+	shittify(X);
+	const uint zSIZE = 8;
+	// Number of stored rows per slot: ceil(1024 / LOOKUP_GAP).
+	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
+	const uint xSIZE = CONCURRENT_THREADS;
+	uint x = get_global_id(0)%xSIZE;
+
+	// Phase 1: advance X through 1024 salsa() steps, saving a snapshot at
+	// the start of every LOOKUP_GAP-sized group.
+	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
+	{
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			lookup[CO] = X[z];
+		for(uint i=0; i<LOOKUP_GAP; ++i)
+			salsa(X);
+	}
+	// A trailing partial group exists only when LOOKUP_GAP does not divide
+	// 1024. Testing the remainder (rather than listing 1, 2, 4, 8) also
+	// covers 16, 32, ... 1024: for those gaps there is no partial group, and
+	// storing one would write row y == ySIZE, one past the last row.
+#if (1024 % LOOKUP_GAP) != 0
+	{
+		uint y = (1024/LOOKUP_GAP);
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			lookup[CO] = X[z];
+		for(uint i=0; i<1024%LOOKUP_GAP; ++i)
+			salsa(X);
+	}
+#endif
+	// Phase 2: 1024 data-dependent lookups. Each picks entry j from the
+	// current state, rebuilds it from the nearest stored snapshot, XORs it
+	// into X and mixes again.
+	for (uint i=0; i<1024; ++i)
+	{
+		uint4 V[8];
+		// NOTE(review): K[] is defined outside this chunk; K[85] is
+		// presumably the mask 1023 so that j stays below 1024 - confirm.
+		uint j = X[7].x & K[85];
+		uint y = (j/LOOKUP_GAP);
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			V[z] = lookup[CO];
+
+		// Step the snapshot forward by j % LOOKUP_GAP to reach entry j.
+#if (LOOKUP_GAP == 1)
+#elif (LOOKUP_GAP == 2)
+		if (j&1)
+			salsa(V);
+#else
+		uint val = j%LOOKUP_GAP;
+		for (uint z=0; z<val; ++z)
+			salsa(V);
+#endif
+
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			X[z] ^= V[z];
+		salsa(X);
+	}
+	unshittify(X);
+}
+
+// Index of the slot in `output` that holds the number of results stored so far.
+#define FOUND (0xFF)
+// Appends Xnonce to `output` and bumps the counter at output[FOUND].
+// Many work-items share `output`, so the slot must be reserved atomically;
+// with a plain `output[FOUND]++` two work-items can read the same count and
+// overwrite each other's result. atomic_inc() returns the previous value,
+// which is exactly what the post-increment produced. Builds for OpenCL 1.0,
+// where 32-bit global atomics are not a core feature, keep the original form.
+#if defined(__OPENCL_VERSION__) && (__OPENCL_VERSION__ >= 110)
+#define SETFOUND(Xnonce) output[atomic_inc(&output[FOUND])] = (Xnonce)
+#else
+#define SETFOUND(Xnonce) output[output[FOUND]++] = (Xnonce)
+#endif
+
+// Kernel entry point: each work-item hashes one candidate and reports it if
+// the result meets the target.
+//
+// input      - message words; input[0..3] are hashed whole, and the first
+//              three words of input[4] are combined with the work-item's
+//              global id to form the last data vector.
+// output     - result buffer written through SETFOUND().
+// padcache   - scratch table handed to scrypt_core().
+// midstate0 / midstate16 - starting hash state for the first SHA256 call.
+// target     - a candidate is reported when EndianSwap(ostate1.w) <= target.
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(__global const uint4 * restrict input,
+volatile __global uint*restrict output, __global uint4*restrict padcache,
+const uint4 midstate0, const uint4 midstate16, const uint target)
+{
+	uint gid = get_global_id(0);
+	uint4 X[8];
+	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
+	// The global id occupies the fourth word, so every work-item hashes a
+	// different message; the same id is what gets reported on success.
+	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
+	uint4 pad0 = midstate0, pad1 = midstate16;
+
+	// Finish hashing the message from the supplied midstate, then derive two
+	// states from that digest XORed with K[82] and K[83] respectively.
+	// NOTE(review): K[] is defined outside this chunk; K[82] / K[83] are
+	// presumably the HMAC outer / inner pad patterns - confirm.
+	SHA256(&pad0,&pad1, data, (uint4)(K[84],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[86]));
+	SHA256_fresh(&ostate0,&ostate1, pad0^ K[82], pad1^ K[82], K[82], K[82]);
+	SHA256_fresh(&tstate0,&tstate1, pad0^ K[83], pad1^ K[83], K[83], K[83]);
+
+	// Keep a copy of tstate before it absorbs the message; it is reused
+	// after scrypt_core().
+	tmp0 = tstate0;
+	tmp1 = tstate1;
+	SHA256(&tstate0, &tstate1, input[0],input[1],input[2],input[3]);
+
+	// Produce the 8 vectors of X, two per iteration; i+1 is mixed into the
+	// hashed block so each pair differs.
+#pragma unroll
+	for (uint i=0; i<4; i++) 
+	{
+		pad0 = tstate0;
+		pad1 = tstate1;
+		X[i*2 ] = ostate0;
+		X[i*2+1] = ostate1;
+
+		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
+		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
+	}
+	scrypt_core(X,padcache);
+	// Hash the mixed X with the saved tstate copy, apply the fixed final
+	// block, then fold the result into ostate.
+	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
+	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
+	SHA256_fixed(&tmp0,&tmp1);
+	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
+
+	// Only the last state word is compared against the target.
+	bool result = (EndianSwap(ostate1.w) <= target);
+	if (result)
+		SETFOUND(gid);
+}
--- a/miner.h
+++ b/miner.h
@ -377,6 +377,7 @@ enum cl_kernels {
 	KL_ALEXKARNEW,
 	KL_ALEXKAROLD,
 	KL_CKOLIVAS,
+	KL_PSW,
 	KL_ZUIKKIS,
 };

--- a/ocl.c
+++ b/ocl.c
@ -440,6 +440,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, CKOLIVAS_KERNNAME".cl");
 			strcpy(binaryfilename, CKOLIVAS_KERNNAME);
 			break;
+		case KL_PSW:
+			applog(LOG_WARNING, "Kernel psw is experimental.");
+			strcpy(filename, PSW_KERNNAME".cl");
+			strcpy(binaryfilename, PSW_KERNNAME);
+			break;
 		case KL_ZUIKKIS:
 			applog(LOG_WARNING, "Kernel zuikkis is experimental.");
 			strcpy(filename, ZUIKKIS_KERNNAME".cl");
--- a/sgminer.c
+++ b/sgminer.c
@ -62,6 +62,11 @@ char *curly = ":D";
 	#include <sys/wait.h>
 #endif

+#ifdef GIT_VERSION
+#undef VERSION
+#define VERSION GIT_VERSION
+#endif
+
 struct strategies strategies[] = {
 	{ "Failover" },
 	{ "Round Robin" },
@ -85,8 +90,8 @@ bool opt_compact;
 const int opt_cutofftemp = 95;
 int opt_log_interval = 5;
 int opt_queue = 1;
-int opt_scantime = 10;
-int opt_expiry = 30;
+int opt_scantime = 7;
+int opt_expiry = 28;
 static const bool opt_time = true;
 unsigned long long global_hashrate;
 unsigned long global_quota_gcd = 1;
@ -1125,7 +1130,7 @@ static struct opt_table opt_config_table[] = {
 		     set_int_0_to_9999, opt_show_intval, &opt_platform_id,
 		     "Select OpenCL platform ID to use for GPU mining"),
 #ifndef HAVE_ADL
-	OPT_WITH_ARG("--gpu-threads|-g",
+	OPT_WITH_ARG("--gpu-threads|-g", // FIXME: why is this in a conditional?
 		     set_int_1_to_10, opt_show_intval, &opt_g_threads,
 		     "Number of threads per GPU (1 - 10)"),
 #else
@ -1295,16 +1300,16 @@ static struct opt_table opt_config_table[] = {
 #ifdef HAVE_ADL
 	OPT_WITH_ARG("--temp-cutoff",
 		     set_temp_cutoff, opt_show_intval, &opt_cutofftemp,
-		     "Temperature where a device will be automatically disabled, one value or comma separated list"),
+		     "Temperature which a device will be automatically disabled at, one value or comma separated list"),
 	OPT_WITH_ARG("--temp-hysteresis",
 		     set_int_1_to_10, opt_show_intval, &opt_hysteresis,
 		     "Set how much the temperature can fluctuate outside limits when automanaging speeds"),
 	OPT_WITH_ARG("--temp-overheat",
 		     set_temp_overheat, opt_show_intval, &opt_overheattemp,
-		     "Overheat temperature when automatically managing fan and GPU speeds, one value or comma separated list"),
+		     "Temperature which a device will be throttled at while automanaging fan and/or GPU, one value or comma separated list"),
 	OPT_WITH_ARG("--temp-target",
 		     set_temp_target, opt_show_intval, &opt_targettemp,
-		     "Target temperature when automatically managing fan and GPU speeds, one value or comma separated list"),
+		     "Temperature which a device should stay at while automanaging fan and/or GPU, one value or comma separated list"),
 #endif
 	OPT_WITHOUT_ARG("--text-only|-T",
 			opt_set_invbool, &use_curses,
@ -1389,28 +1394,30 @@ static char *parse_config(json_t *config, bool fileconf, int parent_iteration)
 				int n, size = json_array_size(val);

 				for (n = 0; n < size && !err; n++) {
-					if (json_is_string(json_array_get(val, n)))
+					if (json_is_string(json_array_get(val, n))) {
 						err = opt->cb_arg(json_string_value(json_array_get(val, n)), opt->u.arg);
+					}
 					else if (json_is_object(json_array_get(val, n)))
 					{
 						err = parse_config(json_array_get(val, n), false, n);
 						json_array_index = parent_iteration;
 					}
 				}
-			} else if ((opt->type & OPT_NOARG) && json_is_true(val))
+			} else if ((opt->type & OPT_NOARG) && json_is_boolean(val)) {
 				err = opt->cb(opt->u.arg);
-			else
+			} else {
 				err = "Invalid value";
+			}

 			if (err) {
 				/* Allow invalid values to be in configuration
 				 * file, just skipping over them provided the
 				 * JSON is still valid after that. */
 				if (fileconf) {
-					applog(LOG_ERR, "Invalid config option %s: %s", p, err);
+					applog(LOG_WARNING, "Skipping config option %s: %s", p, err);
 					fileconf_load = -1;
 				} else {
-					snprintf(err_buf, sizeof(err_buf), "Parsing JSON option %s: %s",
+					snprintf(err_buf, sizeof(err_buf), "Error parsing JSON option %s: %s",
 						p, err);
 					return err_buf;
 				}
@ -2126,7 +2133,7 @@ static void curses_print_status(void)
 	struct pool *pool = current_pool();

 	wattron(statuswin, A_BOLD);
-	cg_mvwprintw(statuswin, 0, 0, PACKAGE " version " VERSION " - Started: %s", datestamp);
+	cg_mvwprintw(statuswin, 0, 0, PACKAGE " " VERSION " - Started: %s", datestamp);
 	wattroff(statuswin, A_BOLD);
 	mvwhline(statuswin, 1, 0, '-', 80);
 	cg_mvwprintw(statuswin, 2, 0, "%s", statusline);
@ -4195,6 +4202,9 @@ void write_config(FILE *fcfg)
 				case KL_CKOLIVAS:
 					fprintf(fcfg, CKOLIVAS_KERNNAME);
 					break;
+				case KL_PSW:
+					fprintf(fcfg, PSW_KERNNAME);
+					break;
 				case KL_ZUIKKIS:
 					fprintf(fcfg, ZUIKKIS_KERNNAME);
 					break;
@ -4216,6 +4226,11 @@ void write_config(FILE *fcfg)
 			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
 				(int)gpus[i].shaders);

+		fputs("\",\n\"gpu-threads\" : \"", fcfg);
+		for(i = 0; i < nDevs; i++)
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
+				(int)gpus[i].threads);
+
 #ifdef HAVE_ADL
 		fputs("\",\n\"gpu-engine\" : \"", fcfg);
 		for(i = 0; i < nDevs; i++)
@ -4540,10 +4555,6 @@ retry:
 		switch_pools(pool);
 		goto updated;
 	} else if (!strncasecmp(&input, "d", 1)) {
-		if (enabled_pools <= 1) {
-			wlogprint("Cannot disable last pool");
-			goto retry;
-		}
 		selected = curses_int("Select pool number");
 		if (selected < 0 || selected >= total_pools) {
 			wlogprint("Invalid selection\n");