Merge branch 'master' of git://github.com/ckolivas/cgminer.git

12 years ago · 6007b3dc2c
39 changed files with 4770 additions and 574 deletions
--- a/.gitignore
+++ b/.gitignore
@ -31,3 +31,13 @@ mingw32-config.cache
				@@ -31,3 +31,13 @@ mingw32-config.cache
 ext_deps
 config.h.in
 config.h
+
+ccan/libccan.a
+lib/arg-nonnull.h
+lib/c++defs.h
+lib/libgnu.a
+lib/signal.h
+lib/string.h
+lib/warn-on-use.h
+
+mkinstalldirs
--- a/30
+++ b/30
@ -39,7 +39,7 @@ To give an IP address/subnet access to a group you use the group letter
				@@ -39,7 +39,7 @@ To give an IP address/subnet access to a group you use the group letter
 in front of the IP address instead of W: e.g. P:192.168.0/32
 An IP address/subnet can only be a member of one group
 A sample API group would be:
- --api-groups P:switchpool:enablepool:addpool:disablepool:removepool:*
+ --api-groups P:switchpool:enablepool:addpool:disablepool:removepool.poolpriority:*
 This would create a group 'P' that can do all current pool commands and all
 non-priviliged commands - the '*' means all non-priviledged commands
 Without the '*' the group would only have access to the pool commands
@ -174,6 +174,11 @@ The list of requests - a (*) means it requires privileged access - and replies a
				@@ -174,6 +174,11 @@ The list of requests - a (*) means it requires privileged access - and replies a
                              Use '\\' to get a '\' and '\,' to include a comma
                              inside URL, USR or PASS

+ poolpriority|N,... (*)
+               none           There is no reply section just the STATUS section
+                              stating the results of changing pool priorities
+                              See usage below
+
 disablepool|N (*)
               none           There is no reply section just the STATUS section
                              stating the results of disabling pool N
@ -270,8 +275,18 @@ The list of requests - a (*) means it requires privileged access - and replies a
				@@ -270,8 +275,18 @@ The list of requests - a (*) means it requires privileged access - and replies a
 When you enable, disable or restart a GPU or PGA, you will also get Thread messages
 in the cgminer status window

-When you switch to a different pool to the current one, you will get a
-'Switching to URL' message in the cgminer status windows
+The 'poolpriority' command can be used to reset the priority order of multiple
+pools with a single command - 'switchpool' only sets a single pool to first priority
+Each pool should be listed by id number in order of preference (first = most
+preferred)
+Any pools not listed will be prioritised after the ones that are listed, in the
+priority order they were originally
+If the priority change affects the miner's preference for mining, it may switch
+immediately
+
+When you switch to a different pool to the current one (including by priority
+change), you will get a 'Switching to URL' message in the cgminer status
+windows

 Obviously, the JSON format is simply just the names as given before the '='
 with the values after the '='
@ -309,7 +324,14 @@ miner.php - an example web page to access the API
				@@ -309,7 +324,14 @@ miner.php - an example web page to access the API
 Feature Changelog for external applications using the API:


-API V1.14
+API V1.15
+
+Added API commands:
+ 'poolpriority'
+
+----------
+
+API V1.14 (cgminer v2.5.0)

 Modified API commands:
 'stats' - more icarus timing stats added
--- a/27
+++ b/27
@ -16,7 +16,25 @@ p2pool.
				@@ -16,7 +16,25 @@ p2pool.

 Icarus

-There is a hidden option in cgminer when Icarus support is compiled in:
+There are two hidden options in cgminer when Icarus support is compiled in:
+
+--icarus-options <arg> Set specific FPGA board configurations - one set of values for all or comma separated
+           baud:work_division:fpga_count
+
+           baud           The Serial/USB baud rate - 115200 or 57600 only - default 115200
+           work_division  The fraction of work divided up for each FPGA chip - 1, 2, 4 or 8
+                          e.g. 2 means each FPGA does half the nonce range - default 2
+           fpga_count     The actual number of FPGA working - this would normally be the same
+                          as work_division - range is from 1 up to 'work_division'
+                          It defaults to the value of work_division - or 2 if you don't specify
+                          work_division
+
+If you define fewer comma seperated values than Icarus devices, the last values will be used
+for all extra devices
+
+An example would be: --icarus-options 57600:2:1
+This would mean: use 57600 baud, the FPGA board divides the work in half however
+only 1 FPGA actually runs on the board (e.g. like an early CM1 Icarus copy bitstream)

 --icarus-timing <arg> Set how the Icarus timing is calculated - one setting/value for all or comma separated
           default[=N]   Use the default Icarus hash time (2.6316ns)
@ -24,6 +42,9 @@ There is a hidden option in cgminer when Icarus support is compiled in:
				@@ -24,6 +42,9 @@ There is a hidden option in cgminer when Icarus support is compiled in:
           long          Re-calculate the hash time continuously
           value[=N]     Specify the hash time in nanoseconds (e.g. 2.6316) and abort time (e.g. 2.6316=80)

+If you define fewer comma seperated values than Icarus devices, the last values will be used
+for all extra devices
+
 Icarus timing is required for devices that do not exactly match a default Icarus Rev3 in
 processing speed
 If you have an Icarus Rev3 you should not normally need to use --icarus-timing since the
@ -55,9 +76,9 @@ bitstream to the default one, use 'long' mode and give it at least a few hundred
				@@ -55,9 +76,9 @@ bitstream to the default one, use 'long' mode and give it at least a few hundred
 'short' mode and take note of the final hash time value (Hs) calculated
 You can also use the RPC API 'stats' command to see the current hash time (Hs) at any time

-The Icarus code currently only works with a dual FPGA device that supports the same commands as
+The Icarus code currently only works with an FPGA device that supports the same commands as
 Icarus Rev3 requires and also is less than ~840MH/s and greater than 2MH/s
-If a dual FPGA device does hash faster than ~840MH/s it should work correctly if you supply the
+If an FPGA device does hash faster than ~840MH/s it should work correctly if you supply the
 correct hash time nanoseconds value

 The timing code itself will affect the Icarus performance since it increases the delay after
--- a/Makefile.am
+++ b/Makefile.am
@ -10,7 +10,7 @@ endif
				@@ -10,7 +10,7 @@ endif
 EXTRA_DIST	= example.conf m4/gnulib-cache.m4 linux-usb-cgminer \
 		  ADL_SDK/readme.txt api-example.php miner.php	\
 		  API.class API.java api-example.c windows-build.txt \
-		  bitstreams/* API-README FPGA-README
+		  bitstreams/* API-README FPGA-README SCRYPT-README

 SUBDIRS		= lib compat ccan

@ -45,6 +45,10 @@ cgminer_SOURCES += ocl.c ocl.h findnonce.c findnonce.h
				@@ -45,6 +45,10 @@ cgminer_SOURCES += ocl.c ocl.h findnonce.c findnonce.h
 cgminer_SOURCES += adl.c adl.h adl_functions.h
 cgminer_SOURCES += *.cl

+if HAS_SCRYPT
+cgminer_SOURCES += scrypt.c scrypt.h
+endif
+
 if HAS_CPUMINE
 # original CPU related sources, unchanged
 cgminer_SOURCES	+= \
--- a/250
+++ b/250
@ -1,3 +1,253 @@
				@@ -1,3 +1,253 @@
+Version 2.6.4 - August 7, 2012
+
+- Convert the serial autodetect functions to use int instead of char to
+enumerate devices.
+- Make the serial open timeout for BFL generically 1 second on windows.
+- Deuglify windows autodetect code for BFL.
+- There is no point zeroing temperature in BFL if we fail to get a response, and
+we should register it as a HW error, suggesting throttling.
+- Update SCRYPT README with information about HW errors.
+- Use the scrypt CPU code to confirm results from OCL code, and mark failures as
+HW errors, making it easier to tune scrypt parameters.
+- We may as well leave one curl still available per pool instead of reaping the
+last one.
+- Need to recheck the pool->curls count on regaining the pool lock after the
+pthread conditional wait returns.
+- Display reaped debug message outside mutex lock to avoid recursive locking.
+- Add specific information when ADL detects error -10 saying the device is not
+enabled.
+- api.c update API start message and include port number
+- miner.php ignore arg when readonly
+- miner.php allow pool inputs: delete, addpool, poolpriority
+
+
+Version 2.6.3 - August 5, 2012
+
+- Count likely throttling episodes on bitforce devices as hardware errors.
+- Style cleanups.
+- Use FTD2XX.DLL on Windows to autodetect BitFORCE SHA256 devices.
+- Make pool_disabled the first in the enums == 0, fixing the pool enabled count
+which compares if value is not enabled before enabling it.
+- Correct writing of scrypt parameters to config file based on command line
+parameters only.
+- Use different variables for command line specified lookup gap and thread
+concurrency to differentiate user defined versus auto chosen values.
+- Queue a request on pool switch in case we have no work from the new pool yet.
+- Display failover only mode in pool menu and allow it to be toggled live.
+- Reinstate check for system queueing lag when the current pool's queue is maxed
+out, there is no staged work, and the work is needed now.
+- There is no need for pool active testing to be mandatory any more with queue
+request changes.
+- Fix harmless warnings.
+- Check the current staged and global queued as well before queueing requests.
+Discard stales before ageing work in the watchdog thread. Queue requests after
+discarding and ageing work in watchdog thread. Display accurate global queued in
+curses output. Reuse variable in age_work().
+- The queueing mechanism has become a complex state machine that is no longer
+predictable. Rewrite it from scratch watching only current queues in flight and
+staged work available on a pool by pool basis.
+- API remove unused warning in non-GPU compile
+- api.c in linux allow to open a closed socket in TIME_WAIT
+- Queue an extra request whenever staged work drops below mining thread count in
+hash_pop.
+- Update debian package configs to v2.6.2
+
+
+Version 2.6.2 - August 3, 2012
+
+- Scrypt mining does not support block testing yet so don't try to print it.
+- Clear the bitforce buffer whenever we get an unexpected result as it has
+likely throttled and we are getting cached responses out of order, and use the
+temperature monitoring as a kind of watchdog to flush unexpected results.
+- It is not critical getting the temperature response in bitforce so don't
+mandatorily wait on the mutex lock.
+- Check there is a cutoff temp actually set in bitforce before using it as a cut
+off value otherwise it may think it's set to zero degrees.
+- We dropped the temporary stopping of curl recruiting on submit_fail by
+mistake, reinstate it.
+- Make threads report in either side of the scanhash function in case we miss
+reporting in when restarting work.
+- Don't make mandatory work and its clones last forever.
+- Make test work for pool_active mandatory work items to smooth out staged work
+counts when in failover-only mode.
+- Add debugging output when work is found stale as to why.
+- Print the 3 parameters that are passed to applog for a debug line in
+bitforce.c
+- Clear bitforce buffer on init as previously.
+- Add some headroom to the number of curls available per pool to allow for
+longpoll and sendwork curls.
+- Revert "Revert "Change BFL driver thread initialising to a constant 100ms
+delay between devices instead of a random arrangement.""
+- Revert "Remove bitforce_thread_init"
+- Show the correct base units on GPU summary.
+- Differentiate between the send return value being a bool and the get return
+value when managing them in bitforce scanhash.
+- 23a8c60 Revert "bitforce: Skip out of sending work if work restart requested"
+
+
+Version 2.6.1 - July 30, 2012
+
+- Display scrypt as being built in as well.
+- Fix build warning about KL_SCRYPT when built without scrypt support.
+- Remove the low hash count determinant of hardware being sick. A low hash rate
+can be for poor network connectivity or scrypt mining, neither of which are due
+to a sick device.
+- api.c poolpriority changes
+
+
+Version 2.6.0 - July 29, 2012
+
+- Display kilohash when suitable, but store the global mhash value still truly
+in megahashes to not break the API output.
+- Don't try and print curses output for devices that won't fit on the screen.
+- Add scrypt documentation in the form of a separate readme.
+- Fix build error without scrypt enabled.
+- Limit total number of curls recruited per pool to the number of mining threads
+to prevent blasting the network when we only have one pool to talk to.
+- bitforce: Skip out of sending work if work restart requested
+- Keep a counter of enabled pools and use that instead of iterating over the
+pool list. Use that value to ensure we don't set the last remaining active pool
+to the rejecting state.
+- fpgautils: add support for 57.6 kBd serial
+- miner.php add a socket RCV timeout for if cgminer is hung and the API thread
+is still running
+- Limit thread concurrency for scrypt to 5xshaders if shaders is specified.
+- Simplify repeated use of gpus[gpu]. in ocl.c
+- Find the nearest power of 2 maximum alloc size for the scrypt buffer that can
+successfully be allocated and is large enough to accomodate the thread
+concurrency chosen, thus mapping it to an intensity.
+- Don't make opt_scrypt mandatory blocking with opencl code.
+- Update kernel versions reflecting changes in the API.
+- Make the thread concurrency and lookup gap options hidden on the command line
+and autotune parameters with a newly parsed --shaders option.
+- Fix target testing with scrypt kernel as it would have been missing shares
+below target.
+- Bugfix: Use a mutex to control non-curses output
+- Simplify code to a single vprintf path for curses-less printing
+- Move opt_quiet check to my_log_curses, so it works for curses-less builds
+- Use log_generic for vapplog to cut down on code duplication
+- Add space to log output now that there is more screen real estate available.
+- BFL force all code to timeout to avoid hanging
+- Bugfix: Copy argv[0] given to dirname()
+- Always create the largest possible padbuffer for scrypt kernels even if not
+needed for thread_concurrency, giving us some headroom for intensity levels.
+- Use the detected maximum allocable memory on a GPU to determine the optimal
+scrypt settings when lookup_gap and thread_concurrency parameters are not given.
+- Check the maximum allocable memory size per opencl device.
+- Add debugging output if buffer allocation fails for scrypt and round up
+bufsize to a multiple of 256.
+- Nonce testing for btc got screwed up, leading to no accepted shares. Fix it.
+- Display size of scrypt buffer used in debug.
+- Allow intensities up to 20 if scrypt is compiled in.
+- Add name to scrypt kernel copyright.
+- Allow lookup gap and thread concurrency to be passed per device and store
+details in kernel binary filename.
+- Ignore negative intensities for scrypt.
+- Change the scale of intensity for scrypt kernel and fix a build warning.
+- Correct target value passed to scrypt kernel.
+- Use 256 output slots for kernels to allow 1 for each worksize.
+- Test the target in the actual scrypt kernel itself saving further
+calculations.
+- Reinstate GPU only opencl device detection.
+- Decrease lookup gap to 1. Does not seem to help in any way being 2.
+- Fix build.
+- Make pad0 and pad1 local variable in scrypt kernel.
+- Constify input variable in scrypt kernel.
+- Send correct values to scrypt kernel to get it finally working.
+- Create command queue before compiling program in opencl.
+- Detach pthread from within the api thread in case it is terminated due to not
+being instantiated before pthread_cancel is called from main, leading to a
+segfault.
+- Debug output per thread hashrate is out by a factor of 1000.
+- Initialise mdplatform.
+- Find the gpu platform with the most devices and use that if no platform option
+is passed.
+- Allow more platforms to be probed if first does not return GPUs.
+- Fix external scrypt algo missing.
+- Limit scrypt to 1 vector.
+- Handle KL_SCRYPT in config write.
+- Get rid of stuff.
+- Don't enqueuewrite buffer at all for pad8 and pass work details around for
+scrypt in dev_blk.
+- Set the correct data for cldata and prepare for pad8 fixes.
+- Bugfix: Fix build without curses but with OpenCL
+- Find the gpu platform with the most devices and use that if no platform option
+is passed.
+- Allow more platforms to be probed if first does not return GPUs.
+- Get rid of spaces in arrays in scrypt kernel.
+- Start with smaller amount of hashes in cpu mining to enable scrypt to return
+today sometime.
+- Show Khash hashrates when scrypt is in use.
+- Free the scratchbuf memory allocated in scrypt and don't check if CPUs are
+sick since they can't be. Prepare for khash hash rates in display.
+- Add cpumining capability for scrypt.
+- Set scrypt settings and buffer size in ocl.c code to be future modifiable.
+- Cope with when we cannot set intensity low enough to meet dynamic interval by
+inducing a forced sleep.
+- Make dynamic and scrypt opencl calls blocking.
+- Calculate midstate in separate function and remove likely/unlikely macros
+since they're dependent on pools, not code design.
+- bitforce: Use "full work" vs "nonce range" for kernel name
+- Display in debug mode when we're making the midstate locally.
+- Fix nonce submission code for scrypt.
+- Make sure goffset is set for scrypt and drop padbuffer8 to something
+manageable for now.
+- Set up buffer8 for scrypt.
+- Build fix for opt scrypt.
+- Don't check postcalc nonce with sha256 in scrypt.
+- Don't test nonce with sha and various fixes for scrypt.
+- Make scrypt buffers and midstate compatible with cgminer.
+- Use cgminer specific output array entries in scrypt kernel.
+- Provide initial support for the scrypt kernel to compile with and mine scrypt
+with the --scrypt option.
+- Enable completely compiling scrypt out.
+- Begin import of scrypt opencl kernel from reaper.
+- bitforce_get_result returns -1 on error now.
+- Check return value of read in BFgets
+- Bugfix: Make our Windows nanosleep/sleep replacements standards-compliant
+(which fixes nmsleep) and include compat.h for bitforce (for sleep)
+- rpc: Use a single switch statement for both stringifications of cgpu->status
+- Fix whitespace mangling.
+- miner.php fix rig # when miners fail
+- Only try to shut down work cleanly if we've successfully connected and started
+mining.
+- Use switch statement for cgpu->status and fix spelling.
+- Abbrv. correction
+- Bugfix: Don't declare devices SICK if they're just busy initialising
+- Bugfix: Calculate nsec in nmsleep correctly
+- Bugfix: Adapt OpenCL scanhash errors to driver API change (errors are now -1,
+not 0)
+- Remove superfluous ave_wait
+- Put kname change for broken nonce-range back in
+- Add average wait time to api stats
+- Change BFL driver thread initialising to a constant 100ms delay between
+devices instead of a random arrangement.
+- Spelling typo.
+- Time opencl work from start of queueing a kernel till it's flushed when
+calculating dynamic intensity.
+- Modify te scanhash API to use an int64_t and return -1 on error, allowing zero
+to be a valid return value.
+- Check for work restart after the hashmeter is invoked for we lose the hashes
+otherwise contributed in the count.
+- Remove disabled: label from mining thread function, using a separate
+mt_disable function.
+- Style changes.
+- Missed one nonce-range disabling.
+- Add average return time to api stats
+- miner.php allow rig names in number buttons
+- Remove bitforce_thread_init The delay thing does nothing useful... when long
+poll comes around, all threads restart at the same time anyway.
+- Change timeouts to time-vals for accuracy.
+- fix API support for big endian machines
+- Cope with signals interrupting the nanosleep of nmsleep.
+- Use standard cfsetispeed/cfsetospeed to set baud rate on *nix
+- miner.php split() flagged deprecated in PHP 5.3.0
+- More BFL tweaks. Add delay between closing and reopening port. Remove buffer
+clear in re-init Add kernel type (mini-rig or single)
+- Make long timeout 10seconds on bitforce for when usleep or nanosleep just
+can't be accurate...
+
+
 Version 2.5.0 - July 6, 2012

 - Fix --benchmark not working since the dynamic addition of pools and pool
--- a/25
+++ b/25
@ -33,22 +33,32 @@ READ EXECUTIVE SUMMARY BELOW FOR FIRST TIME USERS!
				@@ -33,22 +33,32 @@ READ EXECUTIVE SUMMARY BELOW FOR FIRST TIME USERS!
 Dependencies:
 	curl dev library 	http://curl.haxx.se/libcurl/
 	(libcurl4-openssl-dev)
+
 	curses dev library
 	(libncurses5-dev or libpdcurses on WIN32)
+
 	pkg-config		http://www.freedesktop.org/wiki/Software/pkg-config
+	libtool			http://www.gnu.org/software/libtool/
+
 	jansson			http://www.digip.org/jansson/
 	(jansson is included in-tree and not necessary)
+
 	yasm 1.0.1+ http://yasm.tortall.net/
 	(yasm is optional, gives assembly routines for CPU mining)
+
 	AMD APP SDK		http://developer.amd.com/sdks/AMDAPPSDK
 	(This sdk is mandatory for GPU mining)
+
 	AMD ADL SDK		http://developer.amd.com/sdks/ADLSDK
 	(This sdk is mandatory for ATI GPU monitoring & clocking)
+
 	libudev headers
 	(This is only required for FPGA auto-detection and is linux only)
+
 	libusb headers
 	(This is only required for ZTEX support)

+
 CGMiner specific configuration options:
 	--enable-cpumining      Build with cpu mining support(default disabled)
 	--disable-opencl        Override detection and disable building with opencl
@ -57,6 +67,9 @@ CGMiner specific configuration options:
				@@ -57,6 +67,9 @@ CGMiner specific configuration options:
 	--enable-icarus         Compile support for Icarus Board(default disabled)
 	--enable-modminer       Compile support for ModMiner FPGAs(default disabled)
 	--enable-ztex           Compile support for Ztex Board(default disabled)
+	--enable-scrypt         Compile support for scrypt litecoin mining (default disabled)
+	--without-curses        Compile support for curses TUI (default enabled)
+	--without-libudev       Autodetect FPGAs using libudev (default enabled)

 Basic *nix build instructions:
 	To build with GPU mining support:
@ -153,6 +166,7 @@ Options for both config file and command line:
				@@ -153,6 +166,7 @@ Options for both config file and command line:
 --scan-time|-s <arg> Upper bound on time spent scanning current work, in seconds (default: 60)
 --sched-start <arg> Set a time of day in HH:MM to start mining (a once off without a stop time)
 --sched-stop <arg>  Set a time of day in HH:MM to stop mining (will quit without a start time)
+--scrypt            Use the scrypt algorithm for mining (litecoin only)
 --sharelog <arg>    Append share log to file
 --shares <arg>      Quit after mining N shares (default: unlimited)
 --socks-proxy <arg> Set socks4 proxy (host:port)
@ -197,6 +211,14 @@ GPU only options:
				@@ -197,6 +211,14 @@ GPU only options:
 --worksize|-w <arg> Override detected optimal worksize - one value or comma separated list


+SCRYPT only options:
+
+--lookup-gap <arg>  Set GPU lookup gap for scrypt mining, comma separated
+--thread-concurrency <arg> Set GPU thread concurrency for scrypt mining, comma separated
+
+See SCRYPT-README for more information regarding litecoin mining.
+
+
 FPGA mining boards(BitForce, Icarus, ModMiner, Ztex) only options:

 --scan-serial|-S <arg> Serial port to probe for FPGA mining device
@ -722,7 +744,8 @@ A: Cgminer is being packaged with other trojan scripts and some antivirus
				@@ -722,7 +744,8 @@ A: Cgminer is being packaged with other trojan scripts and some antivirus
 software is falsely accusing cgminer.exe as being the actual virus, rather
 than whatever it is being packaged with. If you installed cgminer yourself,
 then you do not have a virus on your computer. Complain to your antivirus
-software company.
+software company. They seem to be flagging even source code now from cgminer
+as viruses, even though text source files can't do anything by themself.

 Q: Can you modify the display to include more of one thing in the output and
 less of another, or can you change the quiet mode or can you add yet another
--- a/143
+++ b/143
@ -0,0 +1,143 @@
				@@ -0,0 +1,143 @@
+If you wish to donate to the author, Con Kolivas, in LTC, please submit your
+donations to:
+
+Lc8TWMiKM7gRUrG8VB8pPNP1Yvt1SGZnoH
+
+Otherwise, please donate in BTC as per the main README.
+
+---
+
+
+Scrypt mining, AKA litecoin mining, for GPU is completely different to sha256
+used for bitcoin mining. The algorithm was originally developed in a manner
+that it was anticipated would make it suitable for mining on CPU but NOT GPU.
+Thanks to some innovative work by Artforz and mtrlt, this was proven to be
+wrong. However, it has very different requirements to bitcoin mining and is a
+lot more complicated to get working well. Note that it is a ram dependent
+workload, and requires you to have enough system ram as well as fast enough
+GPU ram. If you have less system ram than your GPU has, it may not be possible
+to mine at any reasonable rate.
+
+There are 5 main parameters to tuning scrypt, 2 of which you MUST set, and
+the others are optional for further fine tuning. When you start scrypt mining
+with the --scrypt option, cgminer will fail IN RANDOM WAYS. They are all due
+to parameters being outside what the GPU can cope with. Not giving cgminer a
+hint as to your GPU type, it will hardly ever perform well.
+
+NOTE that if it does not fail at startup, the presence of hardware errors (HW)
+are a sure sign that you have set the parameters too high.
+
+
+Step 1 on linux:
+export GPU_MAX_ALLOC_PERCENT=100
+If you do not do this, you may find it impossible to scrypt mine. You may find
+a value of 40 is enough and increasing this further has little effect.
+
+export GPU_USE_SYNC_OBJECTS=1
+may help CPU usage a little as well.
+
+--shaders XXX
+
+is a new option where you tell cgminer how many shaders your GPU has. This
+helps cgminer try to choose some meaningful baseline parameters. Use this table
+below to determine how many shaders your GPU has, and note that there are some
+variants of these cards, and nvidia shaders are much much lower and virtually
+pointless trying to mine on.
+
+GPU  Shaders
+7750 512
+7770 640
+7850 1024
+7870 1280
+7950 1792
+7970 2048
+
+6850 960
+6870 1120
+6950 1408
+6970 1536
+6990 (6970x2)
+
+6570 480
+6670 480
+6790 800
+
+6450 160
+
+5670 400
+5750 720
+5770 800
+5830 1120
+5850 1440
+5870 1600
+5970 (5870x2)
+
+These are only used as a rough guide for cgminer, and it is rare that this is
+all you will need to set.
+
+
+--intensity XX
+
+Just like in bitcoin mining, scrypt mining takes an intensity, however the
+scale goes from 0 to 20 to mimic the "Aggression" used in mtrlt's reaper. The
+reason this is crucial is that too high an intensity can actually be
+disastrous with scrypt because it CAN run out of ram. Intensities over 13
+start writing over the same ram and it is highly dependent on the GPU, but they
+can start actually DECREASING your hashrate, or even worse, start producing
+garbage with HW errors skyrocketing. The low level detail is that intensity is
+only guaranteed up to the power of 2 that most closely matches the thread
+concurrency. i.e. a thread concurrency of 6144 has 8192 as the nearest power
+of two above it, thus as 2^13=8192, that is an intensity of 13.
+
+
+Optional parameters to tune:
+-g, --thread-concurrency, --lookup-gap
+
+-g:
+Once you have found the optimal shaders and intensity, you can start increasing
+the -g value till cgminer fails to start. Rarely will you be able to go over
+about -g 4 and each increase in -g only increases hashrate slightly.
+
+--thread-concurrency:
+This tunes the optimal size of work that scrypt can do. It is internally tuned
+by cgminer to be the highest reasonable multiple of shaders that it can
+allocate on your GPU. Ideally it should be a multiple of your shader count.
+vliw5 architecture (R5XXX) would be best at 5x shaders, while VLIW4 (R6xxx and
+R7xxx) are best at 4x. Setting thread concurrency overrides anything you put
+into --shaders.
+
+--lookup-gap
+This tunes a compromise between ram usage and performance. Performance peaks
+at a gap of 2, but increasing the gap can save you some GPU ram, but almost
+always at the cost of significant loss of hashrate. Setting lookup gap
+overrides the default of 2, but cgminer will use the --shaders value to choose
+a thread-concurrency if you haven't chosen one.
+
+
+Overclocking for scrypt mining:
+First of all, do not underclock your memory initially. Scrypt mining requires
+memory speed and on most, but not all, GPUs, lowering memory speed lowers
+mining performance.
+
+Second, absolute engine clock speeds do NOT correlate with hashrate. The ratio
+of engine clock speed to memory matters, so if you set your memory to the
+default value, and then start overclocking as you are running it, you should
+find a sweet spot where the hashrate peaks and then it might actually drop if
+you increase the engine clock speed further. Unless you wish to run with a
+dynamic intensity, do not go over 13 without testing it while it's running to
+see that it increases hashrate AND utility WITHOUT increasing your HW errors.
+
+
+Suggested values for 7970 for example:
+export GPU_MAX_ALLOC_PERCENT=100
+--thread-concurrency 8192 -g 4 --gpu-engine 1135 --gpu-memclock 1375
+
+
+---
+
+If you wish to donate to the author, Con Kolivas, in LTC, please submit your
+donations to:
+
+Lc8TWMiKM7gRUrG8VB8pPNP1Yvt1SGZnoH
+
+Otherwise, please donate in BTC as per the main README.
--- a/adl.c
+++ b/adl.c
@ -33,6 +33,10 @@
				@@ -33,6 +33,10 @@
 #endif
 #include "adl_functions.h"

+#ifndef HAVE_CURSES
+#define wlogprint(...)  applog(LOG_WARNING, __VA_ARGS__)
+#endif
+
 bool adl_active;
 bool opt_reorder = false;

@ -237,6 +241,8 @@ void init_adl(int nDevs)
				@@ -237,6 +241,8 @@ void init_adl(int nDevs)
 		result = ADL_Adapter_ID_Get(iAdapterIndex, &lpAdapterID);
 		if (result != ADL_OK) {
 			applog(LOG_INFO, "Failed to ADL_Adapter_ID_Get. Error %d", result);
+			if (result == -10)
+				applog(LOG_INFO, "This error says the device is not enabled");
 			continue;
 		}

@ -764,6 +770,7 @@ bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vdd
				@@ -764,6 +770,7 @@ bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vdd
 	return true;
 }

+#ifdef HAVE_CURSES
 static void get_enginerange(int gpu, int *imin, int *imax)
 {
 	struct gpu_adl *ga;
@ -776,6 +783,7 @@ static void get_enginerange(int gpu, int *imin, int *imax)
				@@ -776,6 +783,7 @@ static void get_enginerange(int gpu, int *imin, int *imax)
 	*imin = ga->lpOdParameters.sEngineClock.iMin / 100;
 	*imax = ga->lpOdParameters.sEngineClock.iMax / 100;
 }
+#endif

 int set_engineclock(int gpu, int iEngineClock)
 {
@ -824,6 +832,7 @@ out:
				@@ -824,6 +832,7 @@ out:
 	return ret;
 }

+#ifdef HAVE_CURSES
 static void get_memoryrange(int gpu, int *imin, int *imax)
 {
 	struct gpu_adl *ga;
@ -836,6 +845,7 @@ static void get_memoryrange(int gpu, int *imin, int *imax)
				@@ -836,6 +845,7 @@ static void get_memoryrange(int gpu, int *imin, int *imax)
 	*imin = ga->lpOdParameters.sMemoryClock.iMin / 100;
 	*imax = ga->lpOdParameters.sMemoryClock.iMax / 100;
 }
+#endif

 int set_memoryclock(int gpu, int iMemoryClock)
 {
@ -876,6 +886,7 @@ out:
				@@ -876,6 +886,7 @@ out:
 	return ret;
 }

+#ifdef HAVE_CURSES
 static void get_vddcrange(int gpu, float *imin, float *imax)
 {
 	struct gpu_adl *ga;
@ -889,7 +900,6 @@ static void get_vddcrange(int gpu, float *imin, float *imax)
				@@ -889,7 +900,6 @@ static void get_vddcrange(int gpu, float *imin, float *imax)
 	*imax = (float)ga->lpOdParameters.sVddc.iMax / 1000;
 }

-#ifdef HAVE_CURSES
 static float curses_float(const char *query)
 {
 	float ret;
@ -997,6 +1007,7 @@ int set_fanspeed(int gpu, int iFanSpeed)
				@@ -997,6 +1007,7 @@ int set_fanspeed(int gpu, int iFanSpeed)
 	return ret;
 }

+#ifdef HAVE_CURSES
 static int set_powertune(int gpu, int iPercentage)
 {
 	struct gpu_adl *ga;
@ -1018,6 +1029,7 @@ static int set_powertune(int gpu, int iPercentage)
				@@ -1018,6 +1029,7 @@ static int set_powertune(int gpu, int iPercentage)
 	unlock_adl();
 	return ret;
 }
+#endif

 /* Returns whether the fanspeed is optimal already or not. The fan_window bool
 * tells us whether the current fanspeed is in the target range for fanspeeds.
--- a/api.c
+++ b/api.c
@ -166,7 +166,7 @@ static const char SEPARATOR = '|';
				@@ -166,7 +166,7 @@ static const char SEPARATOR = '|';
 #define SEPSTR "|"
 static const char GPUSEP = ',';

-static const char *APIVERSION = "1.14";
+static const char *APIVERSION = "1.15";
 static const char *DEAD = "Dead";
 static const char *SICK = "Sick";
 static const char *NOSTART = "NoStart";
@ -176,7 +176,9 @@ static const char *ALIVE = "Alive";
				@@ -176,7 +176,9 @@ static const char *ALIVE = "Alive";
 static const char *REJECTING = "Rejecting";
 static const char *UNKNOWN = "Unknown";
 #define _DYNAMIC "D"
+#ifdef HAVE_OPENCL
 static const char *DYNAMIC = _DYNAMIC;
+#endif

 static const char *YES = "Y";
 static const char *NO = "N";
@ -372,6 +374,8 @@ static const char *JSON_PARAMETER = "parameter";
				@@ -372,6 +374,8 @@ static const char *JSON_PARAMETER = "parameter";
 #define MSG_MINESTATS 70
 #define MSG_MISCHK 71
 #define MSG_CHECK 72
+#define MSG_POOLPRIO 73
+#define MSG_DUPPID 74

 enum code_severity {
 	SEVERITY_ERR,
@ -385,6 +389,7 @@ enum code_parameters {
				@@ -385,6 +389,7 @@ enum code_parameters {
 	PARAM_GPU,
 	PARAM_PGA,
 	PARAM_CPU,
+	PARAM_PID,
 	PARAM_GPUMAX,
 	PARAM_PGAMAX,
 	PARAM_CPUMAX,
@ -501,6 +506,8 @@ struct CODES {
				@@ -501,6 +506,8 @@ struct CODES {
 { SEVERITY_ERR,   MSG_ACCDENY,	PARAM_STR,	"Access denied to '%s' command" },
 { SEVERITY_SUCC,  MSG_ACCOK,	PARAM_NONE,	"Privileged access OK" },
 { SEVERITY_SUCC,  MSG_ENAPOOL,	PARAM_POOL,	"Enabling pool %d:'%s'" },
+ { SEVERITY_SUCC,  MSG_POOLPRIO,PARAM_NONE,	"Changed pool priorities" },
+ { SEVERITY_ERR,   MSG_DUPPID,	PARAM_PID,	"Duplicate pool specified %d" },
 { SEVERITY_SUCC,  MSG_DISPOOL,	PARAM_POOL,	"Disabling pool %d:'%s'" },
 { SEVERITY_INFO,  MSG_ALRENAP,	PARAM_POOL,	"Pool %d:'%s' already enabled" },
 { SEVERITY_INFO,  MSG_ALRDISP,	PARAM_POOL,	"Pool %d:'%s' already disabled" },
@ -1062,6 +1069,7 @@ static char *message(int messageid, int paramid, char *param2, bool isjson)
				@@ -1062,6 +1069,7 @@ static char *message(int messageid, int paramid, char *param2, bool isjson)
 				case PARAM_GPU:
 				case PARAM_PGA:
 				case PARAM_CPU:
+				case PARAM_PID:
 					sprintf(buf, codes[i].description, paramid);
 					break;
 				case PARAM_POOL:
@ -2128,6 +2136,74 @@ static void enablepool(__maybe_unused SOCKETTYPE c, char *param, bool isjson, __
				@@ -2128,6 +2136,74 @@ static void enablepool(__maybe_unused SOCKETTYPE c, char *param, bool isjson, __
 	strcpy(io_buffer, message(MSG_ENAPOOL, id, NULL, isjson));
 }

+static void poolpriority(__maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
+{
+	char *ptr, *next;
+	int i, pr, prio = 0;
+
+	// TODO: all cgminer code needs a mutex added everywhere for change
+	//	access to total_pools and also parts of the pools[] array,
+	//	just copying total_pools here wont solve that
+
+	if (total_pools == 0) {
+		strcpy(io_buffer, message(MSG_NOPOOL, 0, NULL, isjson));
+		return;
+	}
+
+	if (param == NULL || *param == '\0') {
+		strcpy(io_buffer, message(MSG_MISPID, 0, NULL, isjson));
+		return;
+	}
+
+	bool pools_changed[total_pools];
+	int new_prio[total_pools];
+	for (i = 0; i < total_pools; ++i)
+		pools_changed[i] = false;
+
+	next = param;
+	while (next && *next) {
+		ptr = next;
+		next = strchr(ptr, ',');
+		if (next)
+			*(next++) = '\0';
+
+		i = atoi(ptr);
+		if (i < 0 || i >= total_pools) {
+			strcpy(io_buffer, message(MSG_INVPID, i, NULL, isjson));
+			return;
+		}
+
+		if (pools_changed[i]) {
+			strcpy(io_buffer, message(MSG_DUPPID, i, NULL, isjson));
+			return;
+		}
+
+		pools_changed[i] = true;
+		new_prio[i] = prio++;
+	}
+
+	// Only change them if no errors
+	for (i = 0; i < total_pools; i++) {
+		if (pools_changed[i])
+			pools[i]->prio = new_prio[i];
+	}
+
+	// In priority order, cycle through the unchanged pools and append them
+	for (pr = 0; pr < total_pools; pr++)
+		for (i = 0; i < total_pools; i++) {
+			if (!pools_changed[i] && pools[i]->prio == pr) {
+				pools[i]->prio = prio++;
+				pools_changed[i] = true;
+				break;
+			}
+		}
+
+	if (current_pool()->prio)
+		switch_pools(NULL);
+
+	strcpy(io_buffer, message(MSG_POOLPRIO, 0, NULL, isjson));
+}
+
 static void disablepool(__maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	struct pool *pool;
@ -2155,7 +2231,7 @@ static void disablepool(__maybe_unused SOCKETTYPE c, char *param, bool isjson, _
				@@ -2155,7 +2231,7 @@ static void disablepool(__maybe_unused SOCKETTYPE c, char *param, bool isjson, _
 		return;
 	}

-	if (active_pools() <= 1) {
+	if (enabled_pools <= 1) {
 		strcpy(io_buffer, message(MSG_DISLASTP, id, NULL, isjson));
 		return;
 	}
@ -2659,6 +2735,7 @@ struct CMDS {
				@@ -2659,6 +2735,7 @@ struct CMDS {
 	{ "cpucount",		cpucount,	false },
 	{ "switchpool",		switchpool,	true },
 	{ "addpool",		addpool,	true },
+	{ "poolpriority",	poolpriority,	true },
 	{ "enablepool",		enablepool,	true },
 	{ "disablepool",	disablepool,	true },
 	{ "removepool",		removepool,	true },
@ -3122,6 +3199,20 @@ void api(int api_thr_id)
				@@ -3122,6 +3199,20 @@ void api(int api_thr_id)

 	serv.sin_port = htons(port);

+#ifndef WIN32
+	// On linux with SO_REUSEADDR, bind will get the port if the previous
+	// socket is closed (even if it is still in TIME_WAIT) but fail if
+	// another program has it open - which is what we want
+	int optval = 1;
+	// If it doesn't work, we don't really care - just show a debug message
+	if (SOCKETFAIL(setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void *)(&optval), sizeof(optval))))
+		applog(LOG_DEBUG, "API setsockopt SO_REUSEADDR failed (ignored): %s", SOCKERRMSG);
+#else
+	// On windows a 2nd program can bind to a port>1024 already in use unless
+	// SO_EXCLUSIVEADDRUSE is used - however then the bind to a closed port
+	// in TIME_WAIT will fail until the timeout - so we leave the options alone
+#endif
+
 	// try for more than 1 minute ... in case the old one hasn't completely gone yet
 	bound = 0;
 	bindstart = time(NULL);
@ -3150,12 +3241,12 @@ void api(int api_thr_id)
				@@ -3150,12 +3241,12 @@ void api(int api_thr_id)
 	}

 	if (opt_api_allow)
-		applog(LOG_WARNING, "API running in IP access mode");
+		applog(LOG_WARNING, "API running in IP access mode on port %d", port);
 	else {
 		if (opt_api_network)
-			applog(LOG_WARNING, "API running in UNRESTRICTED access mode");
+			applog(LOG_WARNING, "API running in UNRESTRICTED read access mode on port %d", port);
 		else
-			applog(LOG_WARNING, "API running in local access mode");
+			applog(LOG_WARNING, "API running in local read access mode on port %d", port);
 	}

 	io_buffer = malloc(MYBUFSIZ+1);
--- a/autogen.sh
+++ b/autogen.sh
@ -1,12 +1,17 @@
				@@ -1,12 +1,17 @@
 #!/bin/sh
+cwd="$PWD"
+bs_dir="$(dirname $(readlink -f $0))"
+rm -rf "${bs_dir}"/autom4te.cache
+rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh

-# You need autoconf 2.5x, preferably 2.57 or later
-# You need automake 1.7 or later. 1.6 might work.
-
-set -e
-
-aclocal -I m4
-autoheader
-automake --add-missing --copy
-autoconf
-
+echo 'Running autoreconf -if...'
+autoreconf -if || exit 1
+if test -z "$NOCONFIGURE" ; then
+	echo 'Configuring...'
+	cd "${bs_dir}" &> /dev/null
+	test "$?" = "0" || e=1
+	test "$cwd" != "$bs_dir" && cd "$bs_dir" &> /dev/null
+	./configure $@
+	test "$e" = "1" && exit 1
+	cd "$cwd"
+fi
--- a/cgminer.c
+++ b/cgminer.c
--- a/configure.ac
+++ b/configure.ac
@ -1,8 +1,8 @@
				@@ -1,8 +1,8 @@
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_maj], [2])
-m4_define([v_min], [5])
-m4_define([v_mic], [0])
+m4_define([v_min], [6])
+m4_define([v_mic], [4])
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_ver], [v_maj.v_min.v_mic])
 m4_define([lt_rev], m4_eval(v_maj + v_min))
@ -173,6 +173,8 @@ AC_ARG_ENABLE([adl],
				@@ -173,6 +173,8 @@ AC_ARG_ENABLE([adl],
 	[adl=$enableval]
 	)

+scrypt="no"
+
 if test "$found_opencl" = 1; then
 	if test "x$adl" != xno; then
 		AC_CHECK_FILE([ADL_SDK/adl_sdk.h], have_adl=true, have_adl=false,)
@ -183,10 +185,20 @@ if test "$found_opencl" = 1; then
				@@ -183,10 +185,20 @@ if test "$found_opencl" = 1; then
 			DLOPEN_FLAGS=""
 		fi
 	fi
+
+	AC_ARG_ENABLE([scrypt],
+		[AC_HELP_STRING([--enable-scrypt],[Compile support for scrypt litecoin mining (default disabled)])],
+		[scrypt=$enableval]
+		)
+	if test "x$scrypt" = xyes; then
+		AC_DEFINE([USE_SCRYPT], [1], [Defined to 1 if scrypt support is wanted])
+	fi
 else
 	DLOPEN_FLAGS=""
 fi

+AM_CONDITIONAL([HAS_SCRYPT], [test x$scrypt = xyes])
+
 bitforce="no"

 AC_ARG_ENABLE([bitforce],
@ -377,10 +389,11 @@ fi
				@@ -377,10 +389,11 @@ fi

 AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install])

-AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120223"], [Filename for phatk kernel])
-AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120327"], [Filename for poclbm kernel])
-AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120427"], [Filename for diakgcn kernel])
-AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120328"], [Filename for diablo kernel])
+AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120724"], [Filename for phatk kernel])
+AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120724"], [Filename for poclbm kernel])
+AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120724"], [Filename for diakgcn kernel])
+AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120724"], [Filename for diablo kernel])
+AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120724"], [Filename for scrypt kernel])


 AC_SUBST(OPENCL_LIBS)
@ -425,17 +438,25 @@ echo "  curses.TUI...........: $cursesmsg"
				@@ -425,17 +438,25 @@ echo "  curses.TUI...........: $cursesmsg"
 if test "x$opencl" != xno; then
 	if test $found_opencl = 1; then
 		echo "  OpenCL...............: FOUND. GPU mining support enabled"
+	if test "x$scrypt" != xno; then
+		echo "  scrypt...............: Enabled"
+	else
+		echo "  scrypt...............: Disabled"
+	fi
+
 	else
 		echo "  OpenCL...............: NOT FOUND. GPU mining support DISABLED"
 		if test "x$cpumining$bitforce$icarus$ztex$modminer" = xnonononono; then
 			AC_MSG_ERROR([No mining configured in])
 		fi
+		echo "  scrypt...............: Disabled (needs OpenCL)"
 	fi
 else
 	echo "  OpenCL...............: Detection overrided. GPU mining support DISABLED"
 	if test "x$cpumining$bitforce$icarus$ztex$modminer" = xnonononono; then
 		AC_MSG_ERROR([No mining configured in])
 	fi
+	echo "  scrypt...............: Disabled (needs OpenCL)"
 fi

 if test "x$adl" != xno; then
--- a/debian/changelog
+++ b/debian/changelog
@ -1,112 +1,146 @@
				@@ -1,112 +1,146 @@
+cgminer (2.6.2-1) precise; urgency=low
+  Version 2.6.2 - August 3, 2012
+
+  * Scrypt mining does not support block testing yet so don't try to print it.
+  * Clear the bitforce buffer whenever we get an unexpected result as it has
+    likely throttled and we are getting cached responses out of order, and use the
+    temperature monitoring as a kind of watchdog to flush unexpected results.
+  * It is not critical getting the temperature response in bitforce so don't
+    mandatorily wait on the mutex lock.
+  * Check there is a cutoff temp actually set in bitforce before using it as a cut
+    off value otherwise it may think it's set to zero degrees.
+  * We dropped the temporary stopping of curl recruiting on submit_fail by
+    mistake, reinstate it.
+  * Make threads report in either side of the scanhash function in case we miss
+    reporting in when restarting work.
+  * Don't make mandatory work and its clones last forever.
+  * Make test work for pool_active mandatory work items to smooth out staged work
+    counts when in failover-only mode.
+  * Add debugging output when work is found stale as to why.
+  * Print the 3 parameters that are passed to applog for a debug line in
+    bitforce.c
+  * Clear bitforce buffer on init as previously.
+  * Add some headroom to the number of curls available per pool to allow for
+    longpoll and sendwork curls.
+  * Revert "Revert "Change BFL driver thread initialising to a constant 100ms
+    delay between devices instead of a random arrangement.""
+  * Revert "Remove bitforce_thread_init"
+  * Show the correct base units on GPU summary.
+  * Differentiate between the send return value being a bool and the get return
+    value when managing them in bitforce scanhash.
+  * 23a8c60 Revert "bitforce: Skip out of sending work if work restart requested" 
+
+ -- nushor <nushor@nushor-desktop>  Fri, 03 Aug 2012 11:27:44 -0500
+
 cgminer (2.4.2-1) stable; urgency=medium
  Version 2.4.2 - June 2, 2012

-  - API.class compiled with Java SE 6.0_03 - works with Win7x64
-  - miner.php highlight devs too slow finding shares (possibly failing)
-  - API update version to V1.11 and document changes
-  - API save default config file if none specified
-  - api.c save success incorrectly returns error
-  - api.c replace BUFSIZ (linux/windows have different values)
-  - Move RPC API content out of README to API-README
-  - Open a longpoll connection if a pool is in the REJECTING state as it's the
+  * API.class compiled with Java SE 6.0_03 - works with Win7x64
+  * miner.php highlight devs too slow finding shares (possibly failing)
+  * API update version to V1.11 and document changes
+  * API save default config file if none specified
+  * api.c save success incorrectly returns error
+  * api.c replace BUFSIZ (linux/windows have different values)
+  * Move RPC API content out of README to API-README
+  * Open a longpoll connection if a pool is in the REJECTING state as it's the
    only way to re-enable it automatically.
-  - Use only one longpoll as much as possible by using a pthread conditional
+  * Use only one longpoll as much as possible by using a pthread conditional
    broadcast that each longpoll thread waits on and checks if it's the current pool
    before
-  - If shares are known stale, don't use them to decide to disable a pool for
+  * If shares are known stale, don't use them to decide to disable a pool for
    sequential rejects.
-  - Restarting cgminer from within after ADL has been corrupted only leads to a
+  * Restarting cgminer from within after ADL has been corrupted only leads to a
    crash. Display a warning only and disable fanspeed monitoring.
-  - Icarus: fix abort calculation/allow user specified abort
-  - Icarus: make --icarus-timing hidden and document it in FPGA-README
-  - Icarus: high accuracy timing and other bitstream speed support
-  - add-MIPSEB-to-icarus-for-BIG_ENDIAN
-  - work_decode only needs swab32 on midstate under BIG ENDIAN
-  - add compile command to api-example.c
-  - save config bugfix: writing an extra ',' when no gpus
-  - Add dpkg-source commits
+  * Icarus: fix abort calculation/allow user specified abort
+  * Icarus: make --icarus-timing hidden and document it in FPGA-README
+  * Icarus: high accuracy timing and other bitstream speed support
+  * add-MIPSEB-to-icarus-for-BIG_ENDIAN
+  * work_decode only needs swab32 on midstate under BIG ENDIAN
+  * add compile command to api-example.c
+  * save config bugfix: writing an extra ',' when no gpus
+  * Add dpkg-source commits

 -- nushor <nushor11@gmail.com>  Sun, 03 Jun 2012 22:02:03 -0500

 cgminer (2.4.1-1) stable; urgency=low
  Version 2.4.1-1 - May 6, 2012
-  - In the unlikely event of finding a block, display the block solved count with
+  * In the unlikely event of finding a block, display the block solved count with
    the pool it came from for auditing.
-  - Display the device summary on exit even if a device has been disabled.
-  - Use correct pool enabled enums in api.c.
-  - Import Debian packaging configs
-  - Ensure we test for a pool recovering from idle so long as it's not set to
+  * Display the device summary on exit even if a device has been disabled.
+  * Use correct pool enabled enums in api.c.
+  * Import Debian packaging configs
+  * Ensure we test for a pool recovering from idle so long as it's not set to
    disabled.
-  - Fix pool number display.
-  - Give cgminer -T message only if curses is in use.
-  - Reinit_adl is no longer used.
-  - API 'stats' allow devices to add their own stats also for testing/debug
-  - API add getwork stats to cgminer - accesable from API 'stats'
-  - Don't initialise variables to zero when in global scope since they're already
+  * Fix pool number display.
+  * Give cgminer -T message only if curses is in use.
+  * Reinit_adl is no longer used.
+  * API 'stats' allow devices to add their own stats also for testing/debug
+  * API add getwork stats to cgminer - accesable from API 'stats'
+  * Don't initialise variables to zero when in global scope since they're already
    initialised.
-  - Get rid of unitialised variable warning when it's false.
-  - Move a pool to POOL_REJECTING to be disabled only after 3 minutes of
+  * Get rid of unitialised variable warning when it's false.
+  * Move a pool to POOL_REJECTING to be disabled only after 3 minutes of
    continuous rejected shares.
-  - Some tweaks to reporting and logging.
-  - Change FPGA detection order since BFL hangs on an ICA
-  - API support new pool status
-  - Add a temporarily disabled state for enabled pools called POOL_REJECTING and
+  * Some tweaks to reporting and logging.
+  * Change FPGA detection order since BFL hangs on an ICA
+  * API support new pool status
+  * Add a temporarily disabled state for enabled pools called POOL_REJECTING and
    use the work from each longpoll to help determine when a rejecting pool has
    started working again. Switch pools based on the multipool strategy once a pool
    is re-enabled.
-  - Removing extra debug
-  - Fix the benchmark feature by bypassing the new networking code.
-  - Reset sequential reject counter after a pool is disabled for when it is
+  * Removing extra debug
+  * Fix the benchmark feature by bypassing the new networking code.
+  * Reset sequential reject counter after a pool is disabled for when it is
    re-enabled.
-  - Icarus - correct MH/s and U: with work restart set at 8 seconds
-  - ztex updateFreq was always reporting on fpga 0
-  - Trying harder to get 1.15y working
-  - Specifying threads on multi fpga boards extra cgpu
-  - Missing the add cgpu per extra fpga on 1.15y boards
-  - API add last share time to each pool
-  - Don't try to reap curls if benchmarking is enabled.
+  * Icarus - correct MH/s and U: with work restart set at 8 seconds
+  * ztex updateFreq was always reporting on fpga 0
+  * Trying harder to get 1.15y working
+  * Specifying threads on multi fpga boards extra cgpu
+  * Missing the add cgpu per extra fpga on 1.15y boards
+  * API add last share time to each pool
+  * Don't try to reap curls if benchmarking is enabled.

 -- nushor <nushor11@gmail.com>  Sun, 06 May 2012 11:09:46 -0500

 cgminer (2.4.0-1) stable; urgency=low
  Version 2.4.0 - May 3, 2012

-  - Only show longpoll warning once when it has failed.
-  - Convert hashes to an unsigned long long as well.
-  - Detect pools that have issues represented by endless rejected shares and
+  * Only show longpoll warning once when it has failed.
+  * Convert hashes to an unsigned long long as well.
+  * Detect pools that have issues represented by endless rejected shares and
    disable them, with a parameter to optionally disable this feature.
-  - Bugfix: Use a 64-bit type for hashes_done (miner_thread) since it can overflow
+  * Bugfix: Use a 64-bit type for hashes_done (miner_thread) since it can overflow
    32-bit on some FPGAs
-  - Implement an older header fix for a label existing before the pthread_cleanup
+  * Implement an older header fix for a label existing before the pthread_cleanup
    macro.
-  - Limit the number of curls we recruit on communication failures and with
+  * Limit the number of curls we recruit on communication failures and with
    delaynet enabled to 5 by maintaining a per-pool curl count, and using a pthread
    conditional that wakes up when one is returned to the ring buffer.
-  - Generalise add_pool() functions since they're repeated in add_pool_details.
-  - Bugfix: Return failure, rather than quit, if BFwrite fails
-  - Disable failing devices such that the user can attempt to re-enable them
-  - Bugfix: thread_shutdown shouldn't try to free the device, since it's needed
+  * Generalise add_pool() functions since they're repeated in add_pool_details.
+  * Bugfix: Return failure, rather than quit, if BFwrite fails
+  * Disable failing devices such that the user can attempt to re-enable them
+  * Bugfix: thread_shutdown shouldn't try to free the device, since it's needed
    afterward
-  - API bool's and 1TBS fixes
-  - Icarus - minimise code delays and name timer variables
-  - api.c V1.9 add 'restart' + redesign 'quit' so thread exits cleanly
-  - api.c bug - remove extra ']'s in notify command
-  - Increase pool watch interval to 30 seconds.
-  - Reap curls that are unused for over a minute. This allows connections to be
+  * API bool's and 1TBS fixes
+  * Icarus - minimise code delays and name timer variables
+  * api.c V1.9 add 'restart' + redesign 'quit' so thread exits cleanly
+  * api.c bug - remove extra ']'s in notify command
+  * Increase pool watch interval to 30 seconds.
+  * Reap curls that are unused for over a minute. This allows connections to be
    closed, thereby allowing the number of curl handles to always be the minimum
    necessary to not delay networking.
-  - Use the ringbuffer of curls from the same pool for submit as well as getwork
+  * Use the ringbuffer of curls from the same pool for submit as well as getwork
    threads. Since the curl handles were already connected to the same pool and are
    immediately available, share submission will not be delayed by getworks.
-  - Implement a scaleable networking framework designed to cope with any sized
+  * Implement a scaleable networking framework designed to cope with any sized
    network requirements, yet minimise the number of connections being reopened. Do
    this by create a ring buffer linked list of curl handles to be used by getwork,
    recruiting extra handles when none is immediately available.
-  - There is no need for the submit and getwork curls to be tied to the pool
+  * There is no need for the submit and getwork curls to be tied to the pool
    struct.
-  - Do not recruit extra connection threads if there have been connection errors
+  * Do not recruit extra connection threads if there have been connection errors
    to the pool in question.
-  - We should not retry submitting shares indefinitely or we may end up with a
+  * We should not retry submitting shares indefinitely or we may end up with a
    huge backlog during network outages, so discard stale shares if we failed to
    submit them and they've become stale in the interim.

@ -114,32 +148,32 @@ cgminer (2.4.0-1) stable; urgency=low
				@@ -114,32 +148,32 @@ cgminer (2.4.0-1) stable; urgency=low

 cgminer (2.3.6-3) stable; urgency=low
  Version 2.3.6-3 - may 3, 2012
-  - More bug fixes, Pre 2.4.1 release.
+  * More bug fixes, Pre 2.4.1 release.

 -- nushor <nushor11@gmail.com>  Thurs, 03 May 2012 00:36:50 -0500

 cgminer (2.3.6-2) stable; urgency=low
  Version 2.3.6-2 - May 2, 2012
-  - Various bug fixes, latest build from repository.
+  * Various bug fixes, latest build from repository.

 -- nushor <nushor11@gmail.com>  Wed, 02 May 2012 18:17:49 -0500

 cgminer (2.3.6-1) stable; urgency=low

  Version 2.3.6 - April 29, 2012
-  - Shorten stale share messages slightly.
-  - Protect the freeing of current_hash under mutex_lock to prevent racing on it
+  * Shorten stale share messages slightly.
+  * Protect the freeing of current_hash under mutex_lock to prevent racing on it
    when set_curblock is hit concurrently.
-  - Change default behaviour to submitting stale, removing the --submit-stale
+  * Change default behaviour to submitting stale, removing the --submit-stale
    option and adding a --no-submit-stale option.
-  - Make sure to start the getwork and submit threads when a pool is added on the
+  * Make sure to start the getwork and submit threads when a pool is added on the
    fly. This fixes a crash when a pool is added to running cgminer and then
    switched to.
-  - Faster hardware can easily outstrip the speed we can get work and submit
+  * Faster hardware can easily outstrip the speed we can get work and submit
    shares when using only one connection per pool.
-  - Test the queued list to see if any get/submits are already queued and if they
+  * Test the queued list to see if any get/submits are already queued and if they
    are, start recruiting extra connections by generating new threads.
-  - This allows us to reuse network connections at low loads but recuit new open
+  * This allows us to reuse network connections at low loads but recuit new open
    connections as they're needed, so that cgminer can scale to hardware of any
    size.

--- a/debian/patches/series
+++ b/debian/patches/series
@ -0,0 +1,3 @@
				@@ -0,0 +1,3 @@
+v2.4.1
+v2.4.2
+v2.6.2
--- a/debian/patches/v2.6.2
+++ b/debian/patches/v2.6.2
--- a/diablo120724.cl
+++ b/diablo120724.cl
@ -1242,8 +1242,8 @@ void search(
				@@ -1242,8 +1242,8 @@ void search(
    
    ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
    
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)

 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);
--- a/diakgcn120724.cl
+++ b/diakgcn120724.cl
@ -571,8 +571,8 @@ __kernel
				@@ -571,8 +571,8 @@ __kernel

 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);

-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)

 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))
--- a/driver-bitforce.c
+++ b/driver-bitforce.c
@ -10,6 +10,7 @@
				@@ -10,6 +10,7 @@

 #include <limits.h>
 #include <pthread.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <strings.h>
 #include <sys/time.h>
@ -17,6 +18,32 @@
				@@ -17,6 +18,32 @@

 #include "config.h"

+#ifdef WIN32
+
+#include <windows.h>
+
+#define dlsym (void*)GetProcAddress
+#define dlclose FreeLibrary
+
+typedef unsigned long FT_STATUS;
+typedef PVOID FT_HANDLE;
+__stdcall FT_STATUS (*FT_ListDevices)(PVOID pArg1, PVOID pArg2, DWORD Flags);
+__stdcall FT_STATUS (*FT_Open)(int idx, FT_HANDLE*);
+__stdcall FT_STATUS (*FT_GetComPortNumber)(FT_HANDLE, LPLONG lplComPortNumber);
+__stdcall FT_STATUS (*FT_Close)(FT_HANDLE);
+const uint32_t FT_OPEN_BY_DESCRIPTION =       2;
+const uint32_t FT_LIST_ALL         = 0x20000000;
+const uint32_t FT_LIST_NUMBER_ONLY = 0x80000000;
+enum {
+	FT_OK,
+};
+
+// Code must deal with a timeout. Make it 1 second on windows, 0.1 on linux.
+#define BFopen(devpath)  serial_open(devpath, 0, 10, true)
+#else /* WIN32 */
+#define BFopen(devpath)  serial_open(devpath, 0, 1, true)
+#endif /* WIN32 */
+
 #include "compat.h"
 #include "fpgautils.h"
 #include "miner.h"
@ -32,15 +59,17 @@
				@@ -32,15 +59,17 @@
 #define tv_to_ms(tval) (tval.tv_sec * 1000 + tval.tv_usec / 1000)
 #define TIME_AVG_CONSTANT 8

-struct device_api bitforce_api;
+#define KNAME_WORK  "full work"
+#define KNAME_RANGE "nonce range"

-#define BFopen(devpath)  serial_open(devpath, 0, -1, true)
+struct device_api bitforce_api;

 static void BFgets(char *buf, size_t bufLen, int fd)
 {
-	do
+	do {
+		buf[0] = '\0';
 		--bufLen;
-	while (likely(bufLen && read(fd, buf, 1) == 1 && (buf++)[0] != '\n'));
+	} while (likely(bufLen && read(fd, buf, 1) == 1 && (buf++)[0] != '\n'));

 	buf[0] = '\0';
 }
@ -72,7 +101,7 @@ static bool bitforce_detect_one(const char *devpath)
				@@ -72,7 +101,7 @@ static bool bitforce_detect_one(const char *devpath)
 	BFwrite(fdDev, "ZGX", 3);
 	BFgets(pdevbuf, sizeof(pdevbuf), fdDev);
 	if (unlikely(!pdevbuf[0])) {
-		applog(LOG_ERR, "BFL: Error reading (ZGX)");
+		applog(LOG_ERR, "BFL: Error reading/timeout (ZGX)");
 		return 0;
 	}

@ -93,10 +122,10 @@ static bool bitforce_detect_one(const char *devpath)
				@@ -93,10 +122,10 @@ static bool bitforce_detect_one(const char *devpath)
 	if (opt_bfl_noncerange) {
 		bitforce->nonce_range = true;
 		bitforce->sleep_ms = BITFORCE_SLEEP_MS;
-		bitforce->kname = "Mini-rig";
+		bitforce->kname = KNAME_RANGE;
 	} else {
 		bitforce->sleep_ms = BITFORCE_SLEEP_MS * 5;
-		bitforce->kname = "Single";
+		bitforce->kname = KNAME_WORK;
 	}

 	if (likely((!memcmp(pdevbuf, ">>>ID: ", 7)) && (s = strstr(pdevbuf + 3, ">>>")))) {
@ -109,14 +138,96 @@ static bool bitforce_detect_one(const char *devpath)
				@@ -109,14 +138,96 @@ static bool bitforce_detect_one(const char *devpath)
 	return add_cgpu(bitforce);
 }

-static char bitforce_detect_auto()
+#define LOAD_SYM(sym)  do { \
+	if (!(sym = dlsym(dll, #sym))) {  \
+		applog(LOG_DEBUG, "Failed to load " #sym ", not using FTDI bitforce autodetect");  \
+		goto out;  \
+	}  \
+} while(0)
+
+#ifdef WIN32
+static int bitforce_autodetect_ftdi(void)
+{
+	char devpath[] = "\\\\.\\COMnnnnn";
+	char *devpathnum = &devpath[7];
+	char **bufptrs;
+	char *buf;
+	int found = 0;
+	int i;
+
+	FT_STATUS ftStatus;
+	DWORD numDevs;
+	HMODULE dll = LoadLibrary("FTD2XX.DLL");
+	if (!dll) {
+		applog(LOG_DEBUG, "FTD2XX.DLL failed to load, not using FTDI bitforce autodetect");
+		return 0;
+	}
+	LOAD_SYM(FT_ListDevices);
+	LOAD_SYM(FT_Open);
+	LOAD_SYM(FT_GetComPortNumber);
+	LOAD_SYM(FT_Close);
+	
+	ftStatus = FT_ListDevices(&numDevs, NULL, FT_LIST_NUMBER_ONLY);
+	if (ftStatus != FT_OK) {
+		applog(LOG_DEBUG, "FTDI device count failed, not using FTDI bitforce autodetect");
+		goto out;
+	}
+	applog(LOG_DEBUG, "FTDI reports %u devices", (unsigned)numDevs);
+
+	buf = alloca(65 * numDevs);
+	bufptrs = alloca(numDevs + 1);
+
+	for (i = 0; i < numDevs; ++i)
+		bufptrs[i] = &buf[i * 65];
+	bufptrs[numDevs] = NULL;
+	ftStatus = FT_ListDevices(bufptrs, &numDevs, FT_LIST_ALL | FT_OPEN_BY_DESCRIPTION);
+	if (ftStatus != FT_OK) {
+		applog(LOG_DEBUG, "FTDI device list failed, not using FTDI bitforce autodetect");
+		goto out;
+	}
+	
+	for (i = numDevs; i > 0; ) {
+		--i;
+		bufptrs[i][64] = '\0';
+		
+		if (!(strstr(bufptrs[i], "BitFORCE") && strstr(bufptrs[i], "SHA256")))
+			continue;
+		
+		FT_HANDLE ftHandle;
+		if (FT_OK != FT_Open(i, &ftHandle))
+			continue;
+		LONG lComPortNumber;
+		ftStatus = FT_GetComPortNumber(ftHandle, &lComPortNumber);
+		FT_Close(ftHandle);
+		if (FT_OK != ftStatus || lComPortNumber < 0)
+			continue;
+		
+		sprintf(devpathnum, "%d", (int)lComPortNumber);
+		
+		if (bitforce_detect_one(devpath))
+			++found;
+	}
+
+out:
+	dlclose(dll);
+	return found;
+}
+#else
+static int bitforce_autodetect_ftdi(void)
+{
+	return 0;
+}
+#endif
+
+static int bitforce_detect_auto(void)
 {
 	return (serial_autodetect_udev     (bitforce_detect_one, "BitFORCE*SHA256") ?:
 		serial_autodetect_devserial(bitforce_detect_one, "BitFORCE_SHA256") ?:
+		bitforce_autodetect_ftdi() ?:
 		0);
 }

-static void bitforce_detect()
+static void bitforce_detect(void)
 {
 	serial_detect_auto(bitforce_api.dname, bitforce_detect_one, bitforce_detect_auto);
 }
@ -152,7 +263,7 @@ static bool bitforce_thread_prepare(struct thr_info *thr)
				@@ -152,7 +263,7 @@ static bool bitforce_thread_prepare(struct thr_info *thr)
 	return true;
 }

-static void biforce_clear_buffer(struct cgpu_info *bitforce)
+static void bitforce_clear_buffer(struct cgpu_info *bitforce)
 {
 	int fdDev = bitforce->device_fd;
 	char pdevbuf[0x100];
@ -180,6 +291,8 @@ void bitforce_init(struct cgpu_info *bitforce)
				@@ -180,6 +291,8 @@ void bitforce_init(struct cgpu_info *bitforce)

 	applog(LOG_WARNING, "BFL%i: Re-initialising", bitforce->device_id);

+	bitforce_clear_buffer(bitforce);
+
 	mutex_lock(&bitforce->device_mutex);
 	if (fdDev) {
 		BFclose(fdDev);
@ -200,7 +313,7 @@ void bitforce_init(struct cgpu_info *bitforce)
				@@ -200,7 +313,7 @@ void bitforce_init(struct cgpu_info *bitforce)

 		if (unlikely(!pdevbuf[0])) {
 			mutex_unlock(&bitforce->device_mutex);
-			applog(LOG_ERR, "BFL%i: Error reading (ZGX)", bitforce->device_id);
+			applog(LOG_ERR, "BFL%i: Error reading/timeout (ZGX)", bitforce->device_id);
 			return;
 		}

@ -234,14 +347,18 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
				@@ -234,14 +347,18 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
 	if (!fdDev)
 		return false;

-	mutex_lock(&bitforce->device_mutex);
+	/* It is not critical getting temperature so don't get stuck if  we
+	 * can't grab the mutex here */
+	if (mutex_trylock(&bitforce->device_mutex))
+		return false;
+
 	BFwrite(fdDev, "ZLX", 3);
 	BFgets(pdevbuf, sizeof(pdevbuf), fdDev);
 	mutex_unlock(&bitforce->device_mutex);
 	
 	if (unlikely(!pdevbuf[0])) {
-		applog(LOG_ERR, "BFL%i: Error: Get temp returned empty string", bitforce->device_id);
-		bitforce->temp = 0;
+		applog(LOG_ERR, "BFL%i: Error: Get temp returned empty string/timed out", bitforce->device_id);
+		bitforce->hw_errors++;
 		return false;
 	}

@ -250,7 +367,7 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
				@@ -250,7 +367,7 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)

 		if (temp > 0) {
 			bitforce->temp = temp;
-			if (temp > bitforce->cutofftemp) {
+			if (unlikely(bitforce->cutofftemp > 0 && temp > bitforce->cutofftemp)) {
 				applog(LOG_WARNING, "BFL%i: Hit thermal cutoff limit, disabling!", bitforce->device_id);
 				bitforce->deven = DEV_RECOVER;

@ -259,7 +376,17 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
				@@ -259,7 +376,17 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
 				bitforce->dev_thermal_cutoff_count++;
 			}
 		}
+	} else {
+		/* Use the temperature monitor as a kind of watchdog for when
+		 * our responses are out of sync and flush the buffer to
+		 * hopefully recover */
+		applog(LOG_WARNING, "BFL%i: Garbled response probably throttling, clearing buffer");
+		/* Count throttling episodes as hardware errors */
+		bitforce->hw_errors++;
+		bitforce_clear_buffer(bitforce);
+		return false;;
 	}
+
 	return true;
 }

@ -290,10 +417,12 @@ re_send:
				@@ -290,10 +417,12 @@ re_send:
 			applog(LOG_WARNING, "BFL%i: Does not support nonce range, disabling", bitforce->device_id);
 			bitforce->nonce_range = false;
 			bitforce->sleep_ms *= 5;
-			bitforce->kname = "Single";
+			bitforce->kname = KNAME_WORK;
 			goto re_send;
 		}
 		applog(LOG_ERR, "BFL%i: Error: Send work reports: %s", bitforce->device_id, pdevbuf);
+		bitforce->hw_errors++;
+		bitforce_clear_buffer(bitforce);
 		return false;
 	}

@ -328,12 +457,14 @@ re_send:
				@@ -328,12 +457,14 @@ re_send:
 	}

 	if (unlikely(!pdevbuf[0])) {
-		applog(LOG_ERR, "BFL%i: Error: Send block data returned empty string", bitforce->device_id);
+		applog(LOG_ERR, "BFL%i: Error: Send block data returned empty string/timed out", bitforce->device_id);
 		return false;
 	}

 	if (unlikely(strncasecmp(pdevbuf, "OK", 2))) {
 		applog(LOG_ERR, "BFL%i: Error: Send block data reports: %s", bitforce->device_id, pdevbuf);
+		bitforce->hw_errors++;
+		bitforce_clear_buffer(bitforce);
 		return false;
 	}

@ -408,7 +539,7 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
				@@ -408,7 +539,7 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
 		}

 		if (delay_time_ms != bitforce->sleep_ms)
-			  applog(LOG_DEBUG, "BFL%i: Wait time changed to: %d", bitforce->device_id, bitforce->sleep_ms, bitforce->wait_ms);
+			  applog(LOG_DEBUG, "BFL%i: Wait time changed to: %d, waited %u", bitforce->device_id, bitforce->sleep_ms, bitforce->wait_ms);

 		/* Work out the average time taken. Float for calculation, uint for display */
 		bitforce->avg_wait_f += (tv_to_ms(elapsed) - bitforce->avg_wait_f) / TIME_AVG_CONSTANT;
@ -421,7 +552,9 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
				@@ -421,7 +552,9 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
 	else if (!strncasecmp(pdevbuf, "I", 1))
 		return 0;	/* Device idle */
 	else if (strncasecmp(pdevbuf, "NONCE-FOUND", 11)) {
+		bitforce->hw_errors++;
 		applog(LOG_WARNING, "BFL%i: Error: Get result reports: %s", bitforce->device_id, pdevbuf);
+		bitforce_clear_buffer(bitforce);
 		return 0;
 	}

@ -438,7 +571,7 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
				@@ -438,7 +571,7 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
 				bitforce->nonce_range = false;
 				work->blk.nonce = 0xffffffff;
 				bitforce->sleep_ms *= 5;
-				bitforce->kname = "Single";
+				bitforce->kname = KNAME_WORK;
 		}
 			
 		submit_nonce(thr, work, nonce);
@ -469,9 +602,10 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
				@@ -469,9 +602,10 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
 {
 	struct cgpu_info *bitforce = thr->cgpu;
 	unsigned int sleep_time;
+	bool send_ret;
 	int64_t ret;

-	ret = bitforce_send_work(thr, work);
+	send_ret = bitforce_send_work(thr, work);

 	if (!bitforce->nonce_range) {
 		/* Initially wait 2/3 of the average cycle time so we can request more
@ -497,8 +631,10 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
				@@ -497,8 +631,10 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
 		bitforce->wait_ms = sleep_time;
 	}

-	if (ret)
+	if (send_ret)
 		ret = bitforce_get_result(thr, work);
+	else
+		ret = -1;

 	if (ret == -1) {
 		ret = 0;
@ -506,8 +642,9 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
				@@ -506,8 +642,9 @@ static int64_t bitforce_scanhash(struct thr_info *thr, struct work *work, int64_
 		bitforce->device_last_not_well = time(NULL);
 		bitforce->device_not_well_reason = REASON_DEV_COMMS_ERROR;
 		bitforce->dev_comms_error_count++;
+		bitforce->hw_errors++;
 		/* empty read buffer */
-		biforce_clear_buffer(bitforce);
+		bitforce_clear_buffer(bitforce);
 	}
 	return ret;
 }
@ -517,6 +654,20 @@ static bool bitforce_get_stats(struct cgpu_info *bitforce)
				@@ -517,6 +654,20 @@ static bool bitforce_get_stats(struct cgpu_info *bitforce)
 	return bitforce_get_temp(bitforce);
 }

+static bool bitforce_thread_init(struct thr_info *thr)
+{
+	struct cgpu_info *bitforce = thr->cgpu;
+	unsigned int wait;
+
+	/* Pause each new thread at least 100ms between initialising
+	 * so the devices aren't making calls all at the same time. */
+	wait = thr->id * MAX_START_DELAY_US;
+	applog(LOG_DEBUG, "BFL%i: Delaying start by %dms", bitforce->device_id, wait / 1000);
+	usleep(wait);
+
+	return true;
+}
+
 static struct api_data *bitforce_api_stats(struct cgpu_info *cgpu)
 {
 	struct api_data *root = NULL;
@ -540,6 +691,7 @@ struct device_api bitforce_api = {
				@@ -540,6 +691,7 @@ struct device_api bitforce_api = {
 	.get_statline_before = get_bitforce_statline_before,
 	.get_stats = bitforce_get_stats,
 	.thread_prepare = bitforce_thread_prepare,
+	.thread_init = bitforce_thread_init,
 	.scanhash = bitforce_scanhash,
 	.thread_shutdown = bitforce_shutdown,
 	.thread_enable = biforce_thread_enable
--- a/driver-cpu.c
+++ b/driver-cpu.c
@ -131,6 +131,9 @@ extern bool scanhash_sse2_32(struct thr_info*, const unsigned char *pmidstate, u
				@@ -131,6 +131,9 @@ extern bool scanhash_sse2_32(struct thr_info*, const unsigned char *pmidstate, u
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce);

+extern bool scanhash_scrypt(struct thr_info *thr, int thr_id, unsigned char *pdata, unsigned char *scratchbuf,
+	const unsigned char *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done);



@ -161,6 +164,9 @@ const char *algo_names[] = {
				@@ -161,6 +164,9 @@ const char *algo_names[] = {
 #ifdef WANT_ALTIVEC_4WAY
    [ALGO_ALTIVEC_4WAY] = "altivec_4way",
 #endif
+#ifdef WANT_SCRYPT
+    [ALGO_SCRYPT] = "scrypt",
+#endif
 };

 static const sha256_func sha256_funcs[] = {
@ -185,7 +191,10 @@ static const sha256_func sha256_funcs[] = {
				@@ -185,7 +191,10 @@ static const sha256_func sha256_funcs[] = {
 	[ALGO_SSE2_64]		= (sha256_func)scanhash_sse2_64,
 #endif
 #ifdef WANT_X8664_SSE4
-	[ALGO_SSE4_64]		= (sha256_func)scanhash_sse4_64
+	[ALGO_SSE4_64]		= (sha256_func)scanhash_sse4_64,
+#endif
+#ifdef WANT_SCRYPT
+	[ALGO_SCRYPT]		= (sha256_func)scanhash_scrypt
 #endif
 };
 #endif
@ -662,6 +671,9 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
				@@ -662,6 +671,9 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
 {
 	enum sha256_algos i;

+	if (opt_scrypt)
+		return "Can only use scrypt algorithm";
+
 	if (!strcmp(arg, "auto")) {
 		*algo = pick_fastest_algo();
 		return NULL;
@ -676,6 +688,13 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
				@@ -676,6 +688,13 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
 	return "Unknown algorithm";
 }

+#ifdef WANT_SCRYPT
+void set_scrypt_algo(enum sha256_algos *algo)
+{
+	*algo = ALGO_SCRYPT;
+}
+#endif
+
 void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo)
 {
 	strncpy(buf, algo_names[*algo], OPT_SHOW_LEN);
@ -758,7 +777,7 @@ static bool cpu_thread_prepare(struct thr_info *thr)
				@@ -758,7 +777,7 @@ static bool cpu_thread_prepare(struct thr_info *thr)

 static uint64_t cpu_can_limit_work(struct thr_info *thr)
 {
-	return 0xfffff;
+	return 0xffff;
 }

 static bool cpu_thread_init(struct thr_info *thr)
--- a/driver-cpu.h
+++ b/driver-cpu.h
@ -34,6 +34,10 @@
				@@ -34,6 +34,10 @@
 #define WANT_X8664_SSE4 1
 #endif

+#ifdef USE_SCRYPT
+#define WANT_SCRYPT
+#endif
+
 enum sha256_algos {
 	ALGO_C,			/* plain C */
 	ALGO_4WAY,		/* parallel SSE2 */
@ -44,6 +48,7 @@ enum sha256_algos {
				@@ -44,6 +48,7 @@ enum sha256_algos {
 	ALGO_SSE2_64,		/* SSE2 for x86_64 */
 	ALGO_SSE4_64,		/* SSE4 for x86_64 */
 	ALGO_ALTIVEC_4WAY,	/* parallel Altivec */
+	ALGO_SCRYPT,		/* scrypt */
 };

 extern const char *algo_names[];
@ -55,5 +60,6 @@ extern void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo);
				@@ -55,5 +60,6 @@ extern void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo);
 extern char *force_nthreads_int(const char *arg, int *i);
 extern void init_max_name_len();
 extern double bench_algo_stage3(enum sha256_algos algo);
+extern void set_scrypt_algo(enum sha256_algos *algo);

 #endif /* __DEVICE_CPU_H__ */
--- a/driver-icarus.c
+++ b/driver-icarus.c
@ -65,7 +65,7 @@
				@@ -65,7 +65,7 @@
 #define ASSERT1(condition) __maybe_unused static char sizeof_uint32_t_must_be_4[(condition)?1:-1]
 ASSERT1(sizeof(uint32_t) == 4);

-#define ICARUS_READ_TIME ((double)ICARUS_READ_SIZE * (double)8.0 / (double)ICARUS_IO_SPEED)
+#define ICARUS_READ_TIME(baud) ((double)ICARUS_READ_SIZE * (double)8.0 / (double)(baud))

 // Fraction of a second, USB timeout is measured in
 // i.e. 10 means 1/10 of a second
@ -176,11 +176,36 @@ struct ICARUS_INFO {
				@@ -176,11 +176,36 @@ struct ICARUS_INFO {
 	// (which will only affect W)
 	uint64_t history_count;
 	struct timeval history_time;
+
+	// icarus-options
+	int baud;
+	int work_division;
+	int fpga_count;
+	uint32_t nonce_mask;
 };

+#define END_CONDITION 0x0000ffff
+
 // One for each possible device
 static struct ICARUS_INFO **icarus_info;

+// Looking for options in --icarus-timing and --icarus-options:
+//
+// Code increments this each time we start to look at a device
+// However, this means that if other devices are checked by
+// the Icarus code (e.g. BFL) they will count in the option offset
+//
+// This, however, is deterministic so that's OK
+//
+// If we were to increment after successfully finding an Icarus
+// that would be random since an Icarus may fail and thus we'd
+// not be able to predict the option order
+//
+// This also assumes that serial_detect() checks them sequentially
+// and in the order specified on the command line
+//
+static int option_offset = -1;
+
 struct device_api icarus_api;

 static void rev(unsigned char *s, size_t l)
@ -195,8 +220,8 @@ static void rev(unsigned char *s, size_t l)
				@@ -195,8 +220,8 @@ static void rev(unsigned char *s, size_t l)
 	}
 }

-#define icarus_open2(devpath, purge)  serial_open(devpath, 115200, ICARUS_READ_FAULT_DECISECONDS, purge)
-#define icarus_open(devpath)  icarus_open2(devpath, false)
+#define icarus_open2(devpath, baud, purge)  serial_open(devpath, baud, ICARUS_READ_FAULT_DECISECONDS, purge)
+#define icarus_open(devpath, baud)  icarus_open2(devpath, baud, false)

 static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct thr_info *thr, int read_count)
 {
@ -272,7 +297,7 @@ static const char *timing_mode_str(enum timing_mode timing_mode)
				@@ -272,7 +297,7 @@ static const char *timing_mode_str(enum timing_mode timing_mode)
 	}
 }

-static void set_timing_mode(struct cgpu_info *icarus)
+static void set_timing_mode(int this_option_offset, struct cgpu_info *icarus)
 {
 	struct ICARUS_INFO *info = icarus_info[icarus->device_id];
 	double Hs;
@ -285,7 +310,7 @@ static void set_timing_mode(struct cgpu_info *icarus)
				@@ -285,7 +310,7 @@ static void set_timing_mode(struct cgpu_info *icarus)
 		buf[0] = '\0';
 	else {
 		ptr = opt_icarus_timing;
-		for (i = 0; i < icarus->device_id; i++) {
+		for (i = 0; i < this_option_offset; i++) {
 			comma = strchr(ptr, ',');
 			if (comma == NULL)
 				break;
@ -354,11 +379,123 @@ static void set_timing_mode(struct cgpu_info *icarus)
				@@ -354,11 +379,123 @@ static void set_timing_mode(struct cgpu_info *icarus)

 	applog(LOG_DEBUG, "Icarus: Init: %d mode=%s read_count=%d Hs=%e",
 		icarus->device_id, timing_mode_str(info->timing_mode), info->read_count, info->Hs);
+}
+
+static uint32_t mask(int work_division)
+{
+	char err_buf[BUFSIZ+1];
+	uint32_t nonce_mask = 0x7fffffff;
+
+	// yes we can calculate these, but this way it's easy to see what they are
+	switch (work_division) {
+	case 1:
+		nonce_mask = 0xffffffff;
+		break;
+	case 2:
+		nonce_mask = 0x7fffffff;
+		break;
+	case 4:
+		nonce_mask = 0x3fffffff;
+		break;
+	case 8:
+		nonce_mask = 0x1fffffff;
+		break;
+	default:
+		sprintf(err_buf, "Invalid2 icarus-options for work_division (%d) must be 1, 2, 4 or 8", work_division);
+		quit(1, err_buf);
+	}
+
+	return nonce_mask;
+}
+
+static void get_options(int this_option_offset, int *baud, int *work_division, int *fpga_count)
+{
+	char err_buf[BUFSIZ+1];
+	char buf[BUFSIZ+1];
+	char *ptr, *comma, *colon, *colon2;
+	size_t max;
+	int i, tmp;
+
+	if (opt_icarus_options == NULL)
+		buf[0] = '\0';
+	else {
+		ptr = opt_icarus_options;
+		for (i = 0; i < this_option_offset; i++) {
+			comma = strchr(ptr, ',');
+			if (comma == NULL)
+				break;
+			ptr = comma + 1;
+		}
+
+		comma = strchr(ptr, ',');
+		if (comma == NULL)
+			max = strlen(ptr);
+		else
+			max = comma - ptr;

+		if (max > BUFSIZ)
+			max = BUFSIZ;
+		strncpy(buf, ptr, max);
+		buf[max] = '\0';
+	}
+
+	*baud = ICARUS_IO_SPEED;
+	*work_division = 2;
+	*fpga_count = 2;
+
+	if (*buf) {
+		colon = strchr(buf, ':');
+		if (colon)
+			*(colon++) = '\0';
+
+		if (*buf) {
+			tmp = atoi(buf);
+			switch (tmp) {
+			case 115200:
+				*baud = 115200;
+				break;
+			case 57600:
+				*baud = 57600;
+				break;
+			default:
+				sprintf(err_buf, "Invalid icarus-options for baud (%s) must be 115200 or 57600", buf);
+				quit(1, err_buf);
+			}
+		}
+
+		if (colon && *colon) {
+			colon2 = strchr(colon, ':');
+			if (colon2)
+				*(colon2++) = '\0';
+
+			if (*colon) {
+				tmp = atoi(colon);
+				if (tmp == 1 || tmp == 2 || tmp == 4 || tmp == 8) {
+					*work_division = tmp;
+					*fpga_count = tmp;	// default to the same
+				} else {
+					sprintf(err_buf, "Invalid icarus-options for work_division (%s) must be 1, 2, 4 or 8", colon);
+					quit(1, err_buf);
+				}
+			}
+
+			if (colon2 && *colon2) {
+				tmp = atoi(colon2);
+				if (tmp > 0 && tmp <= *work_division)
+					*fpga_count = tmp;
+				else {
+					sprintf(err_buf, "Invalid icarus-options for fpga_count (%s) must be >0 and <=work_division (%d)", colon2, *work_division);
+					quit(1, err_buf);
+				}
+			}
+		}
+	}
 }

 static bool icarus_detect_one(const char *devpath)
 {
+	int this_option_offset = ++option_offset;
+
 	struct ICARUS_INFO *info;
 	struct timeval tv_start, tv_finish;
 	int fd;
@ -379,9 +516,13 @@ static bool icarus_detect_one(const char *devpath)
				@@ -379,9 +516,13 @@ static bool icarus_detect_one(const char *devpath)
 	unsigned char ob_bin[64], nonce_bin[ICARUS_READ_SIZE];
 	char *nonce_hex;

+	int baud, work_division, fpga_count;
+
+	get_options(this_option_offset, &baud, &work_division, &fpga_count);
+
 	applog(LOG_DEBUG, "Icarus Detect: Attempting to open %s", devpath);

-	fd = icarus_open2(devpath, true);
+	fd = icarus_open2(devpath, baud, true);
 	if (unlikely(fd == -1)) {
 		applog(LOG_ERR, "Icarus Detect: Failed to open %s", devpath);
 		return false;
@ -429,6 +570,9 @@ static bool icarus_detect_one(const char *devpath)
				@@ -429,6 +570,9 @@ static bool icarus_detect_one(const char *devpath)
 	applog(LOG_INFO, "Found Icarus at %s, mark as %d",
 		devpath, icarus->device_id);

+	applog(LOG_DEBUG, "Icarus: Init: %d baud=%d work_division=%d fpga_count=%d",
+		icarus->device_id, baud, work_division, fpga_count);
+
 	// Since we are adding a new device on the end it needs to always be allocated
 	icarus_info[icarus->device_id] = (struct ICARUS_INFO *)malloc(sizeof(struct ICARUS_INFO));
 	if (unlikely(!(icarus_info[icarus->device_id])))
@ -439,10 +583,15 @@ static bool icarus_detect_one(const char *devpath)
				@@ -439,10 +583,15 @@ static bool icarus_detect_one(const char *devpath)
 	// Initialise everything to zero for a new device
 	memset(info, 0, sizeof(struct ICARUS_INFO));

-	info->golden_hashes = (golden_nonce_val & 0x7fffffff) << 1;
+	info->baud = baud;
+	info->work_division = work_division;
+	info->fpga_count = fpga_count;
+	info->nonce_mask = mask(work_division);
+
+	info->golden_hashes = (golden_nonce_val & info->nonce_mask) * fpga_count;
 	timersub(&tv_finish, &tv_start, &(info->golden_tv));

-	set_timing_mode(icarus);
+	set_timing_mode(this_option_offset, icarus);

 	return true;
 }
@ -458,7 +607,7 @@ static bool icarus_prepare(struct thr_info *thr)
				@@ -458,7 +607,7 @@ static bool icarus_prepare(struct thr_info *thr)

 	struct timeval now;

-	int fd = icarus_open(icarus->device_path);
+	int fd = icarus_open(icarus->device_path, icarus_info[icarus->device_id]->baud);
 	if (unlikely(-1 == fd)) {
 		applog(LOG_ERR, "Failed to open Icarus on %s",
 		       icarus->device_path);
@ -565,11 +714,9 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
				@@ -565,11 +714,9 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,

 	submit_nonce(thr, work, nonce);

-	hash_count = (nonce & 0x7fffffff);
-	if (hash_count++ == 0x7fffffff)
-		hash_count = 0xffffffff;
-	else
-		hash_count <<= 1;
+	hash_count = (nonce & info->nonce_mask);
+	hash_count++;
+	hash_count *= info->fpga_count;

 	if (opt_debug || info->do_icarus_timing)
 		timersub(&tv_finish, &tv_start, &elapsed);
@ -580,7 +727,9 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
				@@ -580,7 +727,9 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	}

 	// ignore possible end condition values
-	if (info->do_icarus_timing && (nonce & 0x7fffffff) > 0x000fffff && (nonce & 0x7fffffff) < 0x7ff00000) {
+	if (info->do_icarus_timing
+	&&  ((nonce & info->nonce_mask) > END_CONDITION)
+	&&  ((nonce & info->nonce_mask) < (info->nonce_mask & ~END_CONDITION))) {
 		gettimeofday(&tv_history_start, NULL);

 		history0 = &(info->history[0]);
@ -590,7 +739,7 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
				@@ -590,7 +739,7 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,

 		Ti = (double)(elapsed.tv_sec)
 			+ ((double)(elapsed.tv_usec))/((double)1000000)
-			- ICARUS_READ_TIME;
+			- ((double)ICARUS_READ_TIME(info->baud));
 		Xi = (double)hash_count;
 		history0->sumXiTi += Xi * Ti;
 		history0->sumXi += Xi;
@ -700,6 +849,9 @@ static struct api_data *icarus_api_stats(struct cgpu_info *cgpu)
				@@ -700,6 +849,9 @@ static struct api_data *icarus_api_stats(struct cgpu_info *cgpu)
 	root = api_add_uint(root, "timing_values", &(info->history[0].values), false);
 	root = api_add_const(root, "timing_mode", timing_mode_str(info->timing_mode), false);
 	root = api_add_bool(root, "is_timing", &(info->do_icarus_timing), false);
+	root = api_add_int(root, "baud", &(info->baud), false);
+	root = api_add_int(root, "work_division", &(info->work_division), false);
+	root = api_add_int(root, "fpga_count", &(info->fpga_count), false);

 	return root;
 }
--- a/driver-modminer.c
+++ b/driver-modminer.c
@ -91,7 +91,7 @@ modminer_detect_one(const char *devpath)
				@@ -91,7 +91,7 @@ modminer_detect_one(const char *devpath)

 #undef bailout

-static char
+static int
 modminer_detect_auto()
 {
 	return
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -127,6 +127,83 @@ char *set_worksize(char *arg)
				@@ -127,6 +127,83 @@ char *set_worksize(char *arg)
 	return NULL;
 }

+#ifdef USE_SCRYPT
+char *set_shaders(char *arg)
+{
+	int i, val = 0, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set lookup gap";
+	val = atoi(nextptr);
+
+	gpus[device++].shaders = val;
+
+	while ((nextptr = strtok(NULL, ",")) != NULL) {
+		val = atoi(nextptr);
+
+		gpus[device++].shaders = val;
+	}
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].shaders = gpus[0].shaders;
+	}
+
+	return NULL;
+}
+
+char *set_lookup_gap(char *arg)
+{
+	int i, val = 0, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set lookup gap";
+	val = atoi(nextptr);
+
+	gpus[device++].opt_lg = val;
+
+	while ((nextptr = strtok(NULL, ",")) != NULL) {
+		val = atoi(nextptr);
+
+		gpus[device++].opt_lg = val;
+	}
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].opt_lg = gpus[0].opt_lg;
+	}
+
+	return NULL;
+}
+
+char *set_thread_concurrency(char *arg)
+{
+	int i, val = 0, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set thread concurrency";
+	val = atoi(nextptr);
+
+	gpus[device++].opt_tc = val;
+
+	while ((nextptr = strtok(NULL, ",")) != NULL) {
+		val = atoi(nextptr);
+
+		gpus[device++].opt_tc = val;
+	}
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].opt_tc = gpus[0].opt_tc;
+	}
+
+	return NULL;
+}
+#endif
+
 static enum cl_kernels select_kernel(char *arg)
 {
 	if (!strcmp(arg, "diablo"))
@ -137,6 +214,10 @@ static enum cl_kernels select_kernel(char *arg)
				@@ -137,6 +214,10 @@ static enum cl_kernels select_kernel(char *arg)
 		return KL_POCLBM;
 	if (!strcmp(arg, "phatk"))
 		return KL_PHATK;
+#ifdef USE_SCRYPT
+	if (!strcmp(arg, "scrypt"))
+		return KL_SCRYPT;
+#endif
 	return KL_NONE;
 }

@ -146,6 +227,8 @@ char *set_kernel(char *arg)
				@@ -146,6 +227,8 @@ char *set_kernel(char *arg)
 	int i, device = 0;
 	char *nextptr;

+	if (opt_scrypt)
+		return "Cannot use sha256 kernel with scrypt";
 	nextptr = strtok(arg, ",");
 	if (nextptr == NULL)
 		return "Invalid parameters for set kernel";
@ -577,9 +660,19 @@ retry:
				@@ -577,9 +660,19 @@ retry:

 	for (gpu = 0; gpu < nDevs; gpu++) {
 		struct cgpu_info *cgpu = &gpus[gpu];
+		double displayed_rolling, displayed_total;
+		bool mhash_base = true;
+
+		displayed_rolling = cgpu->rolling;
+		displayed_total = cgpu->total_mhashes / total_secs;
+		if (displayed_rolling < 1) {
+			displayed_rolling *= 1000;
+			displayed_total *= 1000;
+			mhash_base = false;
+		}

-		wlog("GPU %d: %.1f / %.1f Mh/s | A:%d  R:%d  HW:%d  U:%.2f/m  I:%d\n",
-			gpu, cgpu->rolling, cgpu->total_mhashes / total_secs,
+		wlog("GPU %d: %.1f / %.1f %sh/s | A:%d  R:%d  HW:%d  U:%.2f/m  I:%d\n",
+			gpu, displayed_rolling, displayed_total, mhash_base ? "M" : "K",
 			cgpu->accepted, cgpu->rejected, cgpu->hw_errors,
 			cgpu->utility, cgpu->intensity);
 #ifdef HAVE_ADL
@ -627,7 +720,10 @@ retry:
				@@ -627,7 +720,10 @@ retry:
 			if (thr->cgpu != cgpu)
 				continue;
 			get_datestamp(checkin, &thr->last);
-			wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
+			displayed_rolling = thr->rolling;
+			if (!mhash_base)
+				displayed_rolling *= 1000;
+			wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K" , cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
 			switch (cgpu->status) {
 				default:
 				case LIFE_WELL:
@ -986,11 +1082,40 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
				@@ -986,11 +1082,40 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 	return status;
 }

+#ifdef USE_SCRYPT
+static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+{
+	unsigned char *midstate = blk->work->midstate;
+	cl_kernel *kernel = &clState->kernel;
+	unsigned int num = 0;
+	cl_uint le_target;
+	cl_int status = 0;
+
+	le_target = *(cl_uint *)(blk->work->target + 28);
+	clState->cldata = blk->work->data;
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
+
+	CL_SET_ARG(clState->CLbuffer0);
+	CL_SET_ARG(clState->outputBuffer);
+	CL_SET_ARG(clState->padbuffer8);
+	CL_SET_VARG(4, &midstate[0]);
+	CL_SET_VARG(4, &midstate[16]);
+	CL_SET_ARG(le_target);
+
+	return status;
+}
+#endif
+
 static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 			       int64_t *hashes, size_t *globalThreads,
 			       unsigned int minthreads, int intensity)
 {
-	*threads = 1 << (15 + intensity);
+	if (opt_scrypt) {
+		if (intensity < 0)
+			intensity = 0;
+		*threads = 1 << intensity;
+	} else
+		*threads = 1 << (15 + intensity);
 	if (*threads < minthreads)
 		*threads = minthreads;
 	*globalThreads = *threads;
@ -1210,16 +1335,17 @@ static bool opencl_thread_prepare(struct thr_info *thr)
				@@ -1210,16 +1335,17 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
 	clStates[i] = initCl(virtual_gpu, name, sizeof(name));
 	if (!clStates[i]) {
+#ifdef HAVE_CURSES
 		if (use_curses)
 			enable_curses();
+#endif
 		applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu);
 		if (!failmessage) {
-			char *buf;
-
 			applog(LOG_ERR, "Restarting the GPU from the menu will not fix this.");
 			applog(LOG_ERR, "Try restarting cgminer.");
 			failmessage = true;
 #ifdef HAVE_CURSES
+			char *buf;
 			if (use_curses) {
 				buf = curses_input("Press enter to continue");
 				if (buf)
@ -1241,19 +1367,25 @@ static bool opencl_thread_prepare(struct thr_info *thr)
				@@ -1241,19 +1367,25 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 	if (!cgpu->kname)
 	{
 		switch (clStates[i]->chosen_kernel) {
-		case KL_DIABLO:
-			cgpu->kname = "diablo";
-			break;
-		case KL_DIAKGCN:
-			cgpu->kname = "diakgcn";
-			break;
-		case KL_PHATK:
-			cgpu->kname = "phatk";
-			break;
-		case KL_POCLBM:
-			cgpu->kname = "poclbm";
-		default:
-			break;
+			case KL_DIABLO:
+				cgpu->kname = "diablo";
+				break;
+			case KL_DIAKGCN:
+				cgpu->kname = "diakgcn";
+				break;
+			case KL_PHATK:
+				cgpu->kname = "phatk";
+				break;
+#ifdef USE_SCRYPT
+			case KL_SCRYPT:
+				cgpu->kname = "scrypt";
+				break;
+#endif
+			case KL_POCLBM:
+				cgpu->kname = "poclbm";
+				break;
+			default:
+				break;
 		}
 	}
 	applog(LOG_INFO, "initCl() finished. Found %s", name);
@ -1271,7 +1403,7 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1271,7 +1403,7 @@ static bool opencl_thread_init(struct thr_info *thr)
 	struct cgpu_info *gpu = thr->cgpu;
 	struct opencl_thread_data *thrdata;
 	_clState *clState = clStates[thr_id];
-	cl_int status;
+	cl_int status = 0;
 	thrdata = calloc(1, sizeof(*thrdata));
 	thr->cgpu_data = thrdata;

@ -1290,6 +1422,11 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1290,6 +1422,11 @@ static bool opencl_thread_init(struct thr_info *thr)
 		case KL_DIAKGCN:
 			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
 			break;
+#ifdef USE_SCRYPT
+		case KL_SCRYPT:
+			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
+			break;
+#endif
 		default:
 		case KL_DIABLO:
 			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
@ -1304,7 +1441,7 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1304,7 +1441,7 @@ static bool opencl_thread_init(struct thr_info *thr)
 		return false;
 	}

-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+	status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
 			BUFFERSIZE, blank_res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@ -1333,7 +1470,12 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
				@@ -1333,7 +1470,12 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)

 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-	precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
+#ifdef USE_SCRYPT
+	if (opt_scrypt)
+		work->blk.work = work;
+	else
+#endif
+		precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
 	return true;
 }

@ -1348,6 +1490,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1348,6 +1490,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	_clState *clState = clStates[thr_id];
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
+	cl_bool blocking;

 	cl_int status;
 	size_t globalThreads[1];
@ -1355,14 +1498,20 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1355,14 +1498,20 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	unsigned int threads;
 	int64_t hashes;

+	if (gpu->dynamic)
+		blocking = CL_TRUE;
+	else
+		blocking = CL_FALSE;
+
 	/* This finish flushes the readbuffer set with CL_FALSE later */
-	clFinish(clState->commandQueue);
-	gettimeofday(&gpu->tv_gpuend, NULL);
+	if (!blocking)
+		clFinish(clState->commandQueue);

 	if (gpu->dynamic) {
 		struct timeval diff;
 		suseconds_t gpu_us;

+		gettimeofday(&gpu->tv_gpuend, NULL);
 		timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
 		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
 		if (likely(gpu_us >= 0)) {
@ -1384,6 +1533,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1384,6 +1533,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			   localThreads[0], gpu->intensity);
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
+
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@ -1393,7 +1543,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1393,7 +1543,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	/* MAXBUFFERS entry is used as a flag to say nonces exist */
 	if (thrdata->res[FOUND]) {
 		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
 				BUFFERSIZE, blank_res, 0, NULL, NULL);
 		if (unlikely(status != CL_SUCCESS)) {
 			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@ -1408,7 +1558,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1408,7 +1558,8 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			postcalc_hash_async(thr, work, thrdata->res);
 		}
 		memset(thrdata->res, 0, BUFFERSIZE);
-		clFinish(clState->commandQueue);
+		if (!blocking)
+			clFinish(clState->commandQueue);
 	}

 	gettimeofday(&gpu->tv_gpustart, NULL);
@ -1423,14 +1574,14 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				@@ -1423,14 +1574,14 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
 						globalThreads, localThreads, 0,  NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
+		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
 		return -1;
 	}

-	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
 			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
 		return -1;
 	}

--- a/driver-opencl.h
+++ b/driver-opencl.h
@ -18,6 +18,11 @@ extern char *set_temp_target(char *arg);
				@@ -18,6 +18,11 @@ extern char *set_temp_target(char *arg);
 extern char *set_intensity(char *arg);
 extern char *set_vector(char *arg);
 extern char *set_worksize(char *arg);
+#ifdef USE_SCRYPT
+extern char *set_shaders(char *arg);
+extern char *set_lookup_gap(char *arg);
+extern char *set_thread_concurrency(char *arg);
+#endif
 extern char *set_kernel(char *arg);
 void manage_gpu(void);
 extern void pause_dynamic_threads(int gpu);
--- a/findnonce.c
+++ b/findnonce.c
@ -17,6 +17,7 @@
				@@ -17,6 +17,7 @@
 #include <string.h>

 #include "findnonce.h"
+#include "scrypt.h"

 const uint32_t SHA256_K[64] = {
 	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
@ -45,7 +46,8 @@ const uint32_t SHA256_K[64] = {
				@@ -45,7 +46,8 @@ const uint32_t SHA256_K[64] = {
 	d = d + h; \
 	h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))

-void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
+void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
+{
 	cl_uint A, B, C, D, E, F, G, H;

 	A = state[0];
@ -172,7 +174,7 @@ struct pc_data {
				@@ -172,7 +174,7 @@ struct pc_data {
 	pthread_t pth;
 };

-static void send_nonce(struct pc_data *pcd, cl_uint nonce)
+static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
 {
 	dev_blk_ctx *blk = &pcd->work->blk;
 	struct thr_info *thr = pcd->thr;
@ -219,6 +221,19 @@ static void send_nonce(struct pc_data *pcd, cl_uint nonce)
				@@ -219,6 +221,19 @@ static void send_nonce(struct pc_data *pcd, cl_uint nonce)
 	}
 }

+static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce)
+{
+	struct thr_info *thr = pcd->thr;
+	struct work *work = pcd->work;
+
+	if (scrypt_test(work->data, work->target, nonce))
+		submit_nonce(thr, pcd->work, nonce);
+	else {
+		applog(LOG_INFO, "Scrypt error, review settings");
+		thr->cgpu->hw_errors++;
+	}
+}
+
 static void *postcalc_hash(void *userdata)
 {
 	struct pc_data *pcd = (struct pc_data *)userdata;
@ -228,9 +243,16 @@ static void *postcalc_hash(void *userdata)
				@@ -228,9 +243,16 @@ static void *postcalc_hash(void *userdata)
 	pthread_detach(pthread_self());

 	for (entry = 0; entry < FOUND; entry++) {
-		if (pcd->res[entry])
-			send_nonce(pcd, pcd->res[entry]);
-		nonces++;
+		uint32_t nonce = pcd->res[entry];
+
+		if (nonce) {
+			applog(LOG_DEBUG, "OCL NONCE %u", nonce);
+			if (opt_scrypt)
+				send_scrypt_nonce(pcd, nonce);
+			else
+				send_sha_nonce(pcd, nonce);
+			nonces++;
+		}
 	}

 	free(pcd);
--- a/findnonce.h
+++ b/findnonce.h
@ -4,10 +4,10 @@
				@@ -4,10 +4,10 @@
 #include "config.h"

 #define MAXTHREADS (0xFFFFFFFEULL)
-#define MAXBUFFERS (0xFF)
+#define MAXBUFFERS (0xFFF)
 #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
-#define FOUND (0x80)
-/* #define NFLAG (0x7F) Just for reference */
+#define FOUND (0x800)
+/* #define NFLAG (0x7FF) Just for reference */

 #ifdef HAVE_OPENCL
 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
--- a/fpgautils.c
+++ b/fpgautils.c
@ -38,7 +38,7 @@
				@@ -38,7 +38,7 @@
 #include "miner.h"

 #ifdef HAVE_LIBUDEV
-char
+int
 serial_autodetect_udev(detectone_func_t detectone, const char*prodname)
 {
 	struct udev *udev = udev_new();
@ -69,14 +69,14 @@ serial_autodetect_udev(detectone_func_t detectone, const char*prodname)
				@@ -69,14 +69,14 @@ serial_autodetect_udev(detectone_func_t detectone, const char*prodname)
 	return found;
 }
 #else
-char
+int
 serial_autodetect_udev(__maybe_unused detectone_func_t detectone, __maybe_unused const char*prodname)
 {
 	return 0;
 }
 #endif

-char
+int
 serial_autodetect_devserial(detectone_func_t detectone, const char*prodname)
 {
 #ifndef WIN32
@ -107,7 +107,7 @@ serial_autodetect_devserial(detectone_func_t detectone, const char*prodname)
				@@ -107,7 +107,7 @@ serial_autodetect_devserial(detectone_func_t detectone, const char*prodname)
 #endif
 }

-char
+int
 _serial_detect(const char*dname, detectone_func_t detectone, autoscan_func_t autoscan, bool forceauto)
 {
 	struct string_elist *iter, *tmp;
@ -178,7 +178,8 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p
				@@ -178,7 +178,8 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p

 	SetCommConfig(hSerial, &comCfg, sizeof(comCfg));

-	const DWORD ctoms = (timeout == -1) ? 30000 : (timeout * 100);
+	// Code must specify a valid timeout value (0 means don't timeout)
+	const DWORD ctoms = (timeout * 100);
 	COMMTIMEOUTS cto = {ctoms, 0, ctoms, 0, ctoms};
 	SetCommTimeouts(hSerial, &cto);

@ -210,6 +211,10 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p
				@@ -210,6 +211,10 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p
 	switch (baud) {
 	case 0:
 		break;
+	case 57600:
+		cfsetispeed( &my_termios, B57600 );
+		cfsetospeed( &my_termios, B57600 );
+		break;
 	case 115200:
 		cfsetispeed( &my_termios, B115200 );
 		cfsetospeed( &my_termios, B115200 );
@ -230,10 +235,9 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p
				@@ -230,10 +235,9 @@ serial_open(const char*devpath, unsigned long baud, signed short timeout, bool p
 	my_termios.c_oflag &= ~OPOST;
 	my_termios.c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN);

-	if (timeout >= 0) {
-		my_termios.c_cc[VTIME] = (cc_t)timeout;
-		my_termios.c_cc[VMIN] = 0;
-	}
+	// Code must specify a valid timeout value (0 means don't timeout)
+	my_termios.c_cc[VTIME] = (cc_t)timeout;
+	my_termios.c_cc[VMIN] = 0;

 	tcsetattr(fdDev, TCSANOW, &my_termios);
 	if (purge)
--- a/fpgautils.h
+++ b/fpgautils.h
@ -14,17 +14,17 @@
				@@ -14,17 +14,17 @@
 #include <stdio.h>

 typedef bool(*detectone_func_t)(const char*);
-typedef char(*autoscan_func_t)();
+typedef int(*autoscan_func_t)();

-extern char _serial_detect(const char*dname, detectone_func_t, autoscan_func_t, bool force_autoscan);
+extern int _serial_detect(const char*dname, detectone_func_t, autoscan_func_t, bool force_autoscan);
 #define serial_detect_fauto(dname, detectone, autoscan)  \
 	_serial_detect(dname, detectone, autoscan, true)
 #define serial_detect_auto(dname, detectone, autoscan)  \
 	_serial_detect(dname, detectone, autoscan, false)
 #define serial_detect(dname, detectone)  \
 	_serial_detect(dname, detectone,     NULL, false)
-extern char serial_autodetect_devserial(detectone_func_t, const char*prodname);
-extern char serial_autodetect_udev     (detectone_func_t, const char*prodname);
+extern int serial_autodetect_devserial(detectone_func_t, const char*prodname);
+extern int serial_autodetect_udev     (detectone_func_t, const char*prodname);

 extern int serial_open(const char*devpath, unsigned long baud, signed short timeout, bool purge);
 extern ssize_t _serial_read(int fd, char *buf, size_t buflen, char*eol);
--- a/logging.c
+++ b/logging.c
@ -18,12 +18,15 @@ bool opt_log_output = false;
				@@ -18,12 +18,15 @@ bool opt_log_output = false;
 /* per default priorities higher than LOG_NOTICE are logged */
 int opt_log_level = LOG_NOTICE;

-static void my_log_curses(int prio, char *f, va_list ap)
+static void my_log_curses(__maybe_unused int prio, char *f, va_list ap)
 {
+	if (opt_quiet && prio != LOG_ERR)
+		return;
+
 #ifdef HAVE_CURSES
 	extern bool use_curses;
-	if (use_curses)
-		log_curses(prio, f, ap);
+	if (use_curses && log_curses_only(prio, f, ap))
+		;
 	else
 #endif
 	{
@ -31,57 +34,20 @@ static void my_log_curses(int prio, char *f, va_list ap)
				@@ -31,57 +34,20 @@ static void my_log_curses(int prio, char *f, va_list ap)

 		strcpy(f + len - 1, "                    \n");

-#ifdef HAVE_CURSES
-		log_curses(prio, f, ap);
-#else
+		mutex_lock(&console_lock);
 		vprintf(f, ap);
-#endif
+		mutex_unlock(&console_lock);
 	}
 }

+static void log_generic(int prio, const char *fmt, va_list ap);
+
 void vapplog(int prio, const char *fmt, va_list ap)
 {
 	if (!opt_debug && prio == LOG_DEBUG)
 		return;
-
-#ifdef HAVE_SYSLOG_H
-	if (use_syslog) {
-		vsyslog(prio, fmt, ap);
-	}
-#else
-	if (0) {}
-#endif
-	else if (opt_log_output || prio <= LOG_NOTICE) {
-		char *f;
-		int len;
-		struct timeval tv = {0, 0};
-		struct tm *tm;
-
-		gettimeofday(&tv, NULL);
-
-		tm = localtime(&tv.tv_sec);
-
-		len = 40 + strlen(fmt) + 22;
-		f = alloca(len);
-		sprintf(f, " [%d-%02d-%02d %02d:%02d:%02d] %s\n",
-			tm->tm_year + 1900,
-			tm->tm_mon + 1,
-			tm->tm_mday,
-			tm->tm_hour,
-			tm->tm_min,
-			tm->tm_sec,
-			fmt);
-		/* Only output to stderr if it's not going to the screen as well */
-		if (!isatty(fileno((FILE *)stderr))) {
-			va_list apc;
-
-			va_copy(apc, ap);
-			vfprintf(stderr, f, apc);	/* atomic write to stderr */
-			fflush(stderr);
-		}
-
-		my_log_curses(prio, f, ap);
-	}
+	if (use_syslog || opt_log_output || prio <= LOG_NOTICE)
+		log_generic(prio, fmt, ap);
 }

 void applog(int prio, const char *fmt, ...)
@ -100,7 +66,7 @@ void applog(int prio, const char *fmt, ...)
				@@ -100,7 +66,7 @@ void applog(int prio, const char *fmt, ...)
 * generic log function used by priority specific ones
 * equals vapplog() without additional priority checks
 */
-static void __maybe_unused log_generic(int prio, const char *fmt, va_list ap)
+static void log_generic(int prio, const char *fmt, va_list ap)
 {
 #ifdef HAVE_SYSLOG_H
 	if (use_syslog) {
@ -121,7 +87,7 @@ static void __maybe_unused log_generic(int prio, const char *fmt, va_list ap)
				@@ -121,7 +87,7 @@ static void __maybe_unused log_generic(int prio, const char *fmt, va_list ap)

 		len = 40 + strlen(fmt) + 22;
 		f = alloca(len);
-		sprintf(f, "[%d-%02d-%02d %02d:%02d:%02d] %s\n",
+		sprintf(f, " [%d-%02d-%02d %02d:%02d:%02d] %s\n",
 			tm->tm_year + 1900,
 			tm->tm_mon + 1,
 			tm->tm_mday,
--- a/miner.h
+++ b/miner.h
@ -262,6 +262,7 @@ enum cl_kernels {
				@@ -262,6 +262,7 @@ enum cl_kernels {
 	KL_PHATK,
 	KL_DIAKGCN,
 	KL_DIABLO,
+	KL_SCRYPT,
 };

 enum dev_reason {
@ -335,7 +336,6 @@ struct cgpu_info {
				@@ -335,7 +336,6 @@ struct cgpu_info {
 	int accepted;
 	int rejected;
 	int hw_errors;
-	unsigned int low_count;
 	double rolling;
 	double total_mhashes;
 	double utility;
@ -355,10 +355,17 @@ struct cgpu_info {
				@@ -355,10 +355,17 @@ struct cgpu_info {
 	int virtual_adl;
 	int intensity;
 	bool dynamic;
+
 	cl_uint vwidth;
 	size_t work_size;
 	enum cl_kernels kernel;
+	cl_ulong max_alloc;

+#ifdef USE_SCRYPT
+	int opt_lg, lookup_gap;
+	int opt_tc, thread_concurrency;
+	int shaders;
+#endif
 	struct timeval tv_gpustart;;
 	struct timeval tv_gpuend;
 	double gpu_us_average;
@ -493,6 +500,11 @@ static inline void mutex_unlock(pthread_mutex_t *lock)
				@@ -493,6 +500,11 @@ static inline void mutex_unlock(pthread_mutex_t *lock)
 		quit(1, "WTF MUTEX ERROR ON UNLOCK!");
 }

+static inline int mutex_trylock(pthread_mutex_t *lock)
+{
+	return pthread_mutex_trylock(lock);
+}
+
 static inline void wr_lock(pthread_rwlock_t *lock)
 {
 	if (unlikely(pthread_rwlock_wrlock(lock)))
@ -550,6 +562,7 @@ extern bool opt_api_listen;
				@@ -550,6 +562,7 @@ extern bool opt_api_listen;
 extern bool opt_api_network;
 extern bool opt_delaynet;
 extern bool opt_restart;
+extern char *opt_icarus_options;
 extern char *opt_icarus_timing;
 #ifdef USE_BITFORCE
 extern bool opt_bfl_noncerange;
@ -576,6 +589,8 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target);
				@@ -576,6 +589,8 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target);

 extern int opt_scantime;

+extern pthread_mutex_t console_lock;
+
 extern pthread_mutex_t restart_lock;
 extern pthread_cond_t restart_cond;

@ -598,15 +613,20 @@ extern int set_memoryclock(int gpu, int iMemoryClock);
				@@ -598,15 +613,20 @@ extern int set_memoryclock(int gpu, int iMemoryClock);
 extern void api(int thr_id);

 extern struct pool *current_pool(void);
-extern int active_pools(void);
+extern int enabled_pools;
 extern void add_pool_details(bool live, char *url, char *user, char *pass);

 #define MAX_GPUDEVICES 16

 #define MIN_INTENSITY -10
 #define _MIN_INTENSITY_STR "-10"
+#ifdef USE_SCRYPT
+#define MAX_INTENSITY 20
+#define _MAX_INTENSITY_STR "20"
+#else
 #define MAX_INTENSITY 14
 #define _MAX_INTENSITY_STR "14"
+#endif

 extern struct list_head scan_devices;
 extern int nDevs;
@ -614,9 +634,15 @@ extern int opt_n_threads;
				@@ -614,9 +634,15 @@ extern int opt_n_threads;
 extern int num_processors;
 extern int hw_errors;
 extern bool use_syslog;
+extern bool opt_quiet;
 extern struct thr_info *thr_info;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
 extern int gpu_threads;
+#ifdef USE_SCRYPT
+extern bool opt_scrypt;
+#else
+#define opt_scrypt (0)
+#endif
 extern double total_secs;
 extern int mining_threads;
 extern struct cgpu_info *cpus;
@ -664,6 +690,9 @@ typedef struct {
				@@ -664,6 +690,9 @@ typedef struct {
 	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
 	cl_uint zeroA, zeroB;
 	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
+#ifdef USE_SCRYPT
+	struct work *work;
+#endif
 } dev_blk_ctx;
 #else
 typedef struct {
@ -677,9 +706,11 @@ struct curl_ent {
				@@ -677,9 +706,11 @@ struct curl_ent {
 	struct timeval tv;
 };

+/* Disabled needs to be the lowest enum as a freshly calloced value will then
+ * equal disabled */
 enum pool_enable {
-	POOL_ENABLED,
 	POOL_DISABLED,
+	POOL_ENABLED,
 	POOL_REJECTING,
 };

@ -689,6 +720,8 @@ struct pool {
				@@ -689,6 +720,8 @@ struct pool {
 	int accepted, rejected;
 	int seq_rejects;
 	int solved;
+	int queued;
+	int staged;

 	bool submit_fail;
 	bool idle;
@ -796,7 +829,7 @@ extern void switch_pools(struct pool *selected);
				@@ -796,7 +829,7 @@ extern void switch_pools(struct pool *selected);
 extern void remove_pool(struct pool *pool);
 extern void write_config(FILE *fcfg);
 extern void default_save_file(char *filename);
-extern void log_curses(int prio, const char *f, va_list ap);
+extern bool log_curses_only(int prio, const char *f, va_list ap);
 extern void clear_logwin(void);
 extern bool pool_tclear(struct pool *pool, bool *var);
 extern struct thread_q *tq_new(void);
--- a/miner.php
+++ b/miner.php
@ -1,8 +1,9 @@
				@@ -1,8 +1,9 @@
 <?php
 session_start();
 #
-global $miner, $port, $readonly, $notify, $rigs, $socktimeoutsec;
-global $checklastshare, $hidefields;
+global $miner, $port, $readonly, $notify, $rigs;
+global $socksndtimeoutsec, $sockrcvtimeoutsec;
+global $checklastshare, $poolinputs, $hidefields;
 global $ignorerefresh, $changerefresh, $autorefresh;
 global $allowcustompages, $customsummarypages;
 global $miner_font_family, $miner_font_size;
@ -22,13 +23,18 @@ $readonly = false;
				@@ -22,13 +23,18 @@ $readonly = false;
 #  coz it doesn't have notify - it just shows the error status table
 $notify = true;
 #
-# set $checklastshare to true to do the following checks:
+# Set $checklastshare to true to do the following checks:
 # If a device's last share is 12x expected ago then display as an error
 # If a device's last share is 8x expected ago then display as a warning
 # If either of the above is true, also display the whole line highlighted
 # This assumes shares are 1 difficulty shares
 $checklastshare = true;
 #
+# Set $poolinputs to true to show the input fields for adding a pool
+# and changing the pool priorities
+# N.B. also if $readonly is true, it will not display the fields
+$poolinputs = false;
+#
 # Set $rigs to an array of your cgminer rigs that are running
 #  format: 'IP:Port' or 'Host:Port' or 'Host:Port:Name'
 # If you only have one rig, it will just show the detail of that rig
@ -38,12 +44,16 @@ $checklastshare = true;
				@@ -38,12 +44,16 @@ $checklastshare = true;
 # e.g. $rigs = array('127.0.0.1:4028','myrig.com:4028:Sugoi');
 $rigs = array('127.0.0.1:4028');
 #
-# This should be OK for most cases
-# However, the longer it is the longer you have to wait while php
-# hangs if the target cgminer isn't runnning or listening
-# Feel free to increase it if your network is very slow
+# These should be OK for most cases
+# However, the longer SND is, the longer you have to wait while
+# php hangs if the target cgminer isn't runnning or listening
+# RCV should only ever be relevant if cgminer has hung but the
+# API thread is still running, RCV would normally be >= SND
+# Feel free to increase SND if your network is very slow
+# or decrease RCV if that happens often to you
 # Also, on some windows PHP, apparently the $usec is ignored
-$socktimeoutsec = 10;
+$socksndtimeoutsec = 10;
+$sockrcvtimeoutsec = 40;
 #
 # List of fields NOT to be displayed
 # You can use this to hide data you don't want to see or don't want
@ -82,11 +92,13 @@ $mobilepage = array(
				@@ -82,11 +92,13 @@ $mobilepage = array(
 'DATE' => null,
 'RIGS' => null,
 'SUMMARY' => array('Elapsed', 'MHS av', 'Found Blocks=Blks', 'Accepted', 'Rejected=Rej', 'Utility'),
- 'DEVS' => array('ID', 'Name', 'GPU', 'Status', 'MHS av', 'Accepted', 'Rejected=Rej', 'Utility'),
+ 'DEVS+NOTIFY' => array('DEVS.Name=Name', 'DEVS.ID=ID', 'DEVS.Status=Status', 'DEVS.Temperature=Temp',
+			'DEVS.MHS av=MHS av', 'DEVS.Accepted=Accept', 'DEVS.Rejected=Rej',
+			'DEVS.Utility=Utility', 'NOTIFY.Last Not Well=Not Well'),
 'POOL' => array('POOL', 'Status', 'Accepted', 'Rejected=Rej', 'Last Share Time'));
 $mobilesum = array(
 'SUMMARY' => array('MHS av', 'Found Blocks', 'Accepted', 'Rejected', 'Utility'),
- 'DEVS' => array('MHS av', 'Accepted', 'Rejected', 'Utility'),
+ 'DEVS+NOTIFY' => array('DEVS.MHS av', 'DEVS.Accepted', 'DEVS.Rejected', 'DEVS.Utility'),
 'POOL' => array('Accepted', 'Rejected'));
 #
 # customsummarypages is an array of these Custom Summary Pages
@ -190,7 +202,7 @@ function getdom($domname)
				@@ -190,7 +202,7 @@ function getdom($domname)
 function htmlhead($checkapi, $rig, $pg = null)
 {
 global $miner_font_family, $miner_font_size;
- global $error, $readonly, $here;
+ global $error, $readonly, $poolinputs, $here;
 global $ignorerefresh, $autorefresh;

 $extraparams = '';
@ -245,6 +257,8 @@ if ($ignorerefresh == false)
				@@ -245,6 +257,8 @@ if ($ignorerefresh == false)
 echo "function prc(a,m){pr('&arg='+a,m)}
 function prs(a,r){var c=a.substr(3);var z=c.split('|',2);var m=z[0].substr(0,1).toUpperCase()+z[0].substr(1)+' GPU '+z[1];prc(a+'&rig='+r,m)}
 function prs2(a,n,r){var v=document.getElementById('gi'+n).value;var c=a.substr(3);var z=c.split('|',2);var m='Set GPU '+z[1]+' '+z[0].substr(0,1).toUpperCase()+z[0].substr(1)+' to '+v;prc(a+','+v+'&rig='+r,m)}\n";
+	if ($poolinputs === true)
+		echo "function cbs(s){var t=s.replace(/\\\\/g,'\\\\\\\\'); return t.replace(/,/g, '\\\\,')}\nfunction pla(r){var u=document.getElementById('purl').value;var w=document.getElementById('pwork').value;var p=document.getElementById('ppass').value;pr('&rig='+r+'&arg=addpool|'+cbs(u)+','+cbs(w)+','+cbs(p),'Add Pool '+u)}\nfunction psp(r){var p=document.getElementById('prio').value;pr('&rig='+r+'&arg=poolpriority|'+p,'Set Pool Priorities to '+p)}\n";
 }
 ?>
 </script>
@ -260,7 +274,7 @@ $error = null;
				@@ -260,7 +274,7 @@ $error = null;
 #
 function getsock($addr, $port)
 {
- global $haderror, $error, $socktimeoutsec;
+ global $haderror, $error, $socksndtimeoutsec, $sockrcvtimeoutsec;

 $error = null;
 $socket = null;
@ -277,7 +291,8 @@ function getsock($addr, $port)
				@@ -277,7 +291,8 @@ function getsock($addr, $port)
 // Ignore if this fails since the socket connect may work anyway
 //  and nothing is gained by aborting if the option cannot be set
 //  since we don't know in advance if it can connect
- socket_set_option($socket, SOL_SOCKET, SO_SNDTIMEO, array('sec' => $socktimeoutsec, 'usec' => 0));
+ socket_set_option($socket, SOL_SOCKET, SO_SNDTIMEO, array('sec' => $socksndtimeoutsec, 'usec' => 0));
+ socket_set_option($socket, SOL_SOCKET, SO_RCVTIMEO, array('sec' => $sockrcvtimeoutsec, 'usec' => 0));

 $res = socket_connect($socket, $addr, $port);
 if ($res === false)
@ -307,6 +322,46 @@ function readsockline($socket)
				@@ -307,6 +322,46 @@ function readsockline($socket)
 return $line;
 }
 #
+function api_convert_escape($str)
+{
+ $res = '';
+ $len = strlen($str);
+ for ($i = 0; $i < $len; $i++)
+ {
+	$ch = substr($str, $i, 1);
+	if ($ch != '\\' || $i == ($len-1))
+		$res .= $ch;
+	else
+	{
+		$i++;
+		$ch = substr($str, $i, 1);
+		switch ($ch)
+		{
+		case '|':
+			$res .= "\1";
+			break;
+		case '\\':
+			$res .= "\2";
+			break;
+		case '=':
+			$res .= "\3";
+			break;
+		case ',':
+			$res .= "\4";
+			break;
+		default:
+			$res .= $ch;
+		}
+	}
+ }
+ return $res;
+}
+#
+function revert($str)
+{
+ return str_replace(array("\1", "\2", "\3", "\4"), array("|", "\\", "=", ","), $str);
+}
+#
 function api($cmd)
 {
 global $haderror, $error;
@ -328,6 +383,8 @@ function api($cmd)
				@@ -328,6 +383,8 @@ function api($cmd)

 #	print "$cmd returned '$line'\n";

+	$line = api_convert_escape($line);
+
 	$data = array();

 	$objs = explode('|', $line);
@ -365,7 +422,7 @@ function api($cmd)
				@@ -365,7 +422,7 @@ function api($cmd)
 					continue;

 				if (count($id) == 2)
-					$data[$name][$id[0]] = $id[1];
+					$data[$name][$id[0]] = revert($id[1]);
 				else
 					$data[$name][$counter] = $id[0];

@ -440,6 +497,9 @@ function classlastshare($when, $alldata, $warnclass, $errorclass)
				@@ -440,6 +497,9 @@ function classlastshare($when, $alldata, $warnclass, $errorclass)
 if (!isset($alldata['MHS av']))
 	return '';

+ if ($alldata['MHS av'] == 0)
+	return '';
+
 if (!isset($alldata['Last Share Time']))
 	return '';

@ -476,6 +536,10 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -476,6 +536,10 @@ function fmt($section, $name, $value, $when, $alldata)
 $ret = $value;
 $class = '';

+ $nams = explode('.', $name);
+ if (count($nams) > 1)
+	$name = $nams[count($nams)-1];
+
 if ($value === null)
 	$ret = $b;
 else
@ -483,6 +547,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -483,6 +547,7 @@ function fmt($section, $name, $value, $when, $alldata)
 	{
 	case 'GPU.Last Share Time':
 	case 'PGA.Last Share Time':
+	case 'DEVS.Last Share Time':
 		if ($value == 0
 		||  (isset($alldata['Last Share Pool']) && $alldata['Last Share Pool'] == -1))
 		{
@ -503,6 +568,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -503,6 +568,7 @@ function fmt($section, $name, $value, $when, $alldata)
 		break;
 	case 'GPU.Last Share Pool':
 	case 'PGA.Last Share Pool':
+	case 'DEVS.Last Share Pool':
 		if ($value == -1)
 		{
 			$ret = 'None';
@ -565,6 +631,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -565,6 +631,7 @@ function fmt($section, $name, $value, $when, $alldata)
 		break;
 	case 'GPU.Utility':
 	case 'PGA.Utility':
+	case 'DEVS.Utility':
 	case 'SUMMARY.Utility':
 	case 'total.Utility':
 		$ret = $value.'/m';
@ -585,18 +652,24 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -585,18 +652,24 @@ function fmt($section, $name, $value, $when, $alldata)
 			}
 		break;
 	case 'PGA.Temperature':
-		$ret = $value.'&deg;C';
-		break;
 	case 'GPU.Temperature':
+	case 'DEVS.Temperature':
 		$ret = $value.'&deg;C';
+		if (!isset($alldata['GPU']))
+			break;
 	case 'GPU.GPU Clock':
+	case 'DEVS.GPU Clock':
 	case 'GPU.Memory Clock':
+	case 'DEVS.Memory Clock':
 	case 'GPU.GPU Voltage':
+	case 'DEVS.GPU Voltage':
 	case 'GPU.GPU Activity':
+	case 'DEVS.GPU Activity':
 		if ($value == 0)
 			$class = $warnclass;
 		break;
 	case 'GPU.Fan Percent':
+	case 'DEVS.Fan Percent':
 		if ($value == 0)
 			$class = $warnclass;
 		else
@ -609,6 +682,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -609,6 +682,7 @@ function fmt($section, $name, $value, $when, $alldata)
 		}
 		break;
 	case 'GPU.Fan Speed':
+	case 'DEVS.Fan Speed':
 		if ($value == 0)
 			$class = $warnclass;
 		else
@ -624,6 +698,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -624,6 +698,7 @@ function fmt($section, $name, $value, $when, $alldata)
 		break;
 	case 'GPU.MHS av':
 	case 'PGA.MHS av':
+	case 'DEVS.MHS av':
 	case 'SUMMARY.MHS av':
 	case 'total.MHS av':
 		$parts = explode('.', $value, 2);
@ -650,6 +725,7 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -650,6 +725,7 @@ function fmt($section, $name, $value, $when, $alldata)
 		break;
 	case 'GPU.Total MH':
 	case 'PGA.Total MH':
+	case 'DEVS.Total MH':
 	case 'SUMMARY.Total MH':
 	case 'total.Total MH':
 	case 'SUMMARY.Getworks':
@ -657,11 +733,13 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -657,11 +733,13 @@ function fmt($section, $name, $value, $when, $alldata)
 	case 'total.Getworks':
 	case 'GPU.Accepted':
 	case 'PGA.Accepted':
+	case 'DEVS.Accepted':
 	case 'SUMMARY.Accepted':
 	case 'POOL.Accepted':
 	case 'total.Accepted':
 	case 'GPU.Rejected':
 	case 'PGA.Rejected':
+	case 'DEVS.Rejected':
 	case 'SUMMARY.Rejected':
 	case 'POOL.Rejected':
 	case 'total.Rejected':
@ -679,12 +757,14 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -679,12 +757,14 @@ function fmt($section, $name, $value, $when, $alldata)
 		break;
 	case 'GPU.Status':
 	case 'PGA.Status':
+	case 'DEVS.Status':
 	case 'POOL.Status':
 		if ($value != 'Alive')
 			$class = $errorclass;
 		break;
 	case 'GPU.Enabled':
 	case 'PGA.Enabled':
+	case 'DEVS.Enabled':
 		if ($value != 'Y')
 			$class = $warnclass;
 		break;
@ -710,13 +790,17 @@ function fmt($section, $name, $value, $when, $alldata)
				@@ -710,13 +790,17 @@ function fmt($section, $name, $value, $when, $alldata)
 if ($class == '' && ($rownum % 2) == 0)
 	$class = $c2class;

+ if ($ret == '')
+	$ret = $b;
+
 return array($ret, $class);
 }
 #
 global $poolcmd;
 $poolcmd = array(	'Switch to'	=> 'switchpool',
 			'Enable'	=> 'enablepool',
-			'Disable'	=> 'disablepool' );
+			'Disable'	=> 'disablepool',
+			'Remove'	=> 'removepool' );
 #
 function showhead($cmd, $values, $justnames = false)
 {
@ -927,6 +1011,43 @@ function processgpus($rig)
				@@ -927,6 +1011,43 @@ function processgpus($rig)
 }
 }
 #
+function showpoolinputs($rig, $ans)
+{
+ global $readonly, $poolinputs;
+
+ if ($readonly === true || $poolinputs === false)
+	return;
+
+ newtable();
+ newrow();
+
+ $inps = array('Pool URL' => array('purl', 20),
+		'Worker Name' => array('pwork', 10),
+		'Worker Password' => array('ppass', 10));
+ $b = '&nbsp;';
+
+ echo "<td align=right class=h> Add a pool: </td><td>";
+
+ foreach ($inps as $text => $name)
+	echo "$text: <input name='".$name[0]."' id='".$name[0]."' value='' type=text size=".$name[1]."> ";
+
+ echo "</td><td align=middle><input type=button value='Add' onclick='pla($rig)'></td>";
+
+ endrow();
+
+ if (count($ans) > 1)
+ {
+	newrow();
+
+	echo '<td align=right class=h> Set pool priorities: </td>';
+	echo "<td> Comma list of pool numbers: <input type=text name=prio id=prio size=20>";
+	echo "</td><td align=middle><input type=button value='Set' onclick='psp($rig)'></td>";
+
+	endrow();
+ }
+ endtable();
+}
+#
 function process($cmds, $rig)
 {
 global $error, $devs;
@ -946,12 +1067,15 @@ function process($cmds, $rig)
				@@ -946,12 +1067,15 @@ function process($cmds, $rig)
 	{
 		details($cmd, $process, $rig);

+		if ($cmd == 'devs')
+			$devs = $process;
+
+		if ($cmd == 'pools')
+			showpoolinputs($rig, $process);
+
 		# Not after the last one
 		if (--$count > 0)
 			otherrow('<td><br><br></td>');
-
-		if ($cmd == 'devs')
-			$devs = $process;
 	}
 }
 }
@ -1268,8 +1392,177 @@ $sectionmap = array(
				@@ -1268,8 +1392,177 @@ $sectionmap = array(
 	'GPU' => 'devs',	// You would normally use DEVS
 	'PGA' => 'devs',	// You would normally use DEVS
 	'NOTIFY' => 'notify',
+	'DEVDETAILS' => 'devdetails',
+	'STATS' => 'stats',
 	'CONFIG' => 'config');
 #
+function joinfields($section1, $section2, $join, $results)
+{
+ global $sectionmap;
+
+ $name1 = $sectionmap[$section1];
+ $name2 = $sectionmap[$section2];
+ $newres = array();
+
+ // foreach rig in section1
+ foreach ($results[$name1] as $rig => $result)
+ {
+	$status = null;
+
+	// foreach answer section in the rig api call
+	foreach ($result as $name1b => $fields1b)
+	{
+		if ($name1b == 'STATUS')
+		{
+			// remember the STATUS from section1
+			$status = $result[$name1b];
+			continue;
+		}
+
+		// foreach answer section in the rig api call (for the other api command)
+		foreach ($results[$name2][$rig] as $name2b => $fields2b)
+		{
+			if ($name2b == 'STATUS')
+				continue;
+
+			// If match the same field values of fields in $join
+			$match = true;
+			foreach ($join as $field)
+				if ($fields1b[$field] != $fields2b[$field])
+				{
+					$match = false;
+					break;
+				}
+
+			if ($match === true)
+			{
+				if ($status != null)
+				{
+					$newres[$rig]['STATUS'] = $status;
+					$status = null;
+				}
+
+				$subsection = $section1.'+'.$section2;
+				$subsection .= preg_replace('/[^0-9]/', '', $name1b.$name2b);
+
+				foreach ($fields1b as $nam => $val)
+					$newres[$rig][$subsection]["$section1.$nam"] = $val;
+				foreach ($fields2b as $nam => $val)
+					$newres[$rig][$subsection]["$section2.$nam"] = $val;
+			}
+		}
+	}
+ }
+ return $newres;
+}
+#
+function joinall($section1, $section2, $results)
+{
+ global $sectionmap;
+
+ $name1 = $sectionmap[$section1];
+ $name2 = $sectionmap[$section2];
+ $newres = array();
+
+ // foreach rig in section1
+ foreach ($results[$name1] as $rig => $result)
+ {
+	// foreach answer section in the rig api call
+	foreach ($result as $name1b => $fields1b)
+	{
+		if ($name1b == 'STATUS')
+		{
+			// copy the STATUS from section1
+			$newres[$rig][$name1b] = $result[$name1b];
+			continue;
+		}
+
+		// foreach answer section in the rig api call (for the other api command)
+		foreach ($results[$name2][$rig] as $name2b => $fields2b)
+		{
+			if ($name2b == 'STATUS')
+				continue;
+
+			$subsection = $section1.'+'.$section2;
+			$subsection .= preg_replace('/[^0-9]/', '', $name1b.$name2b);
+
+			foreach ($fields1b as $nam => $val)
+				$newres[$rig][$subsection]["$section1.$nam"] = $val;
+			foreach ($fields2b as $nam => $val)
+				$newres[$rig][$subsection]["$section2.$nam"] = $val;
+		}
+	}
+ }
+ return $newres;
+}
+#
+function joinsections($sections, $results, $errors)
+{
+ global $sectionmap;
+
+#echo "results['pools']=".print_r($results['pools'],true)."<br>";
+
+ // GPU's don't have Name,ID fields - so create them
+ foreach ($results as $section => $res)
+	foreach ($res as $rig => $result)
+		foreach ($result as $name => $fields)
+		{
+			$subname = preg_replace('/[0-9]/', '', $name);
+			if ($subname == 'GPU' and isset($result[$name]['GPU']))
+			{
+				$results[$section][$rig][$name]['Name'] = 'GPU';
+				$results[$section][$rig][$name]['ID'] = $result[$name]['GPU'];
+			}
+		}
+
+ foreach ($sections as $section => $fields)
+	if ($section != 'DATE' && !isset($sectionmap[$section]))
+	{
+		$both = explode('+', $section, 2);
+		if (count($both) > 1)
+		{
+			switch($both[0])
+			{
+			case 'SUMMARY':
+				switch($both[1])
+				{
+				case 'POOL':
+				case 'DEVS':
+				case 'CONFIG':
+					$sectionmap[$section] = $section;
+					$results[$section] = joinall($both[0], $both[1], $results);
+					break;
+				default:
+					$errors[] = "Error: Invalid section '$section'";
+					break;
+				}
+				break;
+			case 'DEVS':
+				$join = array('Name', 'ID');
+				switch($both[1])
+				{
+				case 'NOTIFY':
+				case 'DEVDETAILS':
+					$sectionmap[$section] = $section;
+					$results[$section] = joinfields($both[0], $both[1], $join, $results);
+					break;
+				default:
+					$errors[] = "Error: Invalid section '$section'";
+					break;
+				}
+				break;
+			default:
+				$errors[] = "Error: Invalid section '$section'";
+				break;
+			}
+		}
+		else
+			$errors[] = "Error: Invalid section '$section'";
+	}
+
+ return array($results, $errors);
+}
+#
 function secmatch($section, $field)
 {
 if ($section == $field)
@ -1329,7 +1622,14 @@ function customset($showfields, $sum, $section, $rig, $isbutton, $result, $total
				@@ -1329,7 +1622,14 @@ function customset($showfields, $sum, $section, $rig, $isbutton, $result, $total
 				$value = null;
 		}

-		list($showvalue, $class) = fmt($secname, $name, $value, $when, $row);
+		if (strpos($secname, '+') === false)
+			list($showvalue, $class) = fmt($secname, $name, $value, $when, $row);
+		else
+		{
+			$parts = explode('.', $name, 2);
+			list($showvalue, $class) = fmt($parts[0], $parts[1], $value, $when, $row);
+		}
+
 		echo "<td$class align=right>$showvalue</td>";
 	}
 	endrow();
@ -1350,15 +1650,19 @@ function processcustompage($pagename, $sections, $sum, $namemap)
				@@ -1350,15 +1650,19 @@ function processcustompage($pagename, $sections, $sum, $namemap)
 $errors = array();
 foreach ($sections as $section => $fields)
 {
-	if (isset($sectionmap[$section]))
+	$all = explode('+', $section);
+	foreach ($all as $section)
 	{
-		$cmd = $sectionmap[$section];
-		if (!isset($cmds[$cmd]))
-			$cmds[$cmd] = 1;
+		if (isset($sectionmap[$section]))
+		{
+			$cmd = $sectionmap[$section];
+			if (!isset($cmds[$cmd]))
+				$cmds[$cmd] = 1;
+		}
+		else
+			if ($section != 'DATE')
+				$errors[] = "Error: unknown section '$section' in custom summary page '$pagename'";
 	}
-	else
-		if ($section != 'DATE')
-			$errors[] = "Error: unknown section '$section' in custom summary page '$pagename'";
 }

 $results = array();
@ -1393,6 +1697,7 @@ function processcustompage($pagename, $sections, $sum, $namemap)
				@@ -1393,6 +1697,7 @@ function processcustompage($pagename, $sections, $sum, $namemap)
 $shownsomething = false;
 if (count($results) > 0)
 {
+	list($results, $errors) = joinsections($sections, $results, $errors);
 	$first = true;
 	foreach ($sections as $section => $fields)
 	{
@ -1586,7 +1891,8 @@ function display()
				@@ -1586,7 +1891,8 @@ function display()
 			$miner = $parts[0];
 			$port = $parts[1];

-			$preprocess = $arg;
+			if ($readonly !== true)
+				$preprocess = $arg;
 		}
 	}
 }
--- a/4
+++ b/4
@ -81,9 +81,9 @@ case $dirmode in
				@@ -81,9 +81,9 @@ case $dirmode in
      echo "mkdir -p -- $*"
      exec mkdir -p -- "$@"
    else
-      # On NextStep and OpenStep, the `mkdir' command does not
+      # On NextStep and OpenStep, the 'mkdir' command does not
      # recognize any option.  It will interpret all options as
-      # directories to create, and then abort because `.' already
+      # directories to create, and then abort because '.' already
      # exists.
      test -d ./-p && rmdir ./-p
      test -d ./--version && rmdir ./--version
--- a/ocl.c
+++ b/ocl.c
@ -33,7 +33,7 @@
				@@ -33,7 +33,7 @@
 #include "findnonce.h"
 #include "ocl.h"

-int opt_platform_id;
+int opt_platform_id = -1;

 char *file_contents(const char *filename, int *length)
 {
@ -80,7 +80,7 @@ int clDevicesNum(void) {
				@@ -80,7 +80,7 @@ int clDevicesNum(void) {
 	cl_uint numPlatforms;
 	cl_platform_id *platforms;
 	cl_platform_id platform = NULL;
-	unsigned int most_devices = 0, i;
+	unsigned int most_devices = 0, i, mdplatform = 0;

 	status = clGetPlatformIDs(0, NULL, &numPlatforms);
 	/* If this fails, assume no GPUs. */
@ -118,11 +118,15 @@ int clDevicesNum(void) {
				@@ -118,11 +118,15 @@ int clDevicesNum(void) {
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
+			if (i < numPlatforms - 1)
+				continue;
 			return -1;
 		}
 		applog(LOG_INFO, "Platform %d devices: %d", i, numDevices);
-		if (numDevices > most_devices)
+		if (numDevices > most_devices) {
 			most_devices = numDevices;
+			mdplatform = i;
+		}
 		if (numDevices) {
 			unsigned int j;
 			char pbuff[256];
@ -137,6 +141,9 @@ int clDevicesNum(void) {
				@@ -137,6 +141,9 @@ int clDevicesNum(void) {
 		}
 	}

+	if (opt_platform_id < 0)
+		opt_platform_id = mdplatform;;
+
 	return most_devices;
 }

@ -201,6 +208,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -201,6 +208,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 {
 	_clState *clState = calloc(1, sizeof(_clState));
 	bool patchbfi = false, prog_built = false;
+	struct cgpu_info *cgpu = &gpus[gpu];
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
 	cl_platform_id* platforms;
@ -302,6 +310,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -302,6 +310,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		return NULL;
 	}

+	/////////////////////////////////////////////////////////////////
+	// Create an OpenCL command queue
+	/////////////////////////////////////////////////////////////////
+	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
+						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
+	if (status != CL_SUCCESS) /* Try again without OOE enable */
+		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
+		return NULL;
+	}
+
 	/* Check for BFI INT support. Hopefully people don't mix devices with
 	 * and without it! */
 	char * extensions = malloc(1024);
@ -344,18 +364,30 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -344,18 +364,30 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);

+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
+		return NULL;
+	}
+	applog(LOG_DEBUG, "Max mem alloc size is %u", cgpu->max_alloc);
+
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
 	 * name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
+	 * For scrypt the filename is:
+	 * name + kernelname + g + lg + lookup_gap + tc + thread_concurrency + w + work_size + l + sizeof(long) + .bin
 	 */
 	char binaryfilename[255];
 	char filename[255];
 	char numbuf[10];

-	if (gpus[gpu].kernel == KL_NONE) {
-		/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
-		if (!strstr(name, "Tahiti") &&
+	if (cgpu->kernel == KL_NONE) {
+		if (opt_scrypt) {
+			applog(LOG_INFO, "Selecting scrypt kernel");
+			clState->chosen_kernel = KL_SCRYPT;
+		} else if (!strstr(name, "Tahiti") &&
+			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
 			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
 			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
 			 strstr(vbuff, "831.4") ||
@ -372,9 +404,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -372,9 +404,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			applog(LOG_INFO, "Selecting phatk kernel");
 			clState->chosen_kernel = KL_PHATK;
 		}
-		gpus[gpu].kernel = clState->chosen_kernel;
+		cgpu->kernel = clState->chosen_kernel;
 	} else {
-		clState->chosen_kernel = gpus[gpu].kernel;
+		clState->chosen_kernel = cgpu->kernel;
 		if (clState->chosen_kernel == KL_PHATK &&
 		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@ -407,6 +439,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -407,6 +439,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, DIAKGCN_KERNNAME".cl");
 			strcpy(binaryfilename, DIAKGCN_KERNNAME);
 			break;
+		case KL_SCRYPT:
+			strcpy(filename, SCRYPT_KERNNAME".cl");
+			strcpy(binaryfilename, SCRYPT_KERNNAME);
+			/* Scrypt only supports vector 1 */
+			cgpu->vwidth = 1;
+			break;
 		case KL_NONE: /* Shouldn't happen */
 		case KL_DIABLO:
 			strcpy(filename, DIABLO_KERNNAME".cl");
@ -414,24 +452,64 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -414,24 +452,64 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			break;
 	}

-	if (gpus[gpu].vwidth)
-		clState->vwidth = gpus[gpu].vwidth;
+	if (cgpu->vwidth)
+		clState->vwidth = cgpu->vwidth;
 	else {
 		clState->vwidth = preferred_vwidth;
-		gpus[gpu].vwidth = preferred_vwidth;
+		cgpu->vwidth = preferred_vwidth;
 	}

-	if ((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
-		clState->vwidth == 1 && clState->hasOpenCL11plus)
+	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
+		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
 			clState->goffset = true;

-	if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
-		clState->wsize = gpus[gpu].work_size;
+	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
+		clState->wsize = cgpu->work_size;
 	else if (strstr(name, "Tahiti"))
 		clState->wsize = 64;
 	else
 		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	gpus[gpu].work_size = clState->wsize;
+	cgpu->work_size = clState->wsize;
+
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		cl_ulong ma = cgpu->max_alloc, mt;
+		int pow2 = 0;
+
+		if (!cgpu->opt_lg) {
+			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
+			cgpu->lookup_gap = 2;
+		} else
+			cgpu->lookup_gap = cgpu->opt_lg;
+
+		if (!cgpu->opt_tc) {
+			cgpu->thread_concurrency = ma / 32768 / cgpu->lookup_gap;
+			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
+				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
+				if (cgpu->thread_concurrency > cgpu->shaders * 5)
+					cgpu->thread_concurrency = cgpu->shaders * 5;
+			}
+				
+			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu,  cgpu->thread_concurrency);
+		} else
+			cgpu->thread_concurrency = cgpu->opt_tc;
+
+		/* If we have memory to spare, try to find a power of 2 value
+		 * >= required amount to map nicely to an intensity */
+		mt = cgpu->thread_concurrency * 32768 * cgpu->lookup_gap;
+		if (ma > mt) {
+			while (ma >>= 1)
+				pow2++;
+			ma = 1;
+			while (--pow2 && ma < mt)
+				ma <<= 1;
+			if (ma >= mt) {
+				cgpu->max_alloc = ma;
+				applog(LOG_DEBUG, "Max alloc decreased to %lu", cgpu->max_alloc);
+			}
+		}
+	}
+#endif

 	FILE *binaryfile;
 	size_t *binary_sizes;
@ -460,14 +538,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -460,14 +538,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	strcat(binaryfilename, name);
 	if (clState->goffset)
 		strcat(binaryfilename, "g");
-	strcat(binaryfilename, "v");
-	sprintf(numbuf, "%d", clState->vwidth);
-	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "w");
-	sprintf(numbuf, "%d", (int)clState->wsize);
+	if (opt_scrypt) {
+#ifdef USE_SCRYPT
+		sprintf(numbuf, "lg%dtc%d", cgpu->lookup_gap, cgpu->thread_concurrency);
+		strcat(binaryfilename, numbuf);
+#endif
+	} else {
+		sprintf(numbuf, "v%d", clState->vwidth);
+		strcat(binaryfilename, numbuf);
+	}
+	sprintf(numbuf, "w%d", (int)clState->wsize);
 	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "l");
-	sprintf(numbuf, "%d", (int)sizeof(long));
+	sprintf(numbuf, "l%d", (int)sizeof(long));
 	strcat(binaryfilename, numbuf);
 	strcat(binaryfilename, ".bin");

@ -528,8 +610,16 @@ build:
				@@ -528,8 +610,16 @@ build:
 	/* create a cl program executable for all the devices specified */
 	char *CompilerOptions = calloc(1, 256);

-	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
-		(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
+#ifdef USE_SCRYPT
+	if (opt_scrypt)
+		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
+			cgpu->lookup_gap, cgpu->thread_concurrency, (int)clState->wsize);
+	else
+#endif
+	{
+		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
+			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
+	}
 	applog(LOG_DEBUG, "Setting worksize to %d", clState->wsize);
 	if (clState->vwidth > 1)
 		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
@ -708,18 +798,33 @@ built:
				@@ -708,18 +798,33 @@ built:
 		return NULL;
 	}

-	/////////////////////////////////////////////////////////////////
-	// Create an OpenCL command queue
-	/////////////////////////////////////////////////////////////////
-	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
-						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
-	if (status != CL_SUCCESS) /* Try again without OOE enable */
-		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
-		return NULL;
-	}
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
+		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
+
+		/* Use the max alloc value which has been rounded to a power of
+		 * 2 greater >= required amount earlier */
+		if (bufsize > cgpu->max_alloc) {
+			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
+			       gpu, cgpu->max_alloc, bufsize);
+		} else
+			bufsize = cgpu->max_alloc;
+		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
+		clState->padbufsize = bufsize;
+		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease CT or increase LG", status);
+			return NULL;
+		}

+		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
+		if (status != CL_SUCCESS) {
+			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
+			return NULL;
+		}
+	}
+#endif
 	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
--- a/ocl.h
+++ b/ocl.h
@ -19,6 +19,12 @@ typedef struct {
				@@ -19,6 +19,12 @@ typedef struct {
 	cl_command_queue commandQueue;
 	cl_program program;
 	cl_mem outputBuffer;
+#ifdef USE_SCRYPT
+	cl_mem CLbuffer0;
+	cl_mem padbuffer8;
+	size_t padbufsize;
+	void * cldata;
+#endif
 	bool hasBitAlign;
 	bool hasOpenCL11plus;
 	bool goffset;
--- a/phatk120724.cl
+++ b/phatk120724.cl
@ -387,8 +387,8 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
				@@ -387,8 +387,8 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 	W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));

-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)

 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
--- a/poclbm120724.cl
+++ b/poclbm120724.cl
@ -1311,8 +1311,8 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
				@@ -1311,8 +1311,8 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
 Vals[1]+=K[59];
 Vals[1]+=Vals[5];

-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)

 #if defined(VECTORS2) || defined(VECTORS4)
 	Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
--- a/scrypt.c
+++ b/scrypt.c
@ -0,0 +1,466 @@
				@@ -0,0 +1,466 @@
+/*-
+ * Copyright 2009 Colin Percival, 2011 ArtForz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+#include "config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define byteswap(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
+
+typedef struct SHA256Context {
+	uint32_t state[8];
+	uint32_t buf[16];
+} SHA256_CTX;
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+ */
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+	uint32_t i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = byteswap(src[i]);
+}
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z)	((x & (y ^ z)) ^ z)
+#define Maj(x, y, z)	((x & (y | z)) | (y & z))
+#define SHR(x, n)	(x >> n)
+#define ROTR(x, n)	((x >> n) | (x << (32 - n)))
+#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k)			\
+	t0 = h + S1(e) + Ch(e, f, g) + k;		\
+	t1 = S0(a) + Maj(a, b, c);			\
+	d += t0;					\
+	h  = t0 + t1;
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i, k)			\
+	RND(S[(64 - i) % 8], S[(65 - i) % 8],	\
+	    S[(66 - i) % 8], S[(67 - i) % 8],	\
+	    S[(68 - i) % 8], S[(69 - i) % 8],	\
+	    S[(70 - i) % 8], S[(71 - i) % 8],	\
+	    W[i] + k)
+
+/*
+ * SHA256 block compression function.  The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ */
+static void
+SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
+{
+	uint32_t W[64];
+	uint32_t S[8];
+	uint32_t t0, t1;
+	int i;
+
+	/* 1. Prepare message schedule W. */
+	if(swap)
+		for (i = 0; i < 16; i++)
+			W[i] = byteswap(block[i]);
+	else
+		memcpy(W, block, 64);
+	for (i = 16; i < 64; i += 2) {
+		W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+		W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+	}
+
+	/* 2. Initialize working variables. */
+	memcpy(S, state, 32);
+
+	/* 3. Mix. */
+	RNDr(S, W, 0, 0x428a2f98);
+	RNDr(S, W, 1, 0x71374491);
+	RNDr(S, W, 2, 0xb5c0fbcf);
+	RNDr(S, W, 3, 0xe9b5dba5);
+	RNDr(S, W, 4, 0x3956c25b);
+	RNDr(S, W, 5, 0x59f111f1);
+	RNDr(S, W, 6, 0x923f82a4);
+	RNDr(S, W, 7, 0xab1c5ed5);
+	RNDr(S, W, 8, 0xd807aa98);
+	RNDr(S, W, 9, 0x12835b01);
+	RNDr(S, W, 10, 0x243185be);
+	RNDr(S, W, 11, 0x550c7dc3);
+	RNDr(S, W, 12, 0x72be5d74);
+	RNDr(S, W, 13, 0x80deb1fe);
+	RNDr(S, W, 14, 0x9bdc06a7);
+	RNDr(S, W, 15, 0xc19bf174);
+	RNDr(S, W, 16, 0xe49b69c1);
+	RNDr(S, W, 17, 0xefbe4786);
+	RNDr(S, W, 18, 0x0fc19dc6);
+	RNDr(S, W, 19, 0x240ca1cc);
+	RNDr(S, W, 20, 0x2de92c6f);
+	RNDr(S, W, 21, 0x4a7484aa);
+	RNDr(S, W, 22, 0x5cb0a9dc);
+	RNDr(S, W, 23, 0x76f988da);
+	RNDr(S, W, 24, 0x983e5152);
+	RNDr(S, W, 25, 0xa831c66d);
+	RNDr(S, W, 26, 0xb00327c8);
+	RNDr(S, W, 27, 0xbf597fc7);
+	RNDr(S, W, 28, 0xc6e00bf3);
+	RNDr(S, W, 29, 0xd5a79147);
+	RNDr(S, W, 30, 0x06ca6351);
+	RNDr(S, W, 31, 0x14292967);
+	RNDr(S, W, 32, 0x27b70a85);
+	RNDr(S, W, 33, 0x2e1b2138);
+	RNDr(S, W, 34, 0x4d2c6dfc);
+	RNDr(S, W, 35, 0x53380d13);
+	RNDr(S, W, 36, 0x650a7354);
+	RNDr(S, W, 37, 0x766a0abb);
+	RNDr(S, W, 38, 0x81c2c92e);
+	RNDr(S, W, 39, 0x92722c85);
+	RNDr(S, W, 40, 0xa2bfe8a1);
+	RNDr(S, W, 41, 0xa81a664b);
+	RNDr(S, W, 42, 0xc24b8b70);
+	RNDr(S, W, 43, 0xc76c51a3);
+	RNDr(S, W, 44, 0xd192e819);
+	RNDr(S, W, 45, 0xd6990624);
+	RNDr(S, W, 46, 0xf40e3585);
+	RNDr(S, W, 47, 0x106aa070);
+	RNDr(S, W, 48, 0x19a4c116);
+	RNDr(S, W, 49, 0x1e376c08);
+	RNDr(S, W, 50, 0x2748774c);
+	RNDr(S, W, 51, 0x34b0bcb5);
+	RNDr(S, W, 52, 0x391c0cb3);
+	RNDr(S, W, 53, 0x4ed8aa4a);
+	RNDr(S, W, 54, 0x5b9cca4f);
+	RNDr(S, W, 55, 0x682e6ff3);
+	RNDr(S, W, 56, 0x748f82ee);
+	RNDr(S, W, 57, 0x78a5636f);
+	RNDr(S, W, 58, 0x84c87814);
+	RNDr(S, W, 59, 0x8cc70208);
+	RNDr(S, W, 60, 0x90befffa);
+	RNDr(S, W, 61, 0xa4506ceb);
+	RNDr(S, W, 62, 0xbef9a3f7);
+	RNDr(S, W, 63, 0xc67178f2);
+
+	/* 4. Mix local working variables into global state */
+	for (i = 0; i < 8; i++)
+		state[i] += S[i];
+}
+
+static inline void
+SHA256_InitState(uint32_t * state)
+{
+	/* Magic initialization constants */
+	state[0] = 0x6A09E667;
+	state[1] = 0xBB67AE85;
+	state[2] = 0x3C6EF372;
+	state[3] = 0xA54FF53A;
+	state[4] = 0x510E527F;
+	state[5] = 0x9B05688C;
+	state[6] = 0x1F83D9AB;
+	state[7] = 0x5BE0CD19;
+}
+
+static const uint32_t passwdpad[12] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80020000};
+static const uint32_t outerpad[8] = {0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300};
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+static inline void
+PBKDF2_SHA256_80_128(const uint32_t * passwd, uint32_t * buf)
+{
+	SHA256_CTX PShictx, PShoctx;
+	uint32_t tstate[8];
+	uint32_t ihash[8];
+	uint32_t i;
+	uint32_t pad[16];
+	
+	static const uint32_t innerpad[11] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa0040000};
+
+	/* If Klen > 64, the key is really SHA256(K). */
+	SHA256_InitState(tstate);
+	SHA256_Transform(tstate, passwd, 1);
+	memcpy(pad, passwd+16, 16);
+	memcpy(pad+4, passwdpad, 48);
+	SHA256_Transform(tstate, pad, 1);
+	memcpy(ihash, tstate, 32);
+
+	SHA256_InitState(PShictx.state);
+	for (i = 0; i < 8; i++)
+		pad[i] = ihash[i] ^ 0x36363636;
+	for (; i < 16; i++)
+		pad[i] = 0x36363636;
+	SHA256_Transform(PShictx.state, pad, 0);
+	SHA256_Transform(PShictx.state, passwd, 1);
+	be32enc_vect(PShictx.buf, passwd+16, 4);
+	be32enc_vect(PShictx.buf+5, innerpad, 11);
+
+	SHA256_InitState(PShoctx.state);
+	for (i = 0; i < 8; i++)
+		pad[i] = ihash[i] ^ 0x5c5c5c5c;
+	for (; i < 16; i++)
+		pad[i] = 0x5c5c5c5c;
+	SHA256_Transform(PShoctx.state, pad, 0);
+	memcpy(PShoctx.buf+8, outerpad, 32);
+
+	/* Iterate through the blocks. */
+	for (i = 0; i < 4; i++) {
+		uint32_t istate[8];
+		uint32_t ostate[8];
+		
+		memcpy(istate, PShictx.state, 32);
+		PShictx.buf[4] = i + 1;
+		SHA256_Transform(istate, PShictx.buf, 0);
+		memcpy(PShoctx.buf, istate, 32);
+
+		memcpy(ostate, PShoctx.state, 32);
+		SHA256_Transform(ostate, PShoctx.buf, 0);
+		be32enc_vect(buf+i*8, ostate, 8);
+	}
+}
+
+
+static inline uint32_t
+PBKDF2_SHA256_80_128_32(const uint32_t * passwd, const uint32_t * salt)
+{
+	uint32_t tstate[8];
+	uint32_t ostate[8];
+	uint32_t ihash[8];
+	uint32_t i;
+
+	/* Compute HMAC state after processing P and S. */
+	uint32_t pad[16];
+	
+	static const uint32_t ihash_finalblk[16] = {0x00000001,0x80000000,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x00000620};
+
+	/* If Klen > 64, the key is really SHA256(K). */
+	SHA256_InitState(tstate);
+	SHA256_Transform(tstate, passwd, 1);
+	memcpy(pad, passwd+16, 16);
+	memcpy(pad+4, passwdpad, 48);
+	SHA256_Transform(tstate, pad, 1);
+	memcpy(ihash, tstate, 32);
+
+	SHA256_InitState(ostate);
+	for (i = 0; i < 8; i++)
+		pad[i] = ihash[i] ^ 0x5c5c5c5c;
+	for (; i < 16; i++)
+		pad[i] = 0x5c5c5c5c;
+	SHA256_Transform(ostate, pad, 0);
+
+	SHA256_InitState(tstate);
+	for (i = 0; i < 8; i++)
+		pad[i] = ihash[i] ^ 0x36363636;
+	for (; i < 16; i++)
+		pad[i] = 0x36363636;
+	SHA256_Transform(tstate, pad, 0);
+	SHA256_Transform(tstate, salt, 1);
+	SHA256_Transform(tstate, salt+16, 1);
+	SHA256_Transform(tstate, ihash_finalblk, 0);
+	memcpy(pad, tstate, 32);
+	memcpy(pad+8, outerpad, 32);
+
+	/* Feed the inner hash to the outer SHA256 operation. */
+	SHA256_Transform(ostate, pad, 0);
+	/* Finish the outer SHA256 operation. */
+	return byteswap(ostate[7]);
+}
+
+
+/**
+ * salsa20_8(B):
+ * Apply the salsa20/8 core to the provided block.
+ */
+static inline void
+salsa20_8(uint32_t B[16], const uint32_t Bx[16])
+{
+	uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
+	size_t i;
+
+	x00 = (B[ 0] ^= Bx[ 0]);
+	x01 = (B[ 1] ^= Bx[ 1]);
+	x02 = (B[ 2] ^= Bx[ 2]);
+	x03 = (B[ 3] ^= Bx[ 3]);
+	x04 = (B[ 4] ^= Bx[ 4]);
+	x05 = (B[ 5] ^= Bx[ 5]);
+	x06 = (B[ 6] ^= Bx[ 6]);
+	x07 = (B[ 7] ^= Bx[ 7]);
+	x08 = (B[ 8] ^= Bx[ 8]);
+	x09 = (B[ 9] ^= Bx[ 9]);
+	x10 = (B[10] ^= Bx[10]);
+	x11 = (B[11] ^= Bx[11]);
+	x12 = (B[12] ^= Bx[12]);
+	x13 = (B[13] ^= Bx[13]);
+	x14 = (B[14] ^= Bx[14]);
+	x15 = (B[15] ^= Bx[15]);
+	for (i = 0; i < 8; i += 2) {
+#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
+		/* Operate on columns. */
+		x04 ^= R(x00+x12, 7);	x09 ^= R(x05+x01, 7);	x14 ^= R(x10+x06, 7);	x03 ^= R(x15+x11, 7);
+		x08 ^= R(x04+x00, 9);	x13 ^= R(x09+x05, 9);	x02 ^= R(x14+x10, 9);	x07 ^= R(x03+x15, 9);
+		x12 ^= R(x08+x04,13);	x01 ^= R(x13+x09,13);	x06 ^= R(x02+x14,13);	x11 ^= R(x07+x03,13);
+		x00 ^= R(x12+x08,18);	x05 ^= R(x01+x13,18);	x10 ^= R(x06+x02,18);	x15 ^= R(x11+x07,18);
+
+		/* Operate on rows. */
+		x01 ^= R(x00+x03, 7);	x06 ^= R(x05+x04, 7);	x11 ^= R(x10+x09, 7);	x12 ^= R(x15+x14, 7);
+		x02 ^= R(x01+x00, 9);	x07 ^= R(x06+x05, 9);	x08 ^= R(x11+x10, 9);	x13 ^= R(x12+x15, 9);
+		x03 ^= R(x02+x01,13);	x04 ^= R(x07+x06,13);	x09 ^= R(x08+x11,13);	x14 ^= R(x13+x12,13);
+		x00 ^= R(x03+x02,18);	x05 ^= R(x04+x07,18);	x10 ^= R(x09+x08,18);	x15 ^= R(x14+x13,18);
+#undef R
+	}
+	B[ 0] += x00;
+	B[ 1] += x01;
+	B[ 2] += x02;
+	B[ 3] += x03;
+	B[ 4] += x04;
+	B[ 5] += x05;
+	B[ 6] += x06;
+	B[ 7] += x07;
+	B[ 8] += x08;
+	B[ 9] += x09;
+	B[10] += x10;
+	B[11] += x11;
+	B[12] += x12;
+	B[13] += x13;
+	B[14] += x14;
+	B[15] += x15;
+}
+
+/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
+   scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
+ */
+static uint32_t scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad)
+{
+	uint32_t * V;
+	uint32_t X[32];
+	uint32_t i;
+	uint32_t j;
+	uint32_t k;
+	uint64_t *p1, *p2;
+
+	p1 = (uint64_t *)X;
+	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+
+	PBKDF2_SHA256_80_128(input, X);
+
+	for (i = 0; i < 1024; i += 2) {
+		memcpy(&V[i * 32], X, 128);
+
+		salsa20_8(&X[0], &X[16]);
+		salsa20_8(&X[16], &X[0]);
+
+		memcpy(&V[(i + 1) * 32], X, 128);
+
+		salsa20_8(&X[0], &X[16]);
+		salsa20_8(&X[16], &X[0]);
+	}
+	for (i = 0; i < 1024; i += 2) {
+		j = X[16] & 1023;
+		p2 = (uint64_t *)(&V[j * 32]);
+		for(k = 0; k < 16; k++)
+			p1[k] ^= p2[k];
+
+		salsa20_8(&X[0], &X[16]);
+		salsa20_8(&X[16], &X[0]);
+
+		j = X[16] & 1023;
+		p2 = (uint64_t *)(&V[j * 32]);
+		for(k = 0; k < 16; k++)
+			p1[k] ^= p2[k];
+
+		salsa20_8(&X[0], &X[16]);
+		salsa20_8(&X[16], &X[0]);
+	}
+
+	return PBKDF2_SHA256_80_128_32(input, X);
+}
+
+/* Used externally as confirmation of correct OCL code */
+bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = ((const uint32_t *)ptarget)[7];
+	char *scratchbuf;
+	uint32_t data[20];
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = byteswap(nonce);
+	scratchbuf = alloca(131584);
+	tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
+
+	return (tmp_hash7 <= Htarg);
+}
+
+bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	char *scratchbuf;
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = ((const uint32_t *)ptarget)[7];
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	scratchbuf = malloc(131583);
+	if (unlikely(!scratchbuf)) {
+		applog(LOG_ERR, "Failed to malloc scratchbuf in scanhash_scrypt");
+		return ret;
+	}
+
+	while(1) {
+		*nonce = ++n;
+		data[19] = n;
+		tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[19] = byteswap(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	free(scratchbuf);;
+	return ret;
+}
+
--- a/scrypt.h
+++ b/scrypt.h
@ -0,0 +1,13 @@
				@@ -0,0 +1,13 @@
+#ifndef SCRYPT_H
+#define SCRYPT_H
+
+#ifdef USE_SCRYPT
+extern bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce);
+#else /* USE_SCRYPT */
+static inline bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	return false;
+}
+#endif /* USE_SCRYPT */
+
+#endif /* SCRYPT_H */
--- a/scrypt120724.cl
+++ b/scrypt120724.cl
@ -0,0 +1,757 @@
				@@ -0,0 +1,757 @@
+#define rotl(x,y) rotate(x,y)
+#define Ch(x,y,z) bitselect(z,y,x)
+#define Maj(x,y,z) Ch((x^z),y,z)
+
+#define EndianSwap(n) (rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U))
+
+#define Tr2(x)		(rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U))
+#define Tr1(x)		(rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U))
+#define Wr2(x)		(rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U))
+#define Wr1(x)		(rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U))
+
+#define RND(a, b, c, d, e, f, g, h, k)			\
+	h += Tr1(e) + Ch(e, f, g) + k;		\
+	d += h;					\
+	h += Tr2(a) + Maj(a, b, c);
+
+void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
+{
+	uint4 S0 = *state0;
+	uint4 S1 = *state1;
+	
+#define A S0.x
+#define B S0.y
+#define C S0.z
+#define D S0.w
+#define E S1.x
+#define F S1.y
+#define G S1.z
+#define H S1.w
+
+	uint4 W[4];
+
+	W[ 0].x = block0.x;
+	RND(A,B,C,D,E,F,G,H, W[0].x+0x428a2f98U);
+	W[ 0].y = block0.y;
+	RND(H,A,B,C,D,E,F,G, W[0].y+0x71374491U);
+	W[ 0].z = block0.z;
+	RND(G,H,A,B,C,D,E,F, W[0].z+0xb5c0fbcfU);
+	W[ 0].w = block0.w;
+	RND(F,G,H,A,B,C,D,E, W[0].w+0xe9b5dba5U);
+
+	W[ 1].x = block1.x;
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU);
+	W[ 1].y = block1.y;
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U);
+	W[ 1].z = block1.z;
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U);
+	W[ 1].w = block1.w;
+	RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U);
+
+	W[ 2].x = block2.x;
+	RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U);
+	W[ 2].y = block2.y;
+	RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U);
+	W[ 2].z = block2.z;
+	RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU);
+	W[ 2].w = block2.w;
+	RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U);
+
+	W[ 3].x = block3.x;
+	RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U);
+	W[ 3].y = block3.y;
+	RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU);
+	W[ 3].z = block3.z;
+	RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U);
+	W[ 3].w = block3.w;
+	RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U);
+
+	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U);
+
+	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U);
+
+	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U);
+
+	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU);
+
+	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU);
+
+	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU);
+
+	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU);
+
+	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU);
+
+	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U);
+
+	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU);
+
+	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U);
+
+	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U);
+
+	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U);
+
+	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U);
+
+	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U);
+
+	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U);
+
+	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U);
+
+	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U);
+
+	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU);
+
+	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U);
+
+	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U);
+
+	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU);
+
+	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU);
+
+	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U);
+
+	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U);
+
+	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU);
+
+	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U);
+
+	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U);
+
+	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U);
+
+	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U);
+
+	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U);
+
+	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U);
+
+	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U);
+
+	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U);
+
+	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU);
+
+	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U);
+
+	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U);
+
+	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU);
+
+	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU);
+
+	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U);
+
+	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU);
+
+	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU);
+
+	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U);
+
+	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U);
+
+	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU);
+
+	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU);
+
+	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U);
+
+	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+
+	*state0 += S0;
+	*state1 += S1;
+}
+
+void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
+{
+#define A (*state0).x
+#define B (*state0).y
+#define C (*state0).z
+#define D (*state0).w
+#define E (*state1).x
+#define F (*state1).y
+#define G (*state1).z
+#define H (*state1).w
+
+	uint4 W[4];
+
+	W[0].x = block0.x;
+	D=0x98c7e2a2U+W[0].x;
+	H=0xfc08884dU+W[0].x;
+
+	W[0].y = block0.y;
+	C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y;
+	G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U);
+
+	W[0].z = block0.z;
+	B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z;
+	F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U);
+
+	W[0].w = block0.w;
+	A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; 
+	E=0x95F61999U+A+Tr2(F)+Maj(F,G,H);
+
+	W[1].x = block1.x;
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU);
+	W[1].y = block1.y;
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U);
+	W[1].z = block1.z;
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U);
+	W[1].w = block1.w;
+	RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U);
+	
+	W[2].x = block2.x;
+	RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U);
+	W[2].y = block2.y;
+	RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U);
+	W[2].z = block2.z;
+	RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU);
+	W[2].w = block2.w;
+	RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U);
+	
+	W[3].x = block3.x;
+	RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U);
+	W[3].y = block3.y;
+	RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU);
+	W[3].z = block3.z;
+	RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U);
+	W[3].w = block3.w;
+	RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U);
+
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U);
+
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U);
+
+	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
+	RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U);
+
+	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
+	RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U);
+
+	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
+	RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU);
+
+	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
+	RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U);
+
+	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
+	RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U);
+
+	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
+	RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU);
+
+	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
+	RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU);
+
+	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
+	RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U);
+
+	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
+	RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU);
+
+	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
+	RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU);
+
+	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
+	RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U);
+
+	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
+	RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U);
+
+	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
+	RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU);
+
+	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
+	RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU);
+
+	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
+	RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U);
+
+	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
+	RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+
+	*state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU);
+	*state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U);
+}
+
+__constant uint fixedW[64] =
+{
+	0x428a2f99,0xf1374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf794,
+	0xf59b89c2,0x73924787,0x23c6886e,0xa42ca65c,0x15ed3627,0x4d6edcbf,0xe28217fc,0xef02488f,
+	0xb707775c,0x0468c23f,0xe7e72b4c,0x49e1f1a2,0x4b99c816,0x926d1570,0xaa0fc072,0xadb36e2c,
+	0xad87a3ea,0xbcb1d3a3,0x7b993186,0x562b9420,0xbff3ca0c,0xda4b0c23,0x6cd8711a,0x8f337caa,
+	0xc91b1417,0xc359dce1,0xa83253a7,0x3b13c12d,0x9d3d725d,0xd9031a84,0xb1a03340,0x16f58012,
+	0xe64fb6a2,0xe84d923a,0xe93a5730,0x09837686,0x078ff753,0x29833341,0xd5de0b7e,0x6948ccf4,
+	0xe0a1adbe,0x7c728e11,0x511c78e4,0x315b45bd,0xfca71413,0xea28f96a,0x79703128,0x4e1ef848,
+};
+
+void SHA256_fixed(uint4*restrict state0,uint4*restrict state1)
+{
+	uint4 S0 = *state0;
+	uint4 S1 = *state1;
+
+#define A S0.x
+#define B S0.y
+#define C S0.z
+#define D S0.w
+#define E S1.x
+#define F S1.y
+#define G S1.z
+#define H S1.w
+
+	RND(A,B,C,D,E,F,G,H, fixedW[0]);
+	RND(H,A,B,C,D,E,F,G, fixedW[1]);
+	RND(G,H,A,B,C,D,E,F, fixedW[2]);
+	RND(F,G,H,A,B,C,D,E, fixedW[3]);
+	RND(E,F,G,H,A,B,C,D, fixedW[4]);
+	RND(D,E,F,G,H,A,B,C, fixedW[5]);
+	RND(C,D,E,F,G,H,A,B, fixedW[6]);
+	RND(B,C,D,E,F,G,H,A, fixedW[7]);
+	RND(A,B,C,D,E,F,G,H, fixedW[8]);
+	RND(H,A,B,C,D,E,F,G, fixedW[9]);
+	RND(G,H,A,B,C,D,E,F, fixedW[10]);
+	RND(F,G,H,A,B,C,D,E, fixedW[11]);
+	RND(E,F,G,H,A,B,C,D, fixedW[12]);
+	RND(D,E,F,G,H,A,B,C, fixedW[13]);
+	RND(C,D,E,F,G,H,A,B, fixedW[14]);
+	RND(B,C,D,E,F,G,H,A, fixedW[15]);
+	RND(A,B,C,D,E,F,G,H, fixedW[16]);
+	RND(H,A,B,C,D,E,F,G, fixedW[17]);
+	RND(G,H,A,B,C,D,E,F, fixedW[18]);
+	RND(F,G,H,A,B,C,D,E, fixedW[19]);
+	RND(E,F,G,H,A,B,C,D, fixedW[20]);
+	RND(D,E,F,G,H,A,B,C, fixedW[21]);
+	RND(C,D,E,F,G,H,A,B, fixedW[22]);
+	RND(B,C,D,E,F,G,H,A, fixedW[23]);
+	RND(A,B,C,D,E,F,G,H, fixedW[24]);
+	RND(H,A,B,C,D,E,F,G, fixedW[25]);
+	RND(G,H,A,B,C,D,E,F, fixedW[26]);
+	RND(F,G,H,A,B,C,D,E, fixedW[27]);
+	RND(E,F,G,H,A,B,C,D, fixedW[28]);
+	RND(D,E,F,G,H,A,B,C, fixedW[29]);
+	RND(C,D,E,F,G,H,A,B, fixedW[30]);
+	RND(B,C,D,E,F,G,H,A, fixedW[31]);
+	RND(A,B,C,D,E,F,G,H, fixedW[32]);
+	RND(H,A,B,C,D,E,F,G, fixedW[33]);
+	RND(G,H,A,B,C,D,E,F, fixedW[34]);
+	RND(F,G,H,A,B,C,D,E, fixedW[35]);
+	RND(E,F,G,H,A,B,C,D, fixedW[36]);
+	RND(D,E,F,G,H,A,B,C, fixedW[37]);
+	RND(C,D,E,F,G,H,A,B, fixedW[38]);
+	RND(B,C,D,E,F,G,H,A, fixedW[39]);
+	RND(A,B,C,D,E,F,G,H, fixedW[40]);
+	RND(H,A,B,C,D,E,F,G, fixedW[41]);
+	RND(G,H,A,B,C,D,E,F, fixedW[42]);
+	RND(F,G,H,A,B,C,D,E, fixedW[43]);
+	RND(E,F,G,H,A,B,C,D, fixedW[44]);
+	RND(D,E,F,G,H,A,B,C, fixedW[45]);
+	RND(C,D,E,F,G,H,A,B, fixedW[46]);
+	RND(B,C,D,E,F,G,H,A, fixedW[47]);
+	RND(A,B,C,D,E,F,G,H, fixedW[48]);
+	RND(H,A,B,C,D,E,F,G, fixedW[49]);
+	RND(G,H,A,B,C,D,E,F, fixedW[50]);
+	RND(F,G,H,A,B,C,D,E, fixedW[51]);
+	RND(E,F,G,H,A,B,C,D, fixedW[52]);
+	RND(D,E,F,G,H,A,B,C, fixedW[53]);
+	RND(C,D,E,F,G,H,A,B, fixedW[54]);
+	RND(B,C,D,E,F,G,H,A, fixedW[55]);
+	RND(A,B,C,D,E,F,G,H, fixedW[56]);
+	RND(H,A,B,C,D,E,F,G, fixedW[57]);
+	RND(G,H,A,B,C,D,E,F, fixedW[58]);
+	RND(F,G,H,A,B,C,D,E, fixedW[59]);
+	RND(E,F,G,H,A,B,C,D, fixedW[60]);
+	RND(D,E,F,G,H,A,B,C, fixedW[61]);
+	RND(C,D,E,F,G,H,A,B, fixedW[62]);
+	RND(B,C,D,E,F,G,H,A, fixedW[63]);
+	
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef H
+	*state0 += S0;
+	*state1 += S1;
+}
+
+void shittify(uint4 B[8])
+{
+	uint4 tmp[4];
+	tmp[0] = (uint4)(B[1].x,B[2].y,B[3].z,B[0].w);
+	tmp[1] = (uint4)(B[2].x,B[3].y,B[0].z,B[1].w);
+	tmp[2] = (uint4)(B[3].x,B[0].y,B[1].z,B[2].w);
+	tmp[3] = (uint4)(B[0].x,B[1].y,B[2].z,B[3].w);
+	
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i] = EndianSwap(tmp[i]);
+
+	tmp[0] = (uint4)(B[5].x,B[6].y,B[7].z,B[4].w);
+	tmp[1] = (uint4)(B[6].x,B[7].y,B[4].z,B[5].w);
+	tmp[2] = (uint4)(B[7].x,B[4].y,B[5].z,B[6].w);
+	tmp[3] = (uint4)(B[4].x,B[5].y,B[6].z,B[7].w);
+	
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i+4] = EndianSwap(tmp[i]);
+}
+
+void unshittify(uint4 B[8])
+{
+	uint4 tmp[4];
+	tmp[0] = (uint4)(B[3].x,B[2].y,B[1].z,B[0].w);
+	tmp[1] = (uint4)(B[0].x,B[3].y,B[2].z,B[1].w);
+	tmp[2] = (uint4)(B[1].x,B[0].y,B[3].z,B[2].w);
+	tmp[3] = (uint4)(B[2].x,B[1].y,B[0].z,B[3].w);
+	
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i] = EndianSwap(tmp[i]);
+
+	tmp[0] = (uint4)(B[7].x,B[6].y,B[5].z,B[4].w);
+	tmp[1] = (uint4)(B[4].x,B[7].y,B[6].z,B[5].w);
+	tmp[2] = (uint4)(B[5].x,B[4].y,B[7].z,B[6].w);
+	tmp[3] = (uint4)(B[6].x,B[5].y,B[4].z,B[7].w);
+	
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i+4] = EndianSwap(tmp[i]);
+}
+
+void salsa(uint4 B[8])
+{
+	uint4 w[4];
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		w[i] = (B[i]^=B[i+4]);
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+	{
+		w[0] ^= rotl(w[3]     +w[2]     , 7U);
+		w[1] ^= rotl(w[0]     +w[3]     , 9U);
+		w[2] ^= rotl(w[1]     +w[0]     ,13U);
+		w[3] ^= rotl(w[2]     +w[1]     ,18U);
+		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
+		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
+		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
+		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
+	}
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		w[i] = (B[i+4]^=(B[i]+=w[i]));
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+	{
+		w[0] ^= rotl(w[3]     +w[2]     , 7U);
+		w[1] ^= rotl(w[0]     +w[3]     , 9U);
+		w[2] ^= rotl(w[1]     +w[0]     ,13U);
+		w[3] ^= rotl(w[2]     +w[1]     ,18U);
+		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
+		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
+		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
+		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
+	}
+
+#pragma unroll
+	for(uint i=0; i<4; ++i)
+		B[i+4] += w[i];
+}
+
+#define Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE)
+#define CO Coord(z,x,y)
+
+void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
+{
+	shittify(X);
+	const uint zSIZE = 8;
+	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
+	const uint xSIZE = CONCURRENT_THREADS;
+	uint x = get_global_id(0)%xSIZE;
+
+	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
+	{
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			lookup[CO] = X[z];
+		for(uint i=0; i<LOOKUP_GAP; ++i) 
+			salsa(X);
+	}
+#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
+	{
+		uint y = (1024/LOOKUP_GAP);
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			lookup[CO] = X[z];
+		for(uint i=0; i<1024%LOOKUP_GAP; ++i)
+			salsa(X); 
+	}
+#endif
+	for (uint i=0; i<1024; ++i) 
+	{
+		uint4 V[8];
+		uint j = X[7].x & 0x3FF;
+		uint y = (j/LOOKUP_GAP);
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			V[z] = lookup[CO];
+
+#if (LOOKUP_GAP == 1)
+#elif (LOOKUP_GAP == 2)
+		if (j&1)
+			salsa(V);
+#else
+		uint val = j%LOOKUP_GAP;
+		for (uint z=0; z<val; ++z) 
+			salsa(V);
+#endif
+
+#pragma unroll
+		for(uint z=0; z<zSIZE; ++z)
+			X[z] ^= V[z];
+		salsa(X);
+	}
+	unshittify(X);
+}
+
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(__global const uint4 * restrict input,
+__global uint*restrict output, __global uint4*restrict padcache,
+const uint4 midstate0, const uint4 midstate16, const uint target)
+{
+	uint gid = get_global_id(0);
+	uint4 X[8];
+	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
+	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
+	uint4 pad0 = midstate0, pad1 = midstate16;
+
+	SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280));
+	SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU);
+	SHA256_fresh(&tstate0,&tstate1, pad0^0x36363636U, pad1^0x36363636U, 0x36363636U, 0x36363636U);
+
+	tmp0 = tstate0;
+	tmp1 = tstate1;
+	SHA256(&tstate0, &tstate1, input[0],input[1],input[2],input[3]);
+
+#pragma unroll
+	for (uint i=0; i<4; i++) 
+	{
+		pad0 = tstate0;
+		pad1 = tstate1;
+		X[i*2 ] = ostate0;
+		X[i*2+1] = ostate1;
+
+		SHA256(&pad0,&pad1, data, (uint4)(i+1,0x80000000U,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x4a0U));
+		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
+	}
+	scrypt_core(X,padcache);
+	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
+	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
+	SHA256_fixed(&tmp0,&tmp1);
+	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
+
+	bool found = (EndianSwap(ostate1.w) <= target);
+	if (found)
+		output[FOUND] = output[NFLAG & gid] = gid;
+}
+
+/*-
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
+ * 2012 Con Kolivas.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */