From c3eb66683a613f10d7e61b9de739485c39377db5 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Tue, 9 Sep 2014 21:56:25 +0200
Subject: [PATCH 1/3] Import djm34 qubit, deep and doom algos

Indent, and put commonly used functions proto. in cuda_helper.h

And add them to --cputest function

Also change the color option to --nocolor, -C is no more needed

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
(Which is tired to remove these german copy/pasted comments)
---
 Makefile.am             |   9 +-
 README.txt              |  11 +-
 configure.ac            |   2 +-
 cpu-miner.c             |  39 +++-
 cpuminer-config.h       |   6 +-
 cuda_helper.h           |   4 +
 miner.h                 |  25 +-
 qubit/deep.cu           | 120 ++++++++++
 qubit/doom.cu           |  93 ++++++++
 qubit/qubit.cu          | 146 ++++++++++++
 qubit/qubit_luffa512.cu | 496 ++++++++++++++++++++++++++++++++++++++++
 util.c                  |  12 +
 12 files changed, 943 insertions(+), 20 deletions(-)
 create mode 100644 qubit/deep.cu
 create mode 100644 qubit/doom.cu
 create mode 100644 qubit/qubit.cu
 create mode 100644 qubit/qubit_luffa512.cu

diff --git a/Makefile.am b/Makefile.am
index c2fa11d..2067074 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -31,17 +31,20 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
 			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
-			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
-			  quark/cuda_quark_compactionTest.cu \
+			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
+			  quark/quarkcoin.cu quark/animecoin.cu \
+			  quark/cuda_quark_compactionTest.cu  \
 			  cuda_nist5.cu blake32.cu pentablake.cu \
 			  sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
+			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/doom.cu \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
-			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \
+			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
+			  x15/whirlpool.cu \
 			  x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu
 
 
diff --git a/README.txt b/README.txt
index 001ff3f..9a49a0a 100644
--- a/README.txt
+++ b/README.txt
@@ -1,5 +1,5 @@
 
-ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
+ccMiner release 1.4.3-tpruvot (Sep 11th 2014) - "DJM34 Algos"
 ---------------------------------------------------------------
 
 ***************************************************************
@@ -35,6 +35,7 @@ TalkCoin
 DarkCoin and other X11 coins
 NEOS blake (256 14-rounds)
 BlakeCoin (256 8-rounds)
+Deep, Doom, Goalcoin and Qubit
 Pentablake (Blake 512 x5)
 
 where some of these coins have a VERY NOTABLE nVidia advantage
@@ -56,12 +57,15 @@ its command line interface and options.
   -a, --algo=ALGO       specify the algorithm to use
                           heavy       use to mine Heavycoin
                           mjollnir    use to mine Mjollnircoin
+                          deep        use to mine Deepcoin
+                          doom        use to mine Doomcoin
                           fugue256    use to mine Fuguecoin
                           groestl     use to mine Groestlcoin
                           dmd-gr      use to mine Diamond-Groestl
                           myr-gr      use to mine Myriad-Groest
                           jackpot     use to mine Jackpotcoin
                           quark       use to mine Quarkcoin
+                          qubit       use to mine Qubit Algo
                           anime       use to mine Animecoin
                           blake       use to mine NEOS (Blake 256)
                           blakecoin   use to mine Old Blake 256
@@ -103,7 +107,7 @@ its command line interface and options.
       --benchmark       run in offline benchmark mode
       --cputest         debug hashes from cpu algorithms
   -c, --config=FILE     load a JSON-format configuration file
-  -C, --color           display colored output in a linux Terminal
+  -K, --nocolor         disable colored console output
   -V, --version         display version information and exit
   -h, --help            display this help text and exit
 
@@ -154,6 +158,9 @@ features.
 
 >>> RELEASE HISTORY <<<
 
+  Sep  11th 2O14
+                  add algos from djm34 (deep,doom,qubit)
+
   Sep.  1st 2014  add X17, optimized x15 and whirl
                   add blake (256 variant)
                   color support on Windows,
diff --git a/configure.ac b/configure.ac
index a4ef290..3710824 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2014.09.09])
+AC_INIT([ccminer], [2014.09.11])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index cb6e790..287fdc9 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -129,6 +129,8 @@ typedef enum {
 	ALGO_ANIME,
 	ALGO_BLAKE,
 	ALGO_BLAKECOIN,
+	ALGO_DEEP,
+	ALGO_DOOM,
 	ALGO_FRESH,
 	ALGO_FUGUE256,		/* Fugue256 */
 	ALGO_GROESTL,
@@ -139,6 +141,7 @@ typedef enum {
 	ALGO_NIST5,
 	ALGO_PENTABLAKE,
 	ALGO_QUARK,
+	ALGO_QUBIT,
 	ALGO_WHC,
 	ALGO_X11,
 	ALGO_X13,
@@ -152,6 +155,8 @@ static const char *algo_names[] = {
 	"anime",
 	"blake",
 	"blakecoin",
+	"deep",
+	"doom",
 	"fresh",
 	"fugue256",
 	"groestl",
@@ -162,6 +167,7 @@ static const char *algo_names[] = {
 	"nist5",
 	"penta",
 	"quark",
+	"qubit",
 	"whirl",
 	"x11",
 	"x13",
@@ -235,6 +241,8 @@ Options:\n\
                         anime     Animecoin hash\n\
                         blake     Blake 256 (like NEOS blake)\n\
                         blakecoin Old Blake 256 (8 rounds)\n\
+                        deep      Deepcoin hash\n\
+                        doom      Doomcoin hash\n\
                         fresh     Freshcoin hash (shavite 80)\n\
                         fugue256  Fuguecoin hash\n\
                         groestl   Groestlcoin hash\n\
@@ -245,6 +253,7 @@ Options:\n\
                         nist5     NIST5 (TalkCoin) hash\n\
                         penta     Pentablake hash (5x Blake 512)\n\
                         quark     Quark hash\n\
+                        qubit     Qubit hash\n\
                         whirl     Whirlcoin (old whirlpool)\n\
                         x11       X11 (DarkCoin) hash\n\
                         x13       X13 (MaruCoin) hash\n\
@@ -275,7 +284,7 @@ Options:\n\
       --no-longpoll     disable X-Long-Polling support\n\
       --no-stratum      disable X-Stratum support\n\
   -q, --quiet           disable per-thread hashmeter output\n\
-  -C, --color           enable colored output\n\
+  -K, --nocolor         disable colored output\n\
   -D, --debug           enable debug output\n\
   -P, --protocol-dump   verbose dump of protocol-level activities\n"
 #ifdef HAVE_SYSLOG_H
@@ -301,7 +310,7 @@ static char const short_options[] =
 #ifdef HAVE_SYSLOG_H
 	"S"
 #endif
-	"a:c:CDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
+	"a:c:CKDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
 
 static struct option const options[] = {
 	{ "algo", 1, NULL, 'a' },
@@ -312,7 +321,7 @@ static struct option const options[] = {
 	{ "cputest", 0, NULL, 1006 },
 	{ "cert", 1, NULL, 1001 },
 	{ "config", 1, NULL, 'c' },
-	{ "color", 0, NULL, 'C' },
+	{ "nocolor", 0, NULL, 'K' },
 	{ "debug", 0, NULL, 'D' },
 	{ "help", 0, NULL, 'h' },
 	{ "no-longpoll", 0, NULL, 1003 },
@@ -1083,6 +1092,16 @@ continue_scan:
 			                      max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ);
 			break;
 
+		case ALGO_DEEP:
+			rc = scanhash_deep(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
+		case ALGO_DOOM:
+			rc = scanhash_doom(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
 		case ALGO_FUGUE256:
 			rc = scanhash_fugue256(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@@ -1109,6 +1128,11 @@ continue_scan:
 			                      max_nonce, &hashes_done);
 			break;
 
+		case ALGO_QUBIT:
+			rc = scanhash_qubit(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
 		case ALGO_ANIME:
 			rc = scanhash_anime(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@@ -1427,7 +1451,7 @@ out:
 	return NULL;
 }
 
-#define PROGRAM_VERSION "1.4.2"
+#define PROGRAM_VERSION "1.4.3"
 static void show_version_and_exit(void)
 {
 	printf("%s v%s\n"
@@ -1489,8 +1513,12 @@ static void parse_arg (int key, char *arg)
 		break;
 	}
 	case 'C':
+		/* color for compat */
 		use_colors = true;
 		break;
+	case 'K':
+		use_colors = false;
+		break;
 	case 'D':
 		opt_debug = true;
 		break;
@@ -1801,8 +1829,7 @@ int main(int argc, char *argv[])
 #endif
 	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
 	printf("\t    and HVC extension from http://hvc.1gh.com/" "\n\n");
-	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
-	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
+	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n\n");
 	printf("\tInclude some of djm34 additions, cleaned by Tanguy Pruvot\n");
 	printf("\t  BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");
 
diff --git a/cpuminer-config.h b/cpuminer-config.h
index 11edf82..4681e46 100644
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@@ -156,7 +156,7 @@
 #define PACKAGE_NAME "ccminer"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.09.06"
+#define PACKAGE_STRING "ccminer 2014.09.11"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@@ -165,7 +165,7 @@
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.09.06"
+#define PACKAGE_VERSION "2014.09.11"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be
@@ -188,7 +188,7 @@
 #define USE_XOP 1
 
 /* Version number of package */
-#define VERSION "2014.09.06"
+#define VERSION "2014.09.11"
 
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
diff --git a/cuda_helper.h b/cuda_helper.h
index 66c8e7d..9e10968 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -12,6 +12,10 @@
 
 #include <stdint.h>
 
+// common functions
+extern void cuda_check_cpu_init(int thr_id, int threads);
+extern void cuda_check_cpu_setTarget(const void *ptarget);
+extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 
 extern __device__ __device_builtin__ void __syncthreads(void);
diff --git a/miner.h b/miner.h
index d9d29a8..d6127e2 100644
--- a/miner.h
+++ b/miner.h
@@ -205,13 +205,13 @@ extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
 
 extern unsigned char *scrypt_buffer_alloc();
 
-extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
-	unsigned char *scratchbuf, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done);
+extern int scanhash_deep(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
 
-extern int scanhash_heavy(int thr_id, uint32_t *pdata,
+extern int scanhash_doom(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
-	unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+	unsigned long *hashes_done);
 
 extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
@@ -221,6 +221,10 @@ extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_heavy(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+
 extern int scanhash_myriad(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@@ -253,6 +257,14 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_qubit(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
+	unsigned char *scratchbuf, const uint32_t *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done);
+
 extern int scanhash_whc(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@@ -426,6 +438,8 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
 void print_hash_tests(void);
 void animehash(void *state, const void *input);
 void blake256hash(void *output, const void *input, int rounds);
+void deephash(void *state, const void *input);
+void doomhash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
 void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
@@ -435,6 +449,7 @@ void myriadhash(void *state, const void *input);
 void nist5hash(void *state, const void *input);
 void pentablakehash(void *output, const void *input);
 void quarkhash(void *state, const void *input);
+void qubithash(void *state, const void *input);
 void wcoinhash(void *state, const void *input);
 void x11hash(void *output, const void *input);
 void x13hash(void *output, const void *input);
diff --git a/qubit/deep.cu b/qubit/deep.cu
new file mode 100644
index 0000000..8e9a8be
--- /dev/null
+++ b/qubit/deep.cu
@@ -0,0 +1,120 @@
+/*
+ * deepcoin algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern "C" void deephash(void *state, const void *input)
+{
+	// luffa1-cubehash2-shavite3-simd4-echo5
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_echo512_context ctx_echo;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+
+extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			deephash(vhash64, endiandata);
+
+			if (vhash64[7]<=Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/doom.cu b/qubit/doom.cu
new file mode 100644
index 0000000..30719c7
--- /dev/null
+++ b/qubit/doom.cu
@@ -0,0 +1,93 @@
+/*
+ * qubit algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void doomhash(void *state, const void *input)
+{
+	// luffa512
+	sph_luffa512_context ctx_luffa;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+
+extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			doomhash(vhash64, endiandata);
+
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
new file mode 100644
index 0000000..11eb0d1
--- /dev/null
+++ b/qubit/qubit.cu
@@ -0,0 +1,146 @@
+/*
+ * qubit algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+extern "C" void qubithash(void *state, const void *input)
+{
+	// luffa1-cubehash2-shavite3-simd4-echo5
+
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
+	sph_shavite512_close(&ctx_shavite, (void*) hash);
+
+	sph_simd512_init(&ctx_simd);
+	sph_simd512 (&ctx_simd, (const void*) hash, 64);
+	sph_simd512_close(&ctx_simd, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		// Hash with CUDA
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			qubithash(vhash64, endiandata);
+
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu
new file mode 100644
index 0000000..1d827d5
--- /dev/null
+++ b/qubit/qubit_luffa512.cu
@@ -0,0 +1,496 @@
+/*
+ * luffa_for_32.c
+ * Version 2.0 (Sep 15th 2009)
+ *
+ * Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
+ *
+ * Hitachi, Ltd. is the owner of this software and hereby grant
+ * the U.S. Government and any interested party the right to use
+ * this software for the purposes of the SHA-3 evaluation process,
+ * notwithstanding that this software is copyrighted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+#include "cuda_helper.h"
+
+typedef unsigned char BitSequence;
+
+__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
+__constant__ uint32_t pTarget[8];
+
+uint32_t *d_lnounce[8];
+uint32_t *d_LNonce[8];
+
+typedef struct {
+	uint32_t buffer[8]; /* Buffer to be hashed */
+	uint32_t chainv[40];   /* Chaining values */
+} hashState;
+
+#define BYTES_SWAP32(x) cuda_swab32(x)
+
+#define MULT2(a,j)\
+	tmp = a[7+(8*j)];\
+	a[7+(8*j)] = a[6+(8*j)];\
+	a[6+(8*j)] = a[5+(8*j)];\
+	a[5+(8*j)] = a[4+(8*j)];\
+	a[4+(8*j)] = a[3+(8*j)] ^ tmp;\
+	a[3+(8*j)] = a[2+(8*j)] ^ tmp;\
+	a[2+(8*j)] = a[1+(8*j)];\
+	a[1+(8*j)] = a[0+(8*j)] ^ tmp;\
+	a[0+(8*j)] = tmp;
+
+#define TWEAK(a0,a1,a2,a3,j)\
+	a0 = (a0<<(j))|(a0>>(32-j));\
+	a1 = (a1<<(j))|(a1>>(32-j));\
+	a2 = (a2<<(j))|(a2>>(32-j));\
+	a3 = (a3<<(j))|(a3>>(32-j));
+
+#define STEP(c0,c1)\
+	SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\
+	SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\
+	MIXWORD(chainv[0],chainv[4]);\
+	MIXWORD(chainv[1],chainv[5]);\
+	MIXWORD(chainv[2],chainv[6]);\
+	MIXWORD(chainv[3],chainv[7]);\
+	ADD_CONSTANT(chainv[0],chainv[4],c0,c1);
+
+#define SUBCRUMB(a0,a1,a2,a3,a4)\
+	a4  = a0;\
+	a0 |= a1;\
+	a2 ^= a3;\
+	a1  = ~a1;\
+	a0 ^= a3;\
+	a3 &= a4;\
+	a1 ^= a3;\
+	a3 ^= a2;\
+	a2 &= a0;\
+	a0  = ~a0;\
+	a2 ^= a1;\
+	a1 |= a3;\
+	a4 ^= a1;\
+	a3 ^= a2;\
+	a2 &= a1;\
+	a1 ^= a0;\
+	a0  = a4;
+
+#define MIXWORD(a0,a4)\
+	a4 ^= a0;\
+	a0  = (a0<<2) | (a0>>(30));\
+	a0 ^= a4;\
+	a4  = (a4<<14) | (a4>>(18));\
+	a4 ^= a0;\
+	a0  = (a0<<10) | (a0>>(22));\
+	a0 ^= a4;\
+	a4  = (a4<<1) | (a4>>(31));
+
+#define ADD_CONSTANT(a0,b0,c0,c1)\
+	a0 ^= c0;\
+	b0 ^= c1;
+
+/* initial values of chaining variables */
+__constant__ uint32_t c_IV[40];
+const uint32_t h2_IV[40] = {
+	0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
+	0x6e292011,0x90152df4,0xee058139,0xdef610bb,
+	0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
+	0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581,
+	0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05,
+	0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7,
+	0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67,
+	0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce,
+	0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
+	0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};
+
+__constant__ uint32_t c_CNS[80];
+uint32_t h2_CNS[80] = {
+	0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
+	0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
+	0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
+	0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d,
+	0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4,
+	0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28,
+	0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b,
+	0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704,
+	0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72,
+	0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7,
+	0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719,
+	0xd9847356,0x36eda57f,0xa2c78434,0x703aace7,
+	0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91,
+	0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be,
+	0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5,
+	0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355,
+	0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab,
+	0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0,
+	0x78602649,0x29131ab6,0x8edae952,0x0fc053c3,
+	0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31};
+
+
+/***************************************************/
+__device__ __forceinline__
+void rnd512(hashState *state)
+{
+	int i,j;
+	uint32_t t[40];
+	uint32_t chainv[8];
+	uint32_t tmp;
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		t[i]=0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			t[i] ^= state->chainv[i+8*j];
+		}
+	}
+
+	MULT2(t, 0);
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[i+8*j] ^= t[i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			t[i+8*j] = state->chainv[i+8*j];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+		MULT2(state->chainv, j);
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[8*j+i] ^= t[8*((j+1)%5)+i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			t[i+8*j] = state->chainv[i+8*j];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+		MULT2(state->chainv, j);
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[8*j+i] ^= t[8*((j+4)%5)+i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[i+8*j] ^= state->buffer[i];
+		}
+		MULT2(state->buffer, 0);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		chainv[i] = state->chainv[i];
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)],c_CNS[(2*i)+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i] = chainv[i];
+		chainv[i] = state->chainv[i+8];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],1);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+16],c_CNS[(2*i)+16+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+8] = chainv[i];
+		chainv[i] = state->chainv[i+16];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],2);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+32],c_CNS[(2*i)+32+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+16] = chainv[i];
+		chainv[i] = state->chainv[i+24];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],3);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+48],c_CNS[(2*i)+48+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+24] = chainv[i];
+		chainv[i] = state->chainv[i+32];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],4);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+64],c_CNS[(2*i)+64+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+32] = chainv[i];
+	}
+}
+
+
+__device__ __forceinline__
+void Update512(hashState *state, const BitSequence *data)
+{
+#pragma unroll 8
+	for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)data)[i]);
+	rnd512(state);
+
+#pragma unroll 8
+	for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+32))[i]);
+	rnd512(state);
+#pragma unroll 4
+	for(int i=0;i<4;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+64))[i]);
+}
+
+
+/***************************************************/
+__device__ __forceinline__
+void finalization512(hashState *state, uint32_t *b)
+{
+	int i,j;
+
+	state->buffer[4] = 0x80000000;
+#pragma unroll 3
+	for(int i=5;i<8;i++) state->buffer[i] = 0;
+	rnd512(state);
+
+	/*---- blank round with m=0 ----*/
+#pragma unroll 8
+	for(i=0;i<8;i++) state->buffer[i] =0;
+	rnd512(state);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		b[i] = 0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			b[i] ^= state->chainv[i+8*j];
+		}
+		b[i] = BYTES_SWAP32((b[i]));
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) state->buffer[i]=0;
+	rnd512(state);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		b[8+i] = 0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			b[8+i] ^= state->chainv[i+8*j];
+		}
+		b[8+i] = BYTES_SWAP32((b[8+i]));
+	}
+}
+
+
+/***************************************************/
+// Die Hash-Funktion
+__global__
+void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		union {
+		uint64_t buf64[16];
+		uint32_t buf32[32];
+		} buff;
+
+#pragma unroll 16
+		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
+
+		// die Nounce durch die thread-spezifische ersetzen
+		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
+
+
+		hashState state;
+#pragma unroll 40
+		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+#pragma unroll 8
+		for(int i=0;i<8;i++) state.buffer[i] = 0;
+		Update512(&state, (BitSequence*)buff.buf32);
+		uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
+		finalization512(&state, (uint32_t*)outHash);
+	}
+}
+
+__global__
+void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		union {
+		uint64_t buf64[16];
+		uint32_t buf32[32];
+		} buff;
+		uint32_t Hash[16];
+
+#pragma unroll 16
+		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
+
+		// die Nounce durch die thread-spezifische ersetzen
+		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
+
+
+		hashState state;
+#pragma unroll 40
+		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+#pragma unroll 8
+		for(int i=0;i<8;i++) state.buffer[i] = 0;
+		Update512(&state, (BitSequence*)buff.buf32);
+		finalization512(&state, Hash);
+
+		bool rc = true;
+		int position = -1;
+#pragma unroll 8
+		for (int i = 7; i >= 0; i--) {
+			if (Hash[i] > pTarget[i]) {
+				if(position < i) {
+					position = i;
+					rc = false;
+				}
+
+			}
+			if (Hash[i] < pTarget[i]) {
+				if(position < i) {
+					position = i;
+					rc = true;
+				}
+			}
+		}
+
+		if(rc && resNounce[0] > nounce)
+			resNounce[0] = nounce;
+	}
+}
+
+__host__
+void qubit_luffa512_cpu_init(int thr_id, int threads)
+{
+	cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
+	cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
+	cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
+	cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
+}
+
+__host__
+uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+	uint32_t result = 0xffffffff;
+	cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
+	const int threadsperblock = 256;
+
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size = 0;
+
+	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
+	MyStreamSynchronize(NULL, order, thr_id);
+	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	//cudaThreadSynchronize();
+	result = *d_lnounce[thr_id];
+	return result;
+}
+
+__host__
+void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+	const int threadsperblock = 256;
+
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+__host__
+void qubit_luffa512_cpu_setBlock_80(void *pdata)
+{
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 80);
+	memset(PaddedMessage+80, 0, 48);
+	PaddedMessage[80] = 0x80;
+	PaddedMessage[111] = 1;
+	PaddedMessage[126] = 0x02;
+	PaddedMessage[127] = 0x80;
+
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
+
+__host__
+void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
+{
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 80);
+	memset(PaddedMessage+80, 0, 48);
+	PaddedMessage[80] = 0x80;
+	PaddedMessage[111] = 1;
+	PaddedMessage[126] = 0x02;
+	PaddedMessage[127] = 0x80;
+	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
\ No newline at end of file
diff --git a/util.c b/util.c
index fe733c0..f8c8192 100644
--- a/util.c
+++ b/util.c
@@ -1440,6 +1440,14 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 14);
 	printpfx("blake", hash);
 
+	memset(hash, 0, sizeof hash);
+	deephash(&hash[0], &buf[0]);
+	printpfx("deep", hash);
+
+	memset(hash, 0, sizeof hash);
+	doomhash(&hash[0], &buf[0]);
+	printpfx("doom", hash);
+
 	memset(hash, 0, sizeof hash);
 	fresh_hash(&hash[0], &buf[0]);
 	printpfx("fresh", hash);
@@ -1476,6 +1484,10 @@ void print_hash_tests(void)
 	quarkhash(&hash[0], &buf[0]);
 	printpfx("quark", hash);
 
+	memset(hash, 0, sizeof hash);
+	qubithash(&hash[0], &buf[0]);
+	printpfx("qubit", hash);
+
 	memset(hash, 0, sizeof hash);
 	wcoinhash(&hash[0], &buf[0]);
 	printpfx("whirl", hash);

From 7cc5222394255f7359b20923d278e3f9163fc0d1 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Tue, 9 Sep 2014 23:04:32 +0200
Subject: [PATCH 2/3] Move common check_cpu functions to root

---
 JHA/jackpotcoin.cu                           |  9 +---
 Makefile.am                                  |  2 +-
 bitslice_transformations_quad.cu             | 10 ++++
 ccminer.vcxproj                              |  4 +-
 ccminer.vcxproj.filters                      |  4 +-
 quark/cuda_checkhash.cu => cuda_checkhash.cu | 54 ++++++++------------
 cuda_nist5.cu                                | 12 +----
 quark/animecoin.cu                           |  9 +---
 quark/quarkcoin.cu                           |  5 --
 x11/fresh.cu                                 | 26 ++--------
 x11/x11.cu                                   | 12 ++---
 x13/x13.cu                                   | 16 ++----
 x15/whirlpool.cu                             |  7 +--
 x15/x14.cu                                   |  4 --
 x15/x15.cu                                   |  4 --
 x17/x17.cu                                   |  5 --
 16 files changed, 54 insertions(+), 129 deletions(-)
 rename quark/cuda_checkhash.cu => cuda_checkhash.cu (58%)

diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
index 3db2a4f..804e609 100644
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@@ -5,14 +5,13 @@ extern "C"
 #include "sph/sph_groestl.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_skein.h"
+}
+
 #include "miner.h"
 #include "cuda_helper.h"
-}
 
-// aus cpu-miner.c
 extern int device_map[8];
 
-// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 
 extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
@@ -31,10 +30,6 @@ extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
 extern void quark_skein512_cpu_init(int thr_id, int threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
 extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, 
 											uint32_t *d_nonces1, size_t *nrm1,
diff --git a/Makefile.am b/Makefile.am
index 2067074..7cfaa74 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -29,7 +29,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
 			  myriadgroestl.cpp cuda_myriadgroestl.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
-			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
+			  JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
 			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
 			  quark/quarkcoin.cu quark/animecoin.cu \
diff --git a/bitslice_transformations_quad.cu b/bitslice_transformations_quad.cu
index 8f7bcf8..48c7f04 100644
--- a/bitslice_transformations_quad.cu
+++ b/bitslice_transformations_quad.cu
@@ -7,6 +7,8 @@
 #define __shfl(var, srcLane, width) (uint32_t)(var)
 #endif
 
+#ifdef __CUDA_ARCH__
+
 __device__ __forceinline__
 void to_bitslice_quad(uint32_t *input, uint32_t *output)
 {
@@ -429,3 +431,11 @@ void from_bitslice_quad(uint32_t *input, uint32_t *output)
         if (threadIdx.x % 4) output[i] = output[i+1] = 0;
     }
 }
+
+#else
+
+/* host "fake" functions */
+#define from_bitslice_quad(st, out)
+#define to_bitslice_quad(in, msg) in[0] = (uint32_t) in[0];
+
+#endif /* device only code */
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 06ba665..dc5b3f1 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -432,7 +432,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
       <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
     </CudaCompile>
-    <CudaCompile Include="quark\cuda_checkhash.cu">
+    <CudaCompile Include="cuda_checkhash.cu">
       <CInterleavedPTX>false</CInterleavedPTX>
       <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
@@ -570,4 +570,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
   <ImportGroup Label="ExtensionTargets">
     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 065e196..2648137 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -415,7 +415,7 @@
     <CudaCompile Include="x13\x13.cu">
       <Filter>Source Files\CUDA\x13</Filter>
     </CudaCompile>
-    <CudaCompile Include="quark\cuda_checkhash.cu">
+    <CudaCompile Include="cuda_checkhash.cu">
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>
     <CudaCompile Include="x15\x14.cu">
@@ -449,4 +449,4 @@
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/quark/cuda_checkhash.cu b/cuda_checkhash.cu
similarity index 58%
rename from quark/cuda_checkhash.cu
rename to cuda_checkhash.cu
index 1ce25ec..a7806aa 100644
--- a/quark/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@@ -9,66 +9,56 @@ __constant__ uint32_t pTarget[8];
 static uint32_t *d_resNounce[8];
 static uint32_t *h_resNounce[8];
 
-// aus heavy.cu
-extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
-
-__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
+__global__
+void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
-		// bestimme den aktuellen Z�hler
+		// bestimme den aktuellen Zähler
 		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
 
 		int hashPosition = nounce - startNounce;
-		uint32_t *inpHash = &g_hash[16 * hashPosition];
+		uint32_t *inpHash = &g_hash[hashPosition<<4];
 
 		uint32_t hash[8];
-#pragma unroll 8
+
+		#pragma unroll 8
 		for (int i=0; i < 8; i++)
 			hash[i] = inpHash[i];
 
-		// kopiere Ergebnis
-		int i, position = -1;
-		bool rc = true;
-
-#pragma unroll 8
-		for (i = 7; i >= 0; i--) {
+		for (int i = 7; i >= 0; i--) {
 			if (hash[i] > pTarget[i]) {
-				if(position < i) {
-					position = i;
-					rc = false;
-				}
-	 		}
-	 		if (hash[i] < pTarget[i]) {
-				if(position < i) {
-					position = i;
-					rc = true;
-				}
-	 		}
+				return;
+			}
+			if (hash[i] < pTarget[i]) {
+				break;
+			}
 		}
 
-		if(rc == true)
-			if(resNounce[0] > nounce)
-				resNounce[0] = nounce;
+		if(resNounce[0] > nounce)
+			resNounce[0] = nounce;
 	}
 }
 
 // Setup-Funktionen
-__host__ void cuda_check_cpu_init(int thr_id, int threads)
+__host__
+void cuda_check_cpu_init(int thr_id, int threads)
 {
     cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
     cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
 }
 
 // Target Difficulty setzen
-__host__ void cuda_check_cpu_setTarget(const void *ptarget)
+__host__
+void cuda_check_cpu_setTarget(const void *ptarget)
 {
 	// die Message zur Berechnung auf der GPU
-	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
 }
 
-__host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
+__host__
+uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
 {
 	uint32_t result = 0xffffffff;
 	cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t));
@@ -79,7 +69,7 @@ __host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t start
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
 
-	// Gr��e des dynamischen Shared Memory Bereichs
+	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 0;
 
 	cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index 419c1a5..1189f18 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -11,7 +11,7 @@ extern "C"
 
 #include "cuda_helper.h"
 
-// aus cpu-miner.c
+// in cpu-miner.c
 extern int device_map[8];
 
 // Speicher für Input/Output der verketteten Hashfunktionen
@@ -33,9 +33,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
 extern void quark_skein512_cpu_init(int thr_id, int threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 
 // Original nist5hash Funktion aus einem miner Quelltext
 extern "C" void nist5hash(void *state, const void *input)
@@ -46,30 +43,25 @@ extern "C" void nist5hash(void *state, const void *input)
     sph_keccak512_context ctx_keccak;
     sph_skein512_context ctx_skein;
     
-    unsigned char hash[64];
+    uint8_t hash[64];
 
     sph_blake512_init(&ctx_blake);
-    // ZBLAKE;
     sph_blake512 (&ctx_blake, input, 80);
     sph_blake512_close(&ctx_blake, (void*) hash);
     
     sph_groestl512_init(&ctx_groestl);
-    // ZGROESTL;
     sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
     sph_groestl512_close(&ctx_groestl, (void*) hash);
 
     sph_jh512_init(&ctx_jh);
-    // ZJH;
     sph_jh512 (&ctx_jh, (const void*) hash, 64);
     sph_jh512_close(&ctx_jh, (void*) hash);
 
     sph_keccak512_init(&ctx_keccak);
-    // ZKECCAK;
     sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
     sph_keccak512_close(&ctx_keccak, (void*) hash);
 
     sph_skein512_init(&ctx_skein);
-    // ZSKEIN;
     sph_skein512 (&ctx_skein, (const void*) hash, 64);
     sph_skein512_close(&ctx_skein, (void*) hash);
 
diff --git a/quark/animecoin.cu b/quark/animecoin.cu
index 4b2d097..0d7466a 100644
--- a/quark/animecoin.cu
+++ b/quark/animecoin.cu
@@ -6,14 +6,12 @@ extern "C"
 #include "sph/sph_skein.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
+}
 #include "miner.h"
 #include "cuda_helper.h"
-}
 
-// aus cpu-miner.c
 extern int device_map[8];
 
-// Speicher f�r Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 
 // Speicher zur Generierung der Noncevektoren f�r die bedingten Hashes
@@ -43,10 +41,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
 extern void quark_jh512_cpu_init(int thr_id, int threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 											uint32_t *d_nonces1, size_t *nrm1,
@@ -165,7 +159,6 @@ struct HashPredicate
     uint32_t  m_startNonce;
 };
 
-extern bool opt_benchmark;
 
 extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu
index a905ec4..1adac57 100644
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@@ -14,7 +14,6 @@ extern "C"
 
 extern int device_map[8];
 
-// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 
 // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
@@ -44,10 +43,6 @@ extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startN
 extern void quark_jh512_cpu_init(int thr_id, int threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 											uint32_t *d_nonces1, size_t *nrm1,
diff --git a/x11/fresh.cu b/x11/fresh.cu
index 11bdda6..0c5179a 100644
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@@ -5,9 +5,9 @@ extern "C" {
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
+}
 #include "miner.h"
 #include "cuda_helper.h"
-}
 
 // to test gpu hash on a null buffer
 #define NULLTEST 0
@@ -15,7 +15,6 @@ extern "C" {
 static uint32_t *d_hash[8];
 
 extern int device_map[8];
-extern bool opt_benchmark;
 
 extern void x11_shavite512_cpu_init(int thr_id, int threads);
 extern void x11_shavite512_setBlock_80(void *pdata);
@@ -28,10 +27,6 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
 extern void x11_echo512_cpu_init(int thr_id, int threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
@@ -75,15 +70,6 @@ extern "C" void fresh_hash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-#if NULLTEST
-static void print_hash(unsigned char *hash)
-{
-	for (int i=0; i < 32; i += 4) {
-		printf("%02x%02x%02x%02x ", hash[i], hash[i+1], hash[i+2], hash[i+3]);
-	}
-}
-#endif
-
 extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
@@ -92,15 +78,9 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const int throughput = 256*256*8;
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
-	uint32_t Htarg = ptarget[7];
 
 	if (opt_benchmark)
-		((uint32_t*)ptarget)[7] = Htarg = 0x0000ff;
-
-#if NULLTEST
-	for (int k=0; k < 20; k++)
-		pdata[k] = 0;
-#endif
+		((uint32_t*)ptarget)[7] = 0x00ff;
 
 	if (!init[thr_id])
 	{
@@ -123,6 +103,8 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
 	x11_shavite512_setBlock_80((void*)endiandata);
 	cuda_check_cpu_setTarget(ptarget);
 	do {
+		uint32_t Htarg = ptarget[7];
+
 		uint32_t foundNonce;
 		int order = 0;
 
diff --git a/x11/x11.cu b/x11/x11.cu
index dc2f97f..60ea553 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -1,4 +1,3 @@
-
 extern "C"
 {
 #include "sph/sph_blake.h"
@@ -13,13 +12,13 @@ extern "C"
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
+}
 
 #include "miner.h"
 #include "cuda_helper.h"
 
 #include <stdio.h>
 #include <memory.h>
-}
 
 // in cpu-miner.c
 extern int device_map[8];
@@ -62,14 +61,9 @@ extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounc
 extern void x11_echo512_cpu_init(int thr_id, int threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
-											int order);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
+                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // X11 Hashfunktion
 extern "C" void x11hash(void *output, const void *input)
diff --git a/x13/x13.cu b/x13/x13.cu
index 70a8ee1..c751211 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -18,18 +18,16 @@ extern "C"
 
 #include "sph/sph_hamsi.h"
 #include "sph/sph_fugue.h"
-
+}
 #include "miner.h"
 
 #include "cuda_helper.h"
-}
 
-// aus cpu-miner.c
 extern int device_map[8];
 
-// Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 
+
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
 extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
@@ -40,7 +38,6 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
 
 extern void quark_groestl512_cpu_init(int thr_id, int threads);
 extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
 extern void quark_skein512_cpu_init(int thr_id, int threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@@ -72,14 +69,9 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
 extern void x13_fugue512_cpu_init(int thr_id, int threads);
 extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
-											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
-											int order);
+                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 
 // X13 Hashfunktion
 extern "C" void x13hash(void *output, const void *input)
@@ -159,8 +151,6 @@ extern "C" void x13hash(void *output, const void *input)
 }
 
 
-extern bool opt_benchmark;
-
 extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
     unsigned long *hashes_done)
diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu
index eeb50e7..192a71c 100644
--- a/x15/whirlpool.cu
+++ b/x15/whirlpool.cu
@@ -9,20 +9,17 @@ extern "C"
 
 // from cpu-miner.c
 extern int device_map[8];
-extern bool opt_benchmark;
 
 // Speicher für Input/Output der verketteten Hashfunktionen
 static uint32_t *d_hash[8];
 
 extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
+extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
 extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget);
 extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
-extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 
 // CPU Hash function
 extern "C" void wcoinhash(void *state, const void *input)
diff --git a/x15/x14.cu b/x15/x14.cu
index b3519cd..7335a0d 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -76,10 +76,6 @@ extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNoun
 extern void x14_shabal512_cpu_init(int thr_id, int threads);
 extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
diff --git a/x15/x15.cu b/x15/x15.cu
index faea354..012cafd 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -81,10 +81,6 @@ extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
 extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x15_whirlpool_cpu_free(int thr_id);
 
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
diff --git a/x17/x17.cu b/x17/x17.cu
index 65d2259..608ce60 100644
--- a/x17/x17.cu
+++ b/x17/x17.cu
@@ -89,11 +89,6 @@ extern void x17_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce
 extern void x17_haval256_cpu_init(int thr_id, int threads);
 extern void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 
-
-extern void cuda_check_cpu_init(int thr_id, int threads);
-extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
-
 extern void quark_compactTest_cpu_init(int thr_id, int threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
 											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,

From 402e70f63679d203cb165a1c3d26011444952e33 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Tue, 9 Sep 2014 23:13:40 +0200
Subject: [PATCH 3/3] Update VS Project

---
 ccminer.vcxproj         | 20 ++++++++++++++++++++
 ccminer.vcxproj.filters | 15 +++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index dc5b3f1..7d15d86 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -463,6 +463,26 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
       <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
     </CudaCompile>
+    <CudaCompile Include="qubit\deep.cu">
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
+      <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
+    </CudaCompile>
+    <CudaCompile Include="qubit\doom.cu">
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
+      <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit.cu">
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
+      <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit_luffa512.cu">
+      <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options=-O2 %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)'=='Debug'">%(AdditionalOptions)</AdditionalOptions>
+      <TargetMachinePlatform Condition="'$(Platform)'=='x64'">64</TargetMachinePlatform>
+    </CudaCompile>
     <CudaCompile Include="x11\cuda_x11_aes.cu">
       <ExcludedFromBuild>true</ExcludedFromBuild>
     </CudaCompile>
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 2648137..1391df5 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -52,6 +52,9 @@
     <Filter Include="Header Files\CUDA\heavy">
       <UniqueIdentifier>{3281db48-f394-49ea-a1ef-6ebd09828d50}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Source Files\CUDA\qubit">
+      <UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier>
+    </Filter>
     <Filter Include="Source Files\CUDA\x11">
       <UniqueIdentifier>{dd751f2d-bfd6-42c1-8f9b-cbe94e539353}</UniqueIdentifier>
     </Filter>
@@ -376,6 +379,18 @@
     <CudaCompile Include="quark\quarkcoin.cu">
       <Filter>Source Files\CUDA\quark</Filter>
     </CudaCompile>
+    <CudaCompile Include="qubit\deep.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\doom.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit_luffa512.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
     <CudaCompile Include="cuda_nist5.cu">
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>