Import djm34 qubit, deep and doom algos

Indent, and put commonly used functions proto. in cuda_helper.h And add them to --cputest function Also change the color option to --nocolor, -C is no more needed Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> (Which is tired to remove these german copy/pasted comments)
2014-09-09 21:56:25 +02:00 · 2014-09-09 21:56:25 +02:00 · c3eb66683a
commit c3eb66683a
parent 429266346c
12 changed files with 944 additions and 21 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -31,17 +31,20 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
 			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
-			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
-			  quark/cuda_quark_compactionTest.cu \
+			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
+			  quark/quarkcoin.cu quark/animecoin.cu \
+			  quark/cuda_quark_compactionTest.cu  \
 			  cuda_nist5.cu blake32.cu pentablake.cu \
 			  sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
+			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/doom.cu \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
-			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \
+			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
+			  x15/whirlpool.cu \
 			  x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu


--- a/README.txt
+++ b/README.txt
@ -1,5 +1,5 @@

-ccMiner release 1.4.2-tpruvot (Sep 09th 2014) - "Pentablake"
+ccMiner release 1.4.3-tpruvot (Sep 11th 2014) - "DJM34 Algos"
 ---------------------------------------------------------------

 ***************************************************************
@ -35,6 +35,7 @@ TalkCoin
 DarkCoin and other X11 coins
 NEOS blake (256 14-rounds)
 BlakeCoin (256 8-rounds)
+Deep, Doom, Goalcoin and Qubit
 Pentablake (Blake 512 x5)

 where some of these coins have a VERY NOTABLE nVidia advantage
@ -56,12 +57,15 @@ its command line interface and options.
  -a, --algo=ALGO       specify the algorithm to use
                          heavy       use to mine Heavycoin
                          mjollnir    use to mine Mjollnircoin
+                          deep        use to mine Deepcoin
+                          doom        use to mine Doomcoin
                          fugue256    use to mine Fuguecoin
                          groestl     use to mine Groestlcoin
                          dmd-gr      use to mine Diamond-Groestl
                          myr-gr      use to mine Myriad-Groest
                          jackpot     use to mine Jackpotcoin
                          quark       use to mine Quarkcoin
+                          qubit       use to mine Qubit Algo
                          anime       use to mine Animecoin
                          blake       use to mine NEOS (Blake 256)
                          blakecoin   use to mine Old Blake 256
@ -103,7 +107,7 @@ its command line interface and options.
      --benchmark       run in offline benchmark mode
      --cputest         debug hashes from cpu algorithms
  -c, --config=FILE     load a JSON-format configuration file
-  -C, --color           display colored output in a linux Terminal
+  -K, --nocolor         disable colored console output
  -V, --version         display version information and exit
  -h, --help            display this help text and exit

@ -154,6 +158,9 @@ features.

 >>> RELEASE HISTORY <<<

+  Sep  11th 2O14
+                  add algos from djm34 (deep,doom,qubit)
+
  Sep.  1st 2014  add X17, optimized x15 and whirl
                  add blake (256 variant)
                  color support on Windows,
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2014.09.09])
+AC_INIT([ccminer], [2014.09.11])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -129,6 +129,8 @@ typedef enum {
 	ALGO_ANIME,
 	ALGO_BLAKE,
 	ALGO_BLAKECOIN,
+	ALGO_DEEP,
+	ALGO_DOOM,
 	ALGO_FRESH,
 	ALGO_FUGUE256,		/* Fugue256 */
 	ALGO_GROESTL,
@ -139,6 +141,7 @@ typedef enum {
 	ALGO_NIST5,
 	ALGO_PENTABLAKE,
 	ALGO_QUARK,
+	ALGO_QUBIT,
 	ALGO_WHC,
 	ALGO_X11,
 	ALGO_X13,
@ -152,6 +155,8 @@ static const char *algo_names[] = {
 	"anime",
 	"blake",
 	"blakecoin",
+	"deep",
+	"doom",
 	"fresh",
 	"fugue256",
 	"groestl",
@ -162,6 +167,7 @@ static const char *algo_names[] = {
 	"nist5",
 	"penta",
 	"quark",
+	"qubit",
 	"whirl",
 	"x11",
 	"x13",
@ -235,6 +241,8 @@ Options:\n\
                        anime     Animecoin hash\n\
                        blake     Blake 256 (like NEOS blake)\n\
                        blakecoin Old Blake 256 (8 rounds)\n\
+                        deep      Deepcoin hash\n\
+                        doom      Doomcoin hash\n\
                        fresh     Freshcoin hash (shavite 80)\n\
                        fugue256  Fuguecoin hash\n\
                        groestl   Groestlcoin hash\n\
@ -245,6 +253,7 @@ Options:\n\
                        nist5     NIST5 (TalkCoin) hash\n\
                        penta     Pentablake hash (5x Blake 512)\n\
                        quark     Quark hash\n\
+                        qubit     Qubit hash\n\
                        whirl     Whirlcoin (old whirlpool)\n\
                        x11       X11 (DarkCoin) hash\n\
                        x13       X13 (MaruCoin) hash\n\
@ -275,7 +284,7 @@ Options:\n\
      --no-longpoll     disable X-Long-Polling support\n\
      --no-stratum      disable X-Stratum support\n\
  -q, --quiet           disable per-thread hashmeter output\n\
-  -C, --color           enable colored output\n\
+  -K, --nocolor         disable colored output\n\
  -D, --debug           enable debug output\n\
  -P, --protocol-dump   verbose dump of protocol-level activities\n"
 #ifdef HAVE_SYSLOG_H
@ -301,7 +310,7 @@ static char const short_options[] =
 #ifdef HAVE_SYSLOG_H
 	"S"
 #endif
-	"a:c:CDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
+	"a:c:CKDhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";

 static struct option const options[] = {
 	{ "algo", 1, NULL, 'a' },
@ -312,7 +321,7 @@ static struct option const options[] = {
 	{ "cputest", 0, NULL, 1006 },
 	{ "cert", 1, NULL, 1001 },
 	{ "config", 1, NULL, 'c' },
-	{ "color", 0, NULL, 'C' },
+	{ "nocolor", 0, NULL, 'K' },
 	{ "debug", 0, NULL, 'D' },
 	{ "help", 0, NULL, 'h' },
 	{ "no-longpoll", 0, NULL, 1003 },
@ -1083,6 +1092,16 @@ continue_scan:
 			                      max_nonce, &hashes_done, 0, MNR_BLKHDR_SZ);
 			break;

+		case ALGO_DEEP:
+			rc = scanhash_deep(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
+		case ALGO_DOOM:
+			rc = scanhash_doom(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
 		case ALGO_FUGUE256:
 			rc = scanhash_fugue256(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@ -1109,6 +1128,11 @@ continue_scan:
 			                      max_nonce, &hashes_done);
 			break;

+		case ALGO_QUBIT:
+			rc = scanhash_qubit(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
 		case ALGO_ANIME:
 			rc = scanhash_anime(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@ -1427,7 +1451,7 @@ out:
 	return NULL;
 }

-#define PROGRAM_VERSION "1.4.2"
+#define PROGRAM_VERSION "1.4.3"
 static void show_version_and_exit(void)
 {
 	printf("%s v%s\n"
@ -1489,8 +1513,12 @@ static void parse_arg (int key, char *arg)
 		break;
 	}
 	case 'C':
+		/* color for compat */
 		use_colors = true;
 		break;
+	case 'K':
+		use_colors = false;
+		break;
 	case 'D':
 		opt_debug = true;
 		break;
@ -1801,8 +1829,7 @@ int main(int argc, char *argv[])
 #endif
 	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
 	printf("\t    and HVC extension from http://hvc.1gh.com/" "\n\n");
-	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
-	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
+	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n\n");
 	printf("\tInclude some of djm34 additions, cleaned by Tanguy Pruvot\n");
 	printf("\t  BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");

--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -156,7 +156,7 @@
 #define PACKAGE_NAME "ccminer"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.09.06"
+#define PACKAGE_STRING "ccminer 2014.09.11"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.09.06"
+#define PACKAGE_VERSION "2014.09.11"

 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
 #define USE_XOP 1

 /* Version number of package */
-#define VERSION "2014.09.06"
+#define VERSION "2014.09.11"

 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
--- a/cuda_helper.h
+++ b/cuda_helper.h
@ -12,6 +12,10 @@

 #include <stdint.h>

+// common functions
+extern void cuda_check_cpu_init(int thr_id, int threads);
+extern void cuda_check_cpu_setTarget(const void *ptarget);
+extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);

 extern __device__ __device_builtin__ void __syncthreads(void);
--- a/miner.h
+++ b/miner.h
@ -205,13 +205,13 @@ extern int scanhash_sha256d(int thr_id, uint32_t *pdata,

 extern unsigned char *scrypt_buffer_alloc();

-extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
-	unsigned char *scratchbuf, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done);
-
-extern int scanhash_heavy(int thr_id, uint32_t *pdata,
+extern int scanhash_deep(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
-	unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+	unsigned long *hashes_done);
+
+extern int scanhash_doom(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);

 extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
@ -221,6 +221,10 @@ extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

+extern int scanhash_heavy(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done, uint32_t maxvote, int blocklen);
+
 extern int scanhash_myriad(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@ -253,6 +257,14 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);

+extern int scanhash_qubit(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
+	unsigned char *scratchbuf, const uint32_t *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done);
+
 extern int scanhash_whc(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@ -426,6 +438,8 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
 void print_hash_tests(void);
 void animehash(void *state, const void *input);
 void blake256hash(void *output, const void *input, int rounds);
+void deephash(void *state, const void *input);
+void doomhash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
 void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
@ -435,6 +449,7 @@ void myriadhash(void *state, const void *input);
 void nist5hash(void *state, const void *input);
 void pentablakehash(void *output, const void *input);
 void quarkhash(void *state, const void *input);
+void qubithash(void *state, const void *input);
 void wcoinhash(void *state, const void *input);
 void x11hash(void *output, const void *input);
 void x13hash(void *output, const void *input);
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@ -0,0 +1,120 @@
+/*
+ * deepcoin algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern "C" void deephash(void *state, const void *input)
+{
+	// luffa1-cubehash2-shavite3-simd4-echo5
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_echo512_context ctx_echo;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+
+extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			deephash(vhash64, endiandata);
+
+			if (vhash64[7]<=Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@ -0,0 +1,93 @@
+/*
+ * qubit algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void doomhash(void *state, const void *input)
+{
+	// luffa512
+	sph_luffa512_context ctx_luffa;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+
+extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			doomhash(vhash64, endiandata);
+
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@ -0,0 +1,146 @@
+/*
+ * qubit algorithm
+ *
+ */
+extern "C" {
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+
+#include "cuda_helper.h"
+
+extern int device_map[8];
+
+static uint32_t *d_hash[8];
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+extern "C" void qubithash(void *state, const void *input)
+{
+	// luffa1-cubehash2-shavite3-simd4-echo5
+
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;
+
+	uint8_t hash[64];
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, input, 80);
+	sph_luffa512_close(&ctx_luffa, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
+	sph_shavite512_close(&ctx_shavite, (void*) hash);
+
+	sph_simd512_init(&ctx_simd);
+	sph_simd512 (&ctx_simd, (const void*) hash, 64);
+	sph_simd512_close(&ctx_simd, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(state, hash, 32);
+}
+
+extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	const int throughput = 256*256*8;
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	uint32_t endiandata[20];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		const uint32_t Htarg = ptarget[7];
+		int order = 0;
+
+		// Hash with CUDA
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		uint32_t foundNonce = cuda_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			qubithash(vhash64, endiandata);
+
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget) )
+			{
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
--- a/qubit/qubit_luffa512.cu
+++ b/qubit/qubit_luffa512.cu
@ -0,0 +1,496 @@
+/*
+ * luffa_for_32.c
+ * Version 2.0 (Sep 15th 2009)
+ *
+ * Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
+ *
+ * Hitachi, Ltd. is the owner of this software and hereby grant
+ * the U.S. Government and any interested party the right to use
+ * this software for the purposes of the SHA-3 evaluation process,
+ * notwithstanding that this software is copyrighted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+#include "cuda_helper.h"
+
+typedef unsigned char BitSequence;
+
+__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
+__constant__ uint32_t pTarget[8];
+
+uint32_t *d_lnounce[8];
+uint32_t *d_LNonce[8];
+
+typedef struct {
+	uint32_t buffer[8]; /* Buffer to be hashed */
+	uint32_t chainv[40];   /* Chaining values */
+} hashState;
+
+#define BYTES_SWAP32(x) cuda_swab32(x)
+
+#define MULT2(a,j)\
+	tmp = a[7+(8*j)];\
+	a[7+(8*j)] = a[6+(8*j)];\
+	a[6+(8*j)] = a[5+(8*j)];\
+	a[5+(8*j)] = a[4+(8*j)];\
+	a[4+(8*j)] = a[3+(8*j)] ^ tmp;\
+	a[3+(8*j)] = a[2+(8*j)] ^ tmp;\
+	a[2+(8*j)] = a[1+(8*j)];\
+	a[1+(8*j)] = a[0+(8*j)] ^ tmp;\
+	a[0+(8*j)] = tmp;
+
+#define TWEAK(a0,a1,a2,a3,j)\
+	a0 = (a0<<(j))|(a0>>(32-j));\
+	a1 = (a1<<(j))|(a1>>(32-j));\
+	a2 = (a2<<(j))|(a2>>(32-j));\
+	a3 = (a3<<(j))|(a3>>(32-j));
+
+#define STEP(c0,c1)\
+	SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\
+	SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\
+	MIXWORD(chainv[0],chainv[4]);\
+	MIXWORD(chainv[1],chainv[5]);\
+	MIXWORD(chainv[2],chainv[6]);\
+	MIXWORD(chainv[3],chainv[7]);\
+	ADD_CONSTANT(chainv[0],chainv[4],c0,c1);
+
+#define SUBCRUMB(a0,a1,a2,a3,a4)\
+	a4  = a0;\
+	a0 |= a1;\
+	a2 ^= a3;\
+	a1  = ~a1;\
+	a0 ^= a3;\
+	a3 &= a4;\
+	a1 ^= a3;\
+	a3 ^= a2;\
+	a2 &= a0;\
+	a0  = ~a0;\
+	a2 ^= a1;\
+	a1 |= a3;\
+	a4 ^= a1;\
+	a3 ^= a2;\
+	a2 &= a1;\
+	a1 ^= a0;\
+	a0  = a4;
+
+#define MIXWORD(a0,a4)\
+	a4 ^= a0;\
+	a0  = (a0<<2) | (a0>>(30));\
+	a0 ^= a4;\
+	a4  = (a4<<14) | (a4>>(18));\
+	a4 ^= a0;\
+	a0  = (a0<<10) | (a0>>(22));\
+	a0 ^= a4;\
+	a4  = (a4<<1) | (a4>>(31));
+
+#define ADD_CONSTANT(a0,b0,c0,c1)\
+	a0 ^= c0;\
+	b0 ^= c1;
+
+/* initial values of chaining variables */
+__constant__ uint32_t c_IV[40];
+const uint32_t h2_IV[40] = {
+	0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
+	0x6e292011,0x90152df4,0xee058139,0xdef610bb,
+	0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
+	0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581,
+	0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05,
+	0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7,
+	0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67,
+	0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce,
+	0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
+	0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};
+
+__constant__ uint32_t c_CNS[80];
+uint32_t h2_CNS[80] = {
+	0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
+	0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
+	0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
+	0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d,
+	0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4,
+	0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28,
+	0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b,
+	0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704,
+	0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72,
+	0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7,
+	0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719,
+	0xd9847356,0x36eda57f,0xa2c78434,0x703aace7,
+	0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91,
+	0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be,
+	0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5,
+	0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355,
+	0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab,
+	0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0,
+	0x78602649,0x29131ab6,0x8edae952,0x0fc053c3,
+	0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31};
+
+
+/***************************************************/
+__device__ __forceinline__
+void rnd512(hashState *state)
+{
+	int i,j;
+	uint32_t t[40];
+	uint32_t chainv[8];
+	uint32_t tmp;
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		t[i]=0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			t[i] ^= state->chainv[i+8*j];
+		}
+	}
+
+	MULT2(t, 0);
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[i+8*j] ^= t[i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			t[i+8*j] = state->chainv[i+8*j];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+		MULT2(state->chainv, j);
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[8*j+i] ^= t[8*((j+1)%5)+i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			t[i+8*j] = state->chainv[i+8*j];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+		MULT2(state->chainv, j);
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[8*j+i] ^= t[8*((j+4)%5)+i];
+		}
+	}
+
+#pragma unroll 5
+	for(j=0;j<5;j++) {
+#pragma unroll 8
+		for(i=0;i<8;i++) {
+			state->chainv[i+8*j] ^= state->buffer[i];
+		}
+		MULT2(state->buffer, 0);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		chainv[i] = state->chainv[i];
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)],c_CNS[(2*i)+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i] = chainv[i];
+		chainv[i] = state->chainv[i+8];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],1);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+16],c_CNS[(2*i)+16+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+8] = chainv[i];
+		chainv[i] = state->chainv[i+16];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],2);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+32],c_CNS[(2*i)+32+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+16] = chainv[i];
+		chainv[i] = state->chainv[i+24];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],3);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+48],c_CNS[(2*i)+48+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+24] = chainv[i];
+		chainv[i] = state->chainv[i+32];
+	}
+
+	TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],4);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		STEP(c_CNS[(2*i)+64],c_CNS[(2*i)+64+1]);
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		state->chainv[i+32] = chainv[i];
+	}
+}
+
+
+__device__ __forceinline__
+void Update512(hashState *state, const BitSequence *data)
+{
+#pragma unroll 8
+	for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)data)[i]);
+	rnd512(state);
+
+#pragma unroll 8
+	for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+32))[i]);
+	rnd512(state);
+#pragma unroll 4
+	for(int i=0;i<4;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+64))[i]);
+}
+
+
+/***************************************************/
+__device__ __forceinline__
+void finalization512(hashState *state, uint32_t *b)
+{
+	int i,j;
+
+	state->buffer[4] = 0x80000000;
+#pragma unroll 3
+	for(int i=5;i<8;i++) state->buffer[i] = 0;
+	rnd512(state);
+
+	/*---- blank round with m=0 ----*/
+#pragma unroll 8
+	for(i=0;i<8;i++) state->buffer[i] =0;
+	rnd512(state);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		b[i] = 0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			b[i] ^= state->chainv[i+8*j];
+		}
+		b[i] = BYTES_SWAP32((b[i]));
+	}
+
+#pragma unroll 8
+	for(i=0;i<8;i++) state->buffer[i]=0;
+	rnd512(state);
+
+#pragma unroll 8
+	for(i=0;i<8;i++) {
+		b[8+i] = 0;
+#pragma unroll 5
+		for(j=0;j<5;j++) {
+			b[8+i] ^= state->chainv[i+8*j];
+		}
+		b[8+i] = BYTES_SWAP32((b[8+i]));
+	}
+}
+
+
+/***************************************************/
+// Die Hash-Funktion
+__global__
+void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		union {
+		uint64_t buf64[16];
+		uint32_t buf32[32];
+		} buff;
+
+#pragma unroll 16
+		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
+
+		// die Nounce durch die thread-spezifische ersetzen
+		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
+
+
+		hashState state;
+#pragma unroll 40
+		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+#pragma unroll 8
+		for(int i=0;i<8;i++) state.buffer[i] = 0;
+		Update512(&state, (BitSequence*)buff.buf32);
+		uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
+		finalization512(&state, (uint32_t*)outHash);
+	}
+}
+
+__global__
+void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		union {
+		uint64_t buf64[16];
+		uint32_t buf32[32];
+		} buff;
+		uint32_t Hash[16];
+
+#pragma unroll 16
+		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
+
+		// die Nounce durch die thread-spezifische ersetzen
+		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
+
+
+		hashState state;
+#pragma unroll 40
+		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+#pragma unroll 8
+		for(int i=0;i<8;i++) state.buffer[i] = 0;
+		Update512(&state, (BitSequence*)buff.buf32);
+		finalization512(&state, Hash);
+
+		bool rc = true;
+		int position = -1;
+#pragma unroll 8
+		for (int i = 7; i >= 0; i--) {
+			if (Hash[i] > pTarget[i]) {
+				if(position < i) {
+					position = i;
+					rc = false;
+				}
+
+			}
+			if (Hash[i] < pTarget[i]) {
+				if(position < i) {
+					position = i;
+					rc = true;
+				}
+			}
+		}
+
+		if(rc && resNounce[0] > nounce)
+			resNounce[0] = nounce;
+	}
+}
+
+__host__
+void qubit_luffa512_cpu_init(int thr_id, int threads)
+{
+	cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
+	cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
+	cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
+	cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
+}
+
+__host__
+uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+	uint32_t result = 0xffffffff;
+	cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
+	const int threadsperblock = 256;
+
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size = 0;
+
+	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
+	MyStreamSynchronize(NULL, order, thr_id);
+	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	//cudaThreadSynchronize();
+	result = *d_lnounce[thr_id];
+	return result;
+}
+
+__host__
+void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+	const int threadsperblock = 256;
+
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+__host__
+void qubit_luffa512_cpu_setBlock_80(void *pdata)
+{
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 80);
+	memset(PaddedMessage+80, 0, 48);
+	PaddedMessage[80] = 0x80;
+	PaddedMessage[111] = 1;
+	PaddedMessage[126] = 0x02;
+	PaddedMessage[127] = 0x80;
+
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
+
+__host__
+void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
+{
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 80);
+	memset(PaddedMessage+80, 0, 48);
+	PaddedMessage[80] = 0x80;
+	PaddedMessage[111] = 1;
+	PaddedMessage[126] = 0x02;
+	PaddedMessage[127] = 0x80;
+	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
--- a/util.c
+++ b/util.c
@ -1440,6 +1440,14 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 14);
 	printpfx("blake", hash);

+	memset(hash, 0, sizeof hash);
+	deephash(&hash[0], &buf[0]);
+	printpfx("deep", hash);
+
+	memset(hash, 0, sizeof hash);
+	doomhash(&hash[0], &buf[0]);
+	printpfx("doom", hash);
+
 	memset(hash, 0, sizeof hash);
 	fresh_hash(&hash[0], &buf[0]);
 	printpfx("fresh", hash);
@ -1476,6 +1484,10 @@ void print_hash_tests(void)
 	quarkhash(&hash[0], &buf[0]);
 	printpfx("quark", hash);

+	memset(hash, 0, sizeof hash);
+	qubithash(&hash[0], &buf[0]);
+	printpfx("qubit", hash);
+
 	memset(hash, 0, sizeof hash);
 	wcoinhash(&hash[0], &buf[0]);
 	printpfx("whirl", hash);