Added support for Twecoin

11 years ago · 474091c426
19 changed files with 82397 additions and 1 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -46,6 +46,7 @@ sgminer_SOURCES += darkcoin.c darkcoin.h
				@@ -46,6 +46,7 @@ sgminer_SOURCES += darkcoin.c darkcoin.h
 sgminer_SOURCES += qubitcoin.c qubitcoin.h
 sgminer_SOURCES += quarkcoin.c quarkcoin.h
 sgminer_SOURCES += myriadcoin-groestl.c myriadcoin-groestl.h
+sgminer_SOURCES += twecoin.c twecoin.h
 sgminer_SOURCES += kernel/*.cl

 bin_SCRIPTS	= $(top_srcdir)/kernel/*.cl
--- a/configure.ac
+++ b/configure.ac
@ -349,6 +349,7 @@ AC_DEFINE_UNQUOTED([DARKCOIN_KERNNAME], ["darkcoin"], [Filename for DarkCoin opt
				@@ -349,6 +349,7 @@ AC_DEFINE_UNQUOTED([DARKCOIN_KERNNAME], ["darkcoin"], [Filename for DarkCoin opt
 AC_DEFINE_UNQUOTED([QUBITCOIN_KERNNAME], ["qubitcoin"], [Filename for QubitCoin optimised kernel])
 AC_DEFINE_UNQUOTED([QUARKCOIN_KERNNAME], ["quarkcoin"], [Filename for QuarkCoin optimised kernel])
 AC_DEFINE_UNQUOTED([MYRIADCOIN_GROESTL_KERNNAME], ["myriadcoin-groestl"], [Filename for MyriadCoin-Groestl optimised kernel])
+AC_DEFINE_UNQUOTED([TWECOIN_KERNNAME], ["twecoin"], [Filename for Twecoin optimised kernel])

 AC_SUBST(OPENCL_LIBS)
 AC_SUBST(OPENCL_FLAGS)
--- a/driver-opencl.c
+++ b/driver-opencl.c
@ -215,6 +215,8 @@ static enum cl_kernels select_kernel(char *arg)
				@@ -215,6 +215,8 @@ static enum cl_kernels select_kernel(char *arg)
 		return KL_QUARKCOIN;
 	if (!strcmp(arg, MYRIADCOIN_GROESTL_KERNNAME))
 		return KL_MYRIADCOIN_GROESTL;
+	if (!strcmp(arg, TWECOIN_KERNNAME))
+		return KL_TWECOIN;

 	return KL_NONE;
 }
@ -1367,6 +1369,9 @@ static bool opencl_thread_prepare(struct thr_info *thr)
				@@ -1367,6 +1369,9 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 			case KL_MYRIADCOIN_GROESTL:
 				cgpu->kname = MYRIADCOIN_GROESTL_KERNNAME;
 				break;
+			case KL_TWECOIN:
+				cgpu->kname = TWECOIN_KERNNAME;
+				break;
 			default:
 				break;
 		}
@ -1406,6 +1411,7 @@ static bool opencl_thread_init(struct thr_info *thr)
				@@ -1406,6 +1411,7 @@ static bool opencl_thread_init(struct thr_info *thr)
 	case KL_QUBITCOIN:
 	case KL_QUARKCOIN:
 	case KL_MYRIADCOIN_GROESTL:
+	case KL_TWECOIN:
 		thrdata->queue_kernel_parameters = &queue_sph_kernel;
 		break;
 	default:
--- a/kernel/hamsi.cl
+++ b/kernel/hamsi.cl
@ -0,0 +1,309 @@
				@@ -0,0 +1,309 @@
+/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
+/*
+ * Hamsi implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_SMALL_FOOTPRINT_HAMSI   1
+#endif
+
+/*
+ * The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
+ * table lookup during message expansion (1 to 8, inclusive). If we note
+ * w the number of bits per message word (w=32 for Hamsi-224/256, w=64
+ * for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
+ * Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
+ * then we will get t tables (where t=ceil(w/n)) of individual size
+ * 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
+ * n=5, there are 7 tables, but the last one uses only two bits on
+ * input, not five).
+ *
+ * Also, we read t rows of r words from RAM. Words in a given row are
+ * concatenated in RAM in that order, so most of the cost is about
+ * reading the first row word; comparatively, cache misses are thus
+ * less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
+ *
+ * When n=1, tables are "special" in that we omit the first entry of
+ * each table (which always contains 0), so that total table size is
+ * halved.
+ *
+ * We thus have the following (size1 is the cumulative table size of
+ * Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
+ * are for Hamsi-224/256 and Hamsi-384/512, respectively).
+ *
+ *   n      size1      size2    t1    t2
+ * ---------------------------------------
+ *   1       1024       4096    32    64
+ *   2       2048       8192    16    32
+ *   3       2688      10880    11    22
+ *   4       4096      16384     8    16
+ *   5       6272      25600     7    13
+ *   6      10368      41984     6    11
+ *   7      16896      73856     5    10
+ *   8      32768     131072     4     8
+ *
+ * So there is a trade-off: a lower n makes the tables fit better in
+ * L1 cache, but increases the number of memory accesses. The optimal
+ * value depends on the amount of available L1 cache and the relative
+ * impact of a cache miss.
+ *
+ * Experimentally, in ideal benchmark conditions (which are not necessarily
+ * realistic with regards to L1 cache contention), it seems that n=8 is
+ * the best value on "big" architectures (those with 32 kB or more of L1
+ * cache), while n=4 is better on "small" architectures. This was tested
+ * on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
+ * (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
+ * (8 kB L1 cache).
+ *
+ * Note: with n=1, the 32 tables (actually implemented as one big table)
+ * are read entirely and sequentially, regardless of the input data,
+ * thus avoiding any data-dependent table access pattern.
+ */
+
+#if !defined SPH_HAMSI_EXPAND_SMALL
+#if SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_HAMSI_EXPAND_SMALL  4
+#else
+#define SPH_HAMSI_EXPAND_SMALL  8
+#endif
+#endif
+
+#if !defined SPH_HAMSI_EXPAND_BIG
+#define SPH_HAMSI_EXPAND_BIG    8
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#include "hamsi_helper.cl"
+
+__constant static const sph_u32 HAMSI_IV224[] = {
+	SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
+	SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};
+
+/*
+ * This version is the one used in the Hamsi submission package for
+ * round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
+ * shall soon be corrected in the official Hamsi specification.
+ *
+__constant static const sph_u32 HAMSI_IV224[] = {
+	SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
+	SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};
+ */
+
+__constant static const sph_u32 HAMSI_IV256[] = {
+	SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
+	SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
+	SPH_C32(0x656d656e), SPH_C32(0x7420456c)
+};
+
+__constant static const sph_u32 HAMSI_IV384[] = {
+	SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
+	SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
+	SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
+	SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
+	SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
+	SPH_C32(0x2c204b61)
+};
+
+__constant static const sph_u32 HAMSI_IV512[] = {
+	SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
+	SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
+	SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
+	SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
+	SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
+	SPH_C32(0x6769756d)
+};
+
+__constant static const sph_u32 alpha_n[] = {
+	SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
+};
+
+__constant static const sph_u32 alpha_f[] = {
+	SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
+	SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
+	SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
+	SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
+};
+
+#define HAMSI_DECL_STATE_SMALL \
+	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
+
+#define HAMSI_READ_STATE_SMALL(sc)   do { \
+		c0 = h[0x0]; \
+		c1 = h[0x1]; \
+		c2 = h[0x2]; \
+		c3 = h[0x3]; \
+		c4 = h[0x4]; \
+		c5 = h[0x5]; \
+		c6 = h[0x6]; \
+		c7 = h[0x7]; \
+	} while (0)
+
+#define HAMSI_WRITE_STATE_SMALL(sc)   do { \
+		h[0x0] = c0; \
+		h[0x1] = c1; \
+		h[0x2] = c2; \
+		h[0x3] = c3; \
+		h[0x4] = c4; \
+		h[0x5] = c5; \
+		h[0x6] = c6; \
+		h[0x7] = c7; \
+	} while (0)
+
+#define s0   m0
+#define s1   m1
+#define s2   c0
+#define s3   c1
+#define s4   c2
+#define s5   c3
+#define s6   m2
+#define s7   m3
+#define s8   m4
+#define s9   m5
+#define sA   c4
+#define sB   c5
+#define sC   c6
+#define sD   c7
+#define sE   m6
+#define sF   m7
+
+#define SBOX(a, b, c, d)   do { \
+		sph_u32 t; \
+		t = (a); \
+		(a) &= (c); \
+		(a) ^= (d); \
+		(c) ^= (b); \
+		(c) ^= (a); \
+		(d) |= t; \
+		(d) ^= (b); \
+		t ^= (c); \
+		(b) = (d); \
+		(d) |= t; \
+		(d) ^= (a); \
+		(a) &= (b); \
+		t ^= (a); \
+		(b) ^= (d); \
+		(b) ^= t; \
+		(a) = (c); \
+		(c) = (b); \
+		(b) = (d); \
+		(d) = SPH_T32(~t); \
+	} while (0)
+
+#define L(a, b, c, d)   do { \
+		(a) = SPH_ROTL32(a, 13); \
+		(c) = SPH_ROTL32(c, 3); \
+		(b) ^= (a) ^ (c); \
+		(d) ^= (c) ^ SPH_T32((a) << 3); \
+		(b) = SPH_ROTL32(b, 1); \
+		(d) = SPH_ROTL32(d, 7); \
+		(a) ^= (b) ^ (d); \
+		(c) ^= (d) ^ SPH_T32((b) << 7); \
+		(a) = SPH_ROTL32(a, 5); \
+		(c) = SPH_ROTL32(c, 22); \
+	} while (0)
+
+#define ROUND_SMALL(rc, alpha)   do { \
+		s0 ^= alpha[0x00]; \
+		s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
+		s2 ^= alpha[0x02]; \
+		s3 ^= alpha[0x03]; \
+		s4 ^= alpha[0x08]; \
+		s5 ^= alpha[0x09]; \
+		s6 ^= alpha[0x0A]; \
+		s7 ^= alpha[0x0B]; \
+		s8 ^= alpha[0x10]; \
+		s9 ^= alpha[0x11]; \
+		sA ^= alpha[0x12]; \
+		sB ^= alpha[0x13]; \
+		sC ^= alpha[0x18]; \
+		sD ^= alpha[0x19]; \
+		sE ^= alpha[0x1A]; \
+		sF ^= alpha[0x1B]; \
+		SBOX(s0, s4, s8, sC); \
+		SBOX(s1, s5, s9, sD); \
+		SBOX(s2, s6, sA, sE); \
+		SBOX(s3, s7, sB, sF); \
+		L(s0, s5, sA, sF); \
+		L(s1, s6, sB, sC); \
+		L(s2, s7, s8, sD); \
+		L(s3, s4, s9, sE); \
+	} while (0)
+
+#define P_SMALL   do { \
+		ROUND_SMALL(0, alpha_n); \
+		ROUND_SMALL(1, alpha_n); \
+		ROUND_SMALL(2, alpha_n); \
+	} while (0)
+
+#define PF_SMALL   do { \
+		ROUND_SMALL(0, alpha_f); \
+		ROUND_SMALL(1, alpha_f); \
+		ROUND_SMALL(2, alpha_f); \
+		ROUND_SMALL(3, alpha_f); \
+		ROUND_SMALL(4, alpha_f); \
+		ROUND_SMALL(5, alpha_f); \
+	} while (0)
+
+#define T_SMALL   do { \
+		/* order is important */ \
+		c7 = (h[7] ^= sB); \
+		c6 = (h[6] ^= sA); \
+		c5 = (h[5] ^= s9); \
+		c4 = (h[4] ^= s8); \
+		c3 = (h[3] ^= s3); \
+		c2 = (h[2] ^= s2); \
+		c1 = (h[1] ^= s1); \
+		c0 = (h[0] ^= s0); \
+	} while (0)
+
--- a/kernel/hamsi_helper.cl
+++ b/kernel/hamsi_helper.cl
--- a/kernel/panama.cl
+++ b/kernel/panama.cl
@ -0,0 +1,155 @@
				@@ -0,0 +1,155 @@
+/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * PANAMA implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#define LVAR17(b)  sph_u32 \
+	b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
+	b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
+	b ## 12, b ## 13, b ## 14, b ## 15, b ## 16;
+
+#define LVARS   \
+	LVAR17(a) \
+	LVAR17(g) \
+	LVAR17(p) \
+	LVAR17(t)
+
+#define M17(macro)   do { \
+		macro( 0,  1,  2,  4); \
+		macro( 1,  2,  3,  5); \
+		macro( 2,  3,  4,  6); \
+		macro( 3,  4,  5,  7); \
+		macro( 4,  5,  6,  8); \
+		macro( 5,  6,  7,  9); \
+		macro( 6,  7,  8, 10); \
+		macro( 7,  8,  9, 11); \
+		macro( 8,  9, 10, 12); \
+		macro( 9, 10, 11, 13); \
+		macro(10, 11, 12, 14); \
+		macro(11, 12, 13, 15); \
+		macro(12, 13, 14, 16); \
+		macro(13, 14, 15,  0); \
+		macro(14, 15, 16,  1); \
+		macro(15, 16,  0,  2); \
+		macro(16,  0,  1,  3); \
+	} while (0)
+
+#define BUPDATE1(n0, n2)   do { \
+		buffer[ptr24][n0] ^= buffer[ptr31][n2]; \
+		buffer[ptr31][n2] ^= INW1(n2); \
+	} while (0)
+
+#define BUPDATE   do { \
+		BUPDATE1(0, 2); \
+		BUPDATE1(1, 3); \
+		BUPDATE1(2, 4); \
+		BUPDATE1(3, 5); \
+		BUPDATE1(4, 6); \
+		BUPDATE1(5, 7); \
+		BUPDATE1(6, 0); \
+		BUPDATE1(7, 1); \
+	} while (0)
+
+#define RSTATE(n0, n1, n2, n4)    (a ## n0 = state[n0])
+
+#define WSTATE(n0, n1, n2, n4)    (state[n0] = a ## n0)
+
+#define GAMMA(n0, n1, n2, n4)   \
+	(g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2)))
+
+#define PI_ALL   do { \
+		p0  = g0; \
+		p1  = SPH_ROTL32( g7,  1); \
+		p2  = SPH_ROTL32(g14,  3); \
+		p3  = SPH_ROTL32( g4,  6); \
+		p4  = SPH_ROTL32(g11, 10); \
+		p5  = SPH_ROTL32( g1, 15); \
+		p6  = SPH_ROTL32( g8, 21); \
+		p7  = SPH_ROTL32(g15, 28); \
+		p8  = SPH_ROTL32( g5,  4); \
+		p9  = SPH_ROTL32(g12, 13); \
+		p10 = SPH_ROTL32( g2, 23); \
+		p11 = SPH_ROTL32( g9,  2); \
+		p12 = SPH_ROTL32(g16, 14); \
+		p13 = SPH_ROTL32( g6, 27); \
+		p14 = SPH_ROTL32(g13,  9); \
+		p15 = SPH_ROTL32( g3, 24); \
+		p16 = SPH_ROTL32(g10,  8); \
+	} while (0)
+
+#define THETA(n0, n1, n2, n4)   \
+	(t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4)
+
+#define SIGMA_ALL   do { \
+		a0 = t0 ^ 1; \
+		a1 = t1 ^ INW2(0); \
+		a2 = t2 ^ INW2(1); \
+		a3 = t3 ^ INW2(2); \
+		a4 = t4 ^ INW2(3); \
+		a5 = t5 ^ INW2(4); \
+		a6 = t6 ^ INW2(5); \
+		a7 = t7 ^ INW2(6); \
+		a8 = t8 ^ INW2(7); \
+		a9  =  t9 ^ buffer[ptr16][0]; \
+		a10 = t10 ^ buffer[ptr16][1]; \
+		a11 = t11 ^ buffer[ptr16][2]; \
+		a12 = t12 ^ buffer[ptr16][3]; \
+		a13 = t13 ^ buffer[ptr16][4]; \
+		a14 = t14 ^ buffer[ptr16][5]; \
+		a15 = t15 ^ buffer[ptr16][6]; \
+		a16 = t16 ^ buffer[ptr16][7]; \
+	} while (0)
+
+#define PANAMA_STEP   do { \
+		unsigned ptr16, ptr24, ptr31; \
+ \
+		ptr24 = (ptr0 - 8) & 31; \
+		ptr31 = (ptr0 - 1) & 31; \
+		BUPDATE; \
+		M17(GAMMA); \
+		PI_ALL; \
+		M17(THETA); \
+		ptr16 = ptr0 ^ 16; \
+		SIGMA_ALL; \
+		ptr0 = ptr31; \
+	} while (0)
+
+/*
+ * These macros are used to compute
+ */
+#define INC0     1
+#define INC1     2
+#define INC2     3
+#define INC3     4
+#define INC4     5
+#define INC5     6
+#define INC6     7
+#define INC7     8
+
--- a/kernel/shavite.cl
+++ b/kernel/shavite.cl
@ -605,3 +605,409 @@
				@@ -605,3 +605,409 @@
 	hF ^= p7; \
 	} while (0)

+#define c256(msg)    do { \
+	sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \
+	sph_u32 x0, x1, x2, x3; \
+        \
+	p0 = h[0x0]; \
+	p1 = h[0x1]; \
+	p2 = h[0x2]; \
+	p3 = h[0x3]; \
+	p4 = h[0x4]; \
+	p5 = h[0x5]; \
+	p6 = h[0x6]; \
+	p7 = h[0x7]; \
+	/* round 0 */ \
+	x0 = p4 ^ rk0; \
+	x1 = p5 ^ rk1; \
+	x2 = p6 ^ rk2; \
+	x3 = p7 ^ rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	x0 ^= rk4; \
+	x1 ^= rk5; \
+	x2 ^= rk6; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	x0 ^= rk8; \
+	x1 ^= rk9; \
+	x2 ^= rkA; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 1 */ \
+	x0 = p0 ^ rkC; \
+	x1 = p1 ^ rkD; \
+	x2 = p2 ^ rkE; \
+	x3 = p3 ^ rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
+	rk0 ^= rkC ^ count0; \
+	rk1 ^= rkD ^ SPH_T32(~count1); \
+	rk2 ^= rkE; \
+	rk3 ^= rkF; \
+	x0 ^= rk0; \
+	x1 ^= rk1; \
+	x2 ^= rk2; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
+	rk4 ^= rk0; \
+	rk5 ^= rk1; \
+	rk6 ^= rk2; \
+	rk7 ^= rk3; \
+	x0 ^= rk4; \
+	x1 ^= rk5; \
+	x2 ^= rk6; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	/* round 2 */ \
+	KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
+	rk8 ^= rk4; \
+	rk9 ^= rk5; \
+	rkA ^= rk6; \
+	rkB ^= rk7; \
+	x0 = p4 ^ rk8; \
+	x1 = p5 ^ rk9; \
+	x2 = p6 ^ rkA; \
+	x3 = p7 ^ rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
+	rkC ^= rk8; \
+	rkD ^= rk9; \
+	rkE ^= rkA; \
+	rkF ^= rkB; \
+	x0 ^= rkC; \
+	x1 ^= rkD; \
+	x2 ^= rkE; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk0 ^= rkD; \
+	x0 ^= rk0; \
+	rk1 ^= rkE; \
+	x1 ^= rk1; \
+	rk2 ^= rkF; \
+	x2 ^= rk2; \
+	rk3 ^= rk0; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 3 */ \
+	rk4 ^= rk1; \
+	x0 = p0 ^ rk4; \
+	rk5 ^= rk2; \
+	x1 = p1 ^ rk5; \
+	rk6 ^= rk3; \
+	x2 = p2 ^ rk6; \
+	rk7 ^= rk4; \
+	x3 = p3 ^ rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk8 ^= rk5; \
+	x0 ^= rk8; \
+	rk9 ^= rk6; \
+	x1 ^= rk9; \
+	rkA ^= rk7; \
+	x2 ^= rkA; \
+	rkB ^= rk8; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rkC ^= rk9; \
+	x0 ^= rkC; \
+	rkD ^= rkA; \
+	x1 ^= rkD; \
+	rkE ^= rkB; \
+	x2 ^= rkE; \
+	rkF ^= rkC; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	/* round 4 */ \
+	KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
+	rk0 ^= rkC; \
+	rk1 ^= rkD; \
+	rk2 ^= rkE; \
+	rk3 ^= rkF; \
+	x0 = p4 ^ rk0; \
+	x1 = p5 ^ rk1; \
+	x2 = p6 ^ rk2; \
+	x3 = p7 ^ rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
+	rk4 ^= rk0; \
+	rk5 ^= rk1; \
+	rk6 ^= rk2; \
+	rk7 ^= rk3; \
+	x0 ^= rk4; \
+	x1 ^= rk5; \
+	x2 ^= rk6; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
+	rk8 ^= rk4; \
+	rk9 ^= rk5 ^ count1; \
+	rkA ^= rk6 ^ SPH_T32(~count0); \
+	rkB ^= rk7; \
+	x0 ^= rk8; \
+	x1 ^= rk9; \
+	x2 ^= rkA; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 5 */ \
+	KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
+	rkC ^= rk8; \
+	rkD ^= rk9; \
+	rkE ^= rkA; \
+	rkF ^= rkB; \
+	x0 = p0 ^ rkC; \
+	x1 = p1 ^ rkD; \
+	x2 = p2 ^ rkE; \
+	x3 = p3 ^ rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk0 ^= rkD; \
+	x0 ^= rk0; \
+	rk1 ^= rkE; \
+	x1 ^= rk1; \
+	rk2 ^= rkF; \
+	x2 ^= rk2; \
+	rk3 ^= rk0; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk4 ^= rk1; \
+	x0 ^= rk4; \
+	rk5 ^= rk2; \
+	x1 ^= rk5; \
+	rk6 ^= rk3; \
+	x2 ^= rk6; \
+	rk7 ^= rk4; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	/* round 6 */ \
+	rk8 ^= rk5; \
+	x0 = p4 ^ rk8; \
+	rk9 ^= rk6; \
+	x1 = p5 ^ rk9; \
+	rkA ^= rk7; \
+	x2 = p6 ^ rkA; \
+	rkB ^= rk8; \
+	x3 = p7 ^ rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rkC ^= rk9; \
+	x0 ^= rkC; \
+	rkD ^= rkA; \
+	x1 ^= rkD; \
+	rkE ^= rkB; \
+	x2 ^= rkE; \
+	rkF ^= rkC; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
+	rk0 ^= rkC; \
+	rk1 ^= rkD; \
+	rk2 ^= rkE; \
+	rk3 ^= rkF; \
+	x0 ^= rk0; \
+	x1 ^= rk1; \
+	x2 ^= rk2; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 7 */ \
+	KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
+	rk4 ^= rk0; \
+	rk5 ^= rk1; \
+	rk6 ^= rk2 ^ count1; \
+	rk7 ^= rk3 ^ SPH_T32(~count0); \
+	x0 = p0 ^ rk4; \
+	x1 = p1 ^ rk5; \
+	x2 = p2 ^ rk6; \
+	x3 = p3 ^ rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
+	rk8 ^= rk4; \
+	rk9 ^= rk5; \
+	rkA ^= rk6; \
+	rkB ^= rk7; \
+	x0 ^= rk8; \
+	x1 ^= rk9; \
+	x2 ^= rkA; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
+	rkC ^= rk8; \
+	rkD ^= rk9; \
+	rkE ^= rkA; \
+	rkF ^= rkB; \
+	x0 ^= rkC; \
+	x1 ^= rkD; \
+	x2 ^= rkE; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	/* round 8 */ \
+	rk0 ^= rkD; \
+	x0 = p4 ^ rk0; \
+	rk1 ^= rkE; \
+	x1 = p5 ^ rk1; \
+	rk2 ^= rkF; \
+	x2 = p6 ^ rk2; \
+	rk3 ^= rk0; \
+	x3 = p7 ^ rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk4 ^= rk1; \
+	x0 ^= rk4; \
+	rk5 ^= rk2; \
+	x1 ^= rk5; \
+	rk6 ^= rk3; \
+	x2 ^= rk6; \
+	rk7 ^= rk4; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk8 ^= rk5; \
+	x0 ^= rk8; \
+	rk9 ^= rk6; \
+	x1 ^= rk9; \
+	rkA ^= rk7; \
+	x2 ^= rkA; \
+	rkB ^= rk8; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 9 */ \
+	rkC ^= rk9; \
+	x0 = p0 ^ rkC; \
+	rkD ^= rkA; \
+	x1 = p1 ^ rkD; \
+	rkE ^= rkB; \
+	x2 = p2 ^ rkE; \
+	rkF ^= rkC; \
+	x3 = p3 ^ rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
+	rk0 ^= rkC; \
+	rk1 ^= rkD; \
+	rk2 ^= rkE; \
+	rk3 ^= rkF; \
+	x0 ^= rk0; \
+	x1 ^= rk1; \
+	x2 ^= rk2; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
+	rk4 ^= rk0; \
+	rk5 ^= rk1; \
+	rk6 ^= rk2; \
+	rk7 ^= rk3; \
+	x0 ^= rk4; \
+	x1 ^= rk5; \
+	x2 ^= rk6; \
+	x3 ^= rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	/* round 10 */ \
+	KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
+	rk8 ^= rk4; \
+	rk9 ^= rk5; \
+	rkA ^= rk6; \
+	rkB ^= rk7; \
+	x0 = p4 ^ rk8; \
+	x1 = p5 ^ rk9; \
+	x2 = p6 ^ rkA; \
+	x3 = p7 ^ rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
+	rkC ^= rk8 ^ count0; \
+	rkD ^= rk9; \
+	rkE ^= rkA; \
+	rkF ^= rkB ^ SPH_T32(~count1); \
+	x0 ^= rkC; \
+	x1 ^= rkD; \
+	x2 ^= rkE; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk0 ^= rkD; \
+	x0 ^= rk0; \
+	rk1 ^= rkE; \
+	x1 ^= rk1; \
+	rk2 ^= rkF; \
+	x2 ^= rk2; \
+	rk3 ^= rk0; \
+	x3 ^= rk3; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p0 ^= x0; \
+	p1 ^= x1; \
+	p2 ^= x2; \
+	p3 ^= x3; \
+	/* round 11 */ \
+	rk4 ^= rk1; \
+	x0 = p0 ^ rk4; \
+	rk5 ^= rk2; \
+	x1 = p1 ^ rk5; \
+	rk6 ^= rk3; \
+	x2 = p2 ^ rk6; \
+	rk7 ^= rk4; \
+	x3 = p3 ^ rk7; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rk8 ^= rk5; \
+	x0 ^= rk8; \
+	rk9 ^= rk6; \
+	x1 ^= rk9; \
+	rkA ^= rk7; \
+	x2 ^= rkA; \
+	rkB ^= rk8; \
+	x3 ^= rkB; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	rkC ^= rk9; \
+	x0 ^= rkC; \
+	rkD ^= rkA; \
+	x1 ^= rkD; \
+	rkE ^= rkB; \
+	x2 ^= rkE; \
+	rkF ^= rkC; \
+	x3 ^= rkF; \
+	AES_ROUND_NOKEY(x0, x1, x2, x3); \
+	p4 ^= x0; \
+	p5 ^= x1; \
+	p6 ^= x2; \
+	p7 ^= x3; \
+	h[0x0] ^= p0; \
+	h[0x1] ^= p1; \
+	h[0x2] ^= p2; \
+	h[0x3] ^= p3; \
+	h[0x4] ^= p4; \
+	h[0x5] ^= p5; \
+	h[0x6] ^= p6; \
+	h[0x7] ^= p7; \
+        } while(0)
+
--- a/kernel/twecoin.cl
+++ b/kernel/twecoin.cl
@ -0,0 +1,428 @@
				@@ -0,0 +1,428 @@
+#if __ENDIAN_LITTLE__
+#define SPH_LITTLE_ENDIAN 1
+#else
+#define SPH_BIG_ENDIAN 1
+#endif
+
+#define SPH_UPTR sph_u64
+
+typedef unsigned int sph_u32;
+typedef int sph_s32;
+#ifndef __OPENCL_VERSION__
+typedef unsigned long long sph_u64;
+typedef long long sph_s64;
+#else
+typedef unsigned long sph_u64;
+typedef long sph_s64;
+#endif
+
+#define SPH_64 1
+#define SPH_64_TRUE 1
+
+#define SPH_C32(x)    ((sph_u32)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+#define SPH_ROTL32(x, n)   SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))
+
+#define SPH_C64(x)    ((sph_u64)(x ## UL))
+#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+#define SPH_ROTL64(x, n)   SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
+#define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))
+
+#define SPH_HAMSI_EXPAND_SMALL 1
+
+#include "fugue.cl"
+#include "shavite.cl"
+#include "hamsi.cl"
+#include "panama.cl"
+
+#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
+#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
+
+#if SPH_BIG_ENDIAN
+    #define DEC32BE(x) (*(const __global sph_u32 *) (x))
+#else
+    #define DEC32BE(x) SWAP4(*(const __global sph_u32 *) (x))
+#endif
+
+#define sph_bswap32(x) SWAP4(x)
+
+static void sph_enc32be(void *dst, sph_u32 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_LITTLE_ENDIAN
+	val = sph_bswap32(val);
+#endif
+	*(sph_u32 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 3) == 0) {
+#if SPH_LITTLE_ENDIAN
+		val = sph_bswap32(val);
+#endif
+		*(sph_u32 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = (val >> 24);
+		((unsigned char *)dst)[1] = (val >> 16);
+		((unsigned char *)dst)[2] = (val >> 8);
+		((unsigned char *)dst)[3] = val;
+	}
+#endif
+#else
+	((unsigned char *)dst)[0] = (val >> 24);
+	((unsigned char *)dst)[1] = (val >> 16);
+	((unsigned char *)dst)[2] = (val >> 8);
+	((unsigned char *)dst)[3] = val;
+#endif
+}
+
+static void sph_enc32le(void *dst, sph_u32 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_BIG_ENDIAN
+	val = sph_bswap32(val);
+#endif
+	*(sph_u32 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 3) == 0) {
+#if SPH_BIG_ENDIAN
+		val = sph_bswap32(val);
+#endif
+		*(sph_u32 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = val;
+		((unsigned char *)dst)[1] = (val >> 8);
+		((unsigned char *)dst)[2] = (val >> 16);
+		((unsigned char *)dst)[3] = (val >> 24);
+	}
+#endif
+#else
+	((unsigned char *)dst)[0] = val;
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+#endif
+}
+
+static sph_u32 sph_dec32le_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN
+	return *(const sph_u32 *)src;
+#elif SPH_BIG_ENDIAN
+	return sph_bswap32(*(const sph_u32 *)src);
+#else
+	return (sph_u32)(((const unsigned char *)src)[0])
+		| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+#endif
+}
+
+
+__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
+{
+    __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
+    int init = get_local_id(0);
+    int step = get_local_size(0);
+    for (int i = init; i < 256; i += step)
+    {
+        AES0[i] = AES0_C[i];
+        AES1[i] = AES1_C[i];
+        AES2[i] = AES2_C[i];
+        AES3[i] = AES3_C[i];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    unsigned char hash[64];
+    for(unsigned j = 0; j < 64; j++)
+        hash[j] = 0;
+ 
+    sph_u32 gid = get_global_id(0);
+
+    // fugue
+    {
+        sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
+        sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
+        sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];
+    
+        FUGUE256_5(DEC32BE(block + 0x0), DEC32BE(block + 0x4), DEC32BE(block + 0x8), DEC32BE(block + 0xc), DEC32BE(block + 0x10));
+        FUGUE256_5(DEC32BE(block + 0x14), DEC32BE(block + 0x18), DEC32BE(block + 0x1c), DEC32BE(block + 0x20), DEC32BE(block + 0x24));
+        FUGUE256_5(DEC32BE(block + 0x28), DEC32BE(block + 0x2c), DEC32BE(block + 0x30), DEC32BE(block + 0x34), DEC32BE(block + 0x38));
+        FUGUE256_4(DEC32BE(block + 0x3c), DEC32BE(block + 0x40), DEC32BE(block + 0x44), DEC32BE(block + 0x48));
+    
+        TIX2(SWAP4(gid), S06, S07, S14, S16, S00);
+        CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
+        SMIX(S03, S04, S05, S06);
+        CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
+        SMIX(S00, S01, S02, S03);
+    
+        TIX2(0, S00, S01, S08, S10, S24);
+        CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
+        SMIX(S27, S28, S29, S00);
+        CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
+        SMIX(S24, S25, S26, S27);
+    
+        TIX2(0x280, S24, S25, S02, S04, S18);
+        CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
+        SMIX(S21, S22, S23, S24);
+        CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
+        SMIX(S18, S19, S20, S21);
+    
+        CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02);
+        SMIX(S15, S16, S17, S18);
+        CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29);
+        SMIX(S12, S13, S14, S15);
+        CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26);
+        SMIX(S09, S10, S11, S12);
+        CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23);
+        SMIX(S06, S07, S08, S09);
+        CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
+        SMIX(S03, S04, S05, S06);
+        CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
+        SMIX(S00, S01, S02, S03);
+        CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
+        SMIX(S27, S28, S29, S00);
+        CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
+        SMIX(S24, S25, S26, S27);
+        CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
+        SMIX(S21, S22, S23, S24);
+        CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
+        SMIX(S18, S19, S20, S21);
+        S22 ^= S18;
+        S03 ^= S18;
+        SMIX(S03, S04, S05, S06);
+        S07 ^= S03;
+        S19 ^= S03;
+        SMIX(S19, S20, S21, S22);
+        S23 ^= S19;
+        S04 ^= S19;
+        SMIX(S04, S05, S06, S07);
+        S08 ^= S04;
+        S20 ^= S04;
+        SMIX(S20, S21, S22, S23);
+        S24 ^= S20;
+        S05 ^= S20;
+        SMIX(S05, S06, S07, S08);
+        S09 ^= S05;
+        S21 ^= S05;
+        SMIX(S21, S22, S23, S24);
+        S25 ^= S21;
+        S06 ^= S21;
+        SMIX(S06, S07, S08, S09);
+        S10 ^= S06;
+        S22 ^= S06;
+        SMIX(S22, S23, S24, S25);
+        S26 ^= S22;
+        S07 ^= S22;
+        SMIX(S07, S08, S09, S10);
+        S11 ^= S07;
+        S23 ^= S07;
+        SMIX(S23, S24, S25, S26);
+        S27 ^= S23;
+        S08 ^= S23;
+        SMIX(S08, S09, S10, S11);
+        S12 ^= S08;
+        S24 ^= S08;
+        SMIX(S24, S25, S26, S27);
+        S28 ^= S24;
+        S09 ^= S24;
+        SMIX(S09, S10, S11, S12);
+        S13 ^= S09;
+        S25 ^= S09;
+        SMIX(S25, S26, S27, S28);
+        S29 ^= S25;
+        S10 ^= S25;
+        SMIX(S10, S11, S12, S13);
+        S14 ^= S10;
+        S26 ^= S10;
+        SMIX(S26, S27, S28, S29);
+        S00 ^= S26;
+        S11 ^= S26;
+        SMIX(S11, S12, S13, S14);
+        S15 ^= S11;
+        S27 ^= S11;
+        SMIX(S27, S28, S29, S00);
+        S01 ^= S27;
+        S12 ^= S27;
+        SMIX(S12, S13, S14, S15);
+        S16 ^= S12;
+        S28 ^= S12;
+        SMIX(S28, S29, S00, S01);
+        S02 ^= S28;
+        S13 ^= S28;
+        SMIX(S13, S14, S15, S16);
+        S17 ^= S13;
+        S29 ^= S13;
+        SMIX(S29, S00, S01, S02);
+        S03 ^= S29;
+        S14 ^= S29;
+        SMIX(S14, S15, S16, S17);
+        S18 ^= S14;
+        S00 ^= S14;
+        SMIX(S00, S01, S02, S03);
+        S04 ^= S00;
+        S15 ^= S00;
+        SMIX(S15, S16, S17, S18);
+        S19 ^= S15;
+        S01 ^= S15;
+        SMIX(S01, S02, S03, S04);
+    
+        S05 ^= S01;
+        S16 ^= S01;
+    
+        sph_enc32be((unsigned char*) &hash +  0, S02);
+        sph_enc32be((unsigned char*) &hash +  4, S03);
+        sph_enc32be((unsigned char*) &hash +  8, S04);
+        sph_enc32be((unsigned char*) &hash + 12, S05);
+        sph_enc32be((unsigned char*) &hash + 16, S16);
+        sph_enc32be((unsigned char*) &hash + 20, S17);
+        sph_enc32be((unsigned char*) &hash + 24, S18);
+        sph_enc32be((unsigned char*) &hash + 28, S19);
+    }
+
+    // shavite
+    {
+	sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) };
+	sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
+	sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;
+	sph_u32 count0, count1;
+
+	rk0 = sph_dec32le_aligned((const unsigned char *)&hash +  0);
+	rk1 = sph_dec32le_aligned((const unsigned char *)&hash +  4);
+	rk2 = sph_dec32le_aligned((const unsigned char *)&hash +  8);
+	rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12);
+	rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16);
+	rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20);
+	rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24);
+	rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28);
+	rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32);
+	rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36);
+	rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40);
+	rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44);
+	rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48);
+	rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52);
+	rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56);
+	rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60);
+	count0 = 0x200;
+	count1 = 0;
+	c256(buf);
+
+	rk0 = 0x80;
+	rk1 = 0;
+	rk2 = 0;
+	rk3 = 0;
+	rk4 = 0;
+	rk5 = 0;
+	rk6 = 0;
+	rk7 = 0;
+	rk8 = 0;
+	rk9 = 0;
+	rkA = 0;
+	rkB = 0;
+	rkC = 0;
+	rkD = 0x2000000;
+	rkE = 0;
+	rkF = 0x1000000;
+	count0 = 0;
+	count1 = 0;
+	c256(buf);
+
+	for (unsigned u = 0; u < 8; u ++)
+		sph_enc32le((unsigned char *)&hash + (u << 2), h[u]);
+    }
+
+    // hamsi
+    {
+        sph_u32 c0 = HAMSI_IV256[0], c1 = HAMSI_IV256[1], c2 = HAMSI_IV256[2], c3 = HAMSI_IV256[3];
+        sph_u32 c4 = HAMSI_IV256[4], c5 = HAMSI_IV256[5], c6 = HAMSI_IV256[6], c7 = HAMSI_IV256[7];
+        sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+        sph_u32 h[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };
+
+#define buf(u) hash[i + u]
+        for(int i = 0; i < 64; i += 4) {
+            INPUT_SMALL;
+            P_SMALL;
+            T_SMALL;
+        }
+#undef buf
+#define buf(u) (u == 0 ? 0x80 : 0)
+        INPUT_SMALL;
+        P_SMALL;
+        T_SMALL;
+#undef buf
+#define buf(u) 0
+        INPUT_SMALL;
+        P_SMALL;
+        T_SMALL;
+#undef buf
+#define buf(u) (u == 2 ? 2 : 0)
+        INPUT_SMALL;
+        PF_SMALL;
+        T_SMALL;
+
+        for (unsigned u = 0; u < 8; u ++)
+            sph_enc32be((unsigned char*) &hash + (u << 2), h[u]);
+    }
+
+    // panama
+    {
+        sph_u32 buffer[32][8];
+        sph_u32 state[17];
+        int i, j;
+        for(i = 0; i < 32; i++)
+                for(j = 0; j < 8; j++)
+                        buffer[i][j] = 0;
+        for(i = 0; i < 17; i++)
+                state[i] = 0;
+
+        LVARS
+        unsigned ptr0 = 0;
+#define INW1(i)   sph_dec32le_aligned((unsigned char*) &hash + 4 * (i))
+#define INW2(i)   INW1(i)
+
+        M17(RSTATE);
+        PANAMA_STEP;
+
+#undef INW1
+#undef INW2
+#define INW1(i)   sph_dec32le_aligned((unsigned char*) &hash + 32 + 4 * (i))
+#define INW2(i)   INW1(i)
+        PANAMA_STEP;
+        M17(WSTATE);
+
+#undef INW1
+#undef INW2
+
+#define INW1(i)   (sph_u32) (i == 0)
+#define INW2(i)   INW1(i)
+
+        M17(RSTATE);
+        PANAMA_STEP;
+        M17(WSTATE);
+
+#undef INW1
+#undef INW2
+
+#define INW1(i)     INW_H1(INC ## i)
+#define INW_H1(i)   INW_H2(i)
+#define INW_H2(i)   a ## i
+#define INW2(i)     buffer[ptr4][i]
+
+        M17(RSTATE);
+        for(i = 0; i < 32; i++) {
+                unsigned ptr4 = (ptr0 + 4) & 31;
+                PANAMA_STEP;
+        }
+        M17(WSTATE);
+
+#undef INW1
+#undef INW_H1
+#undef INW_H2
+#undef INW2
+
+        bool result = ((((sph_u64) state[16] << 32) | state[15]) <= target);
+        if (result)
+            output[output[0xFF]++] = SWAP4(gid);
+    }
+}
--- a/miner.h
+++ b/miner.h
@ -385,6 +385,7 @@ enum cl_kernels {
				@@ -385,6 +385,7 @@ enum cl_kernels {
 	KL_QUBITCOIN,
 	KL_DARKCOIN,	// kernels starting from this will have difficulty calculated by using bitcoin algorithm
 	KL_MYRIADCOIN_GROESTL,
+	KL_TWECOIN,
 };

 enum dev_reason {
--- a/ocl.c
+++ b/ocl.c
@ -474,6 +474,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
				@@ -474,6 +474,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, MYRIADCOIN_GROESTL_KERNNAME".cl");
 			strcpy(binaryfilename, MYRIADCOIN_GROESTL_KERNNAME);
 			break;
+		case KL_TWECOIN:
+			applog(LOG_WARNING, "Kernel twecoin is experimental.");
+			strcpy(filename, TWECOIN_KERNNAME".cl");
+			strcpy(binaryfilename, TWECOIN_KERNNAME);
+			break;
 		case KL_NONE: /* Shouldn't happen */
 			break;
 	}
--- a/sgminer.c
+++ b/sgminer.c
@ -4239,6 +4239,9 @@ void write_config(FILE *fcfg)
				@@ -4239,6 +4239,9 @@ void write_config(FILE *fcfg)
 				case KL_MYRIADCOIN_GROESTL:
 					fprintf(fcfg, MYRIADCOIN_GROESTL_KERNNAME);
 					break;
+				case KL_TWECOIN:
+					fprintf(fcfg, TWECOIN_KERNNAME);
+					break;
 			}
 		}

@ -6055,6 +6058,9 @@ static void rebuild_nonce(struct work *work, uint32_t nonce)
				@@ -6055,6 +6058,9 @@ static void rebuild_nonce(struct work *work, uint32_t nonce)
 		case KL_MYRIADCOIN_GROESTL:
 			myriadcoin_groestl_regenhash(work);
 			break;
+		case KL_TWECOIN:
+			twecoin_regenhash(work);
+			break;
 		default:
 			scrypt_regenhash(work);
 			break;
--- a/sph/Makefile.am
+++ b/sph/Makefile.am
@ -1,3 +1,3 @@
				@@ -1,3 +1,3 @@
 noinst_LIBRARIES	= libsph.a

-libsph_a_SOURCES	= bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c
+libsph_a_SOURCES	= bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c
--- a/sph/hamsi.c
+++ b/sph/hamsi.c
@ -0,0 +1,859 @@
				@@ -0,0 +1,859 @@
+/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
+/*
+ * Hamsi implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_hamsi.h"
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_SMALL_FOOTPRINT_HAMSI   1
+#endif
+
+/*
+ * The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
+ * table lookup during message expansion (1 to 8, inclusive). If we note
+ * w the number of bits per message word (w=32 for Hamsi-224/256, w=64
+ * for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
+ * Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
+ * then we will get t tables (where t=ceil(w/n)) of individual size
+ * 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
+ * n=5, there are 7 tables, but the last one uses only two bits on
+ * input, not five).
+ *
+ * Also, we read t rows of r words from RAM. Words in a given row are
+ * concatenated in RAM in that order, so most of the cost is about
+ * reading the first row word; comparatively, cache misses are thus
+ * less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
+ *
+ * When n=1, tables are "special" in that we omit the first entry of
+ * each table (which always contains 0), so that total table size is
+ * halved.
+ *
+ * We thus have the following (size1 is the cumulative table size of
+ * Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
+ * are for Hamsi-224/256 and Hamsi-384/512, respectively).
+ *
+ *   n      size1      size2    t1    t2
+ * ---------------------------------------
+ *   1       1024       4096    32    64
+ *   2       2048       8192    16    32
+ *   3       2688      10880    11    22
+ *   4       4096      16384     8    16
+ *   5       6272      25600     7    13
+ *   6      10368      41984     6    11
+ *   7      16896      73856     5    10
+ *   8      32768     131072     4     8
+ *
+ * So there is a trade-off: a lower n makes the tables fit better in
+ * L1 cache, but increases the number of memory accesses. The optimal
+ * value depends on the amount of available L1 cache and the relative
+ * impact of a cache miss.
+ *
+ * Experimentally, in ideal benchmark conditions (which are not necessarily
+ * realistic with regards to L1 cache contention), it seems that n=8 is
+ * the best value on "big" architectures (those with 32 kB or more of L1
+ * cache), while n=4 is better on "small" architectures. This was tested
+ * on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
+ * (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
+ * (8 kB L1 cache).
+ *
+ * Note: with n=1, the 32 tables (actually implemented as one big table)
+ * are read entirely and sequentially, regardless of the input data,
+ * thus avoiding any data-dependent table access pattern.
+ */
+
+#if !defined SPH_HAMSI_EXPAND_SMALL
+#if SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_HAMSI_EXPAND_SMALL  4
+#else
+#define SPH_HAMSI_EXPAND_SMALL  8
+#endif
+#endif
+
+#if !defined SPH_HAMSI_EXPAND_BIG
+#define SPH_HAMSI_EXPAND_BIG    8
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#include "hamsi_helper.c"
+
+static const sph_u32 IV224[] = {
+	SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
+	SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};
+
+/*
+ * This version is the one used in the Hamsi submission package for
+ * round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
+ * shall soon be corrected in the official Hamsi specification.
+ *
+static const sph_u32 IV224[] = {
+	SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
+	SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};
+ */
+
+static const sph_u32 IV256[] = {
+	SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
+	SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
+	SPH_C32(0x656d656e), SPH_C32(0x7420456c)
+};
+
+static const sph_u32 IV384[] = {
+	SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
+	SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
+	SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
+	SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
+	SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
+	SPH_C32(0x2c204b61)
+};
+
+static const sph_u32 IV512[] = {
+	SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
+	SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
+	SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
+	SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
+	SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
+	SPH_C32(0x6769756d)
+};
+
+static const sph_u32 alpha_n[] = {
+	SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
+};
+
+static const sph_u32 alpha_f[] = {
+	SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
+	SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
+	SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
+	SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
+};
+
+#define DECL_STATE_SMALL \
+	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
+
+#define READ_STATE_SMALL(sc)   do { \
+		c0 = sc->h[0x0]; \
+		c1 = sc->h[0x1]; \
+		c2 = sc->h[0x2]; \
+		c3 = sc->h[0x3]; \
+		c4 = sc->h[0x4]; \
+		c5 = sc->h[0x5]; \
+		c6 = sc->h[0x6]; \
+		c7 = sc->h[0x7]; \
+	} while (0)
+
+#define WRITE_STATE_SMALL(sc)   do { \
+		sc->h[0x0] = c0; \
+		sc->h[0x1] = c1; \
+		sc->h[0x2] = c2; \
+		sc->h[0x3] = c3; \
+		sc->h[0x4] = c4; \
+		sc->h[0x5] = c5; \
+		sc->h[0x6] = c6; \
+		sc->h[0x7] = c7; \
+	} while (0)
+
+#define s0   m0
+#define s1   m1
+#define s2   c0
+#define s3   c1
+#define s4   c2
+#define s5   c3
+#define s6   m2
+#define s7   m3
+#define s8   m4
+#define s9   m5
+#define sA   c4
+#define sB   c5
+#define sC   c6
+#define sD   c7
+#define sE   m6
+#define sF   m7
+
+#define SBOX(a, b, c, d)   do { \
+		sph_u32 t; \
+		t = (a); \
+		(a) &= (c); \
+		(a) ^= (d); \
+		(c) ^= (b); \
+		(c) ^= (a); \
+		(d) |= t; \
+		(d) ^= (b); \
+		t ^= (c); \
+		(b) = (d); \
+		(d) |= t; \
+		(d) ^= (a); \
+		(a) &= (b); \
+		t ^= (a); \
+		(b) ^= (d); \
+		(b) ^= t; \
+		(a) = (c); \
+		(c) = (b); \
+		(b) = (d); \
+		(d) = SPH_T32(~t); \
+	} while (0)
+
+#define L(a, b, c, d)   do { \
+		(a) = SPH_ROTL32(a, 13); \
+		(c) = SPH_ROTL32(c, 3); \
+		(b) ^= (a) ^ (c); \
+		(d) ^= (c) ^ SPH_T32((a) << 3); \
+		(b) = SPH_ROTL32(b, 1); \
+		(d) = SPH_ROTL32(d, 7); \
+		(a) ^= (b) ^ (d); \
+		(c) ^= (d) ^ SPH_T32((b) << 7); \
+		(a) = SPH_ROTL32(a, 5); \
+		(c) = SPH_ROTL32(c, 22); \
+	} while (0)
+
+#define ROUND_SMALL(rc, alpha)   do { \
+		s0 ^= alpha[0x00]; \
+		s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
+		s2 ^= alpha[0x02]; \
+		s3 ^= alpha[0x03]; \
+		s4 ^= alpha[0x08]; \
+		s5 ^= alpha[0x09]; \
+		s6 ^= alpha[0x0A]; \
+		s7 ^= alpha[0x0B]; \
+		s8 ^= alpha[0x10]; \
+		s9 ^= alpha[0x11]; \
+		sA ^= alpha[0x12]; \
+		sB ^= alpha[0x13]; \
+		sC ^= alpha[0x18]; \
+		sD ^= alpha[0x19]; \
+		sE ^= alpha[0x1A]; \
+		sF ^= alpha[0x1B]; \
+		SBOX(s0, s4, s8, sC); \
+		SBOX(s1, s5, s9, sD); \
+		SBOX(s2, s6, sA, sE); \
+		SBOX(s3, s7, sB, sF); \
+		L(s0, s5, sA, sF); \
+		L(s1, s6, sB, sC); \
+		L(s2, s7, s8, sD); \
+		L(s3, s4, s9, sE); \
+	} while (0)
+
+#define P_SMALL   do { \
+		ROUND_SMALL(0, alpha_n); \
+		ROUND_SMALL(1, alpha_n); \
+		ROUND_SMALL(2, alpha_n); \
+	} while (0)
+
+#define PF_SMALL   do { \
+		ROUND_SMALL(0, alpha_f); \
+		ROUND_SMALL(1, alpha_f); \
+		ROUND_SMALL(2, alpha_f); \
+		ROUND_SMALL(3, alpha_f); \
+		ROUND_SMALL(4, alpha_f); \
+		ROUND_SMALL(5, alpha_f); \
+	} while (0)
+
+#define T_SMALL   do { \
+		/* order is important */ \
+		c7 = (sc->h[7] ^= sB); \
+		c6 = (sc->h[6] ^= sA); \
+		c5 = (sc->h[5] ^= s9); \
+		c4 = (sc->h[4] ^= s8); \
+		c3 = (sc->h[3] ^= s3); \
+		c2 = (sc->h[2] ^= s2); \
+		c1 = (sc->h[1] ^= s1); \
+		c0 = (sc->h[0] ^= s0); \
+	} while (0)
+
+static void
+hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
+{
+	DECL_STATE_SMALL
+#if !SPH_64
+	sph_u32 tmp;
+#endif
+
+#if SPH_64
+	sc->count += (sph_u64)num << 5;
+#else
+	tmp = SPH_T32((sph_u32)num << 5);
+	sc->count_low = SPH_T32(sc->count_low + tmp);
+	sc->count_high += (sph_u32)((num >> 13) >> 14);
+	if (sc->count_low < tmp)
+		sc->count_high ++;
+#endif
+	READ_STATE_SMALL(sc);
+	while (num -- > 0) {
+		sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+
+		INPUT_SMALL;
+		P_SMALL;
+		T_SMALL;
+		buf += 4;
+	}
+	WRITE_STATE_SMALL(sc);
+}
+
+static void
+hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
+{
+	sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+	DECL_STATE_SMALL
+
+	READ_STATE_SMALL(sc);
+	INPUT_SMALL;
+	PF_SMALL;
+	T_SMALL;
+	WRITE_STATE_SMALL(sc);
+}
+
+static void
+hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
+{
+	sc->partial_len = 0;
+	memcpy(sc->h, iv, sizeof sc->h);
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_high = sc->count_low = 0;
+#endif
+}
+
+static void
+hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
+{
+	if (sc->partial_len != 0) {
+		size_t mlen;
+
+		mlen = 4 - sc->partial_len;
+		if (len < mlen) {
+			memcpy(sc->partial + sc->partial_len, data, len);
+			sc->partial_len += len;
+			return;
+		} else {
+			memcpy(sc->partial + sc->partial_len, data, mlen);
+			len -= mlen;
+			data = (const unsigned char *)data + mlen;
+			hamsi_small(sc, sc->partial, 1);
+			sc->partial_len = 0;
+		}
+	}
+
+	hamsi_small(sc, data, (len >> 2));
+	data = (const unsigned char *)data + (len & ~(size_t)3);
+	len &= (size_t)3;
+	memcpy(sc->partial, data, len);
+	sc->partial_len = len;
+}
+
+static void
+hamsi_small_close(sph_hamsi_small_context *sc,
+	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+	unsigned char pad[12];
+	size_t ptr, u;
+	unsigned z;
+	unsigned char *out;
+
+	ptr = sc->partial_len;
+	memcpy(pad, sc->partial, ptr);
+#if SPH_64
+	sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
+#else
+	sph_enc32be(pad + 4, sc->count_high);
+	sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
+#endif
+	z = 0x80 >> n;
+	pad[ptr ++] = ((ub & -z) | z) & 0xFF;
+	while (ptr < 4)
+		pad[ptr ++] = 0;
+	hamsi_small(sc, pad, 2);
+	hamsi_small_final(sc, pad + 8);
+	out = dst;
+	for (u = 0; u < out_size_w32; u ++)
+		sph_enc32be(out + (u << 2), sc->h[u]);
+}
+
+#define DECL_STATE_BIG \
+	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
+	sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
+
+#define READ_STATE_BIG(sc)   do { \
+		c0 = sc->h[0x0]; \
+		c1 = sc->h[0x1]; \
+		c2 = sc->h[0x2]; \
+		c3 = sc->h[0x3]; \
+		c4 = sc->h[0x4]; \
+		c5 = sc->h[0x5]; \
+		c6 = sc->h[0x6]; \
+		c7 = sc->h[0x7]; \
+		c8 = sc->h[0x8]; \
+		c9 = sc->h[0x9]; \
+		cA = sc->h[0xA]; \
+		cB = sc->h[0xB]; \
+		cC = sc->h[0xC]; \
+		cD = sc->h[0xD]; \
+		cE = sc->h[0xE]; \
+		cF = sc->h[0xF]; \
+	} while (0)
+
+#define WRITE_STATE_BIG(sc)   do { \
+		sc->h[0x0] = c0; \
+		sc->h[0x1] = c1; \
+		sc->h[0x2] = c2; \
+		sc->h[0x3] = c3; \
+		sc->h[0x4] = c4; \
+		sc->h[0x5] = c5; \
+		sc->h[0x6] = c6; \
+		sc->h[0x7] = c7; \
+		sc->h[0x8] = c8; \
+		sc->h[0x9] = c9; \
+		sc->h[0xA] = cA; \
+		sc->h[0xB] = cB; \
+		sc->h[0xC] = cC; \
+		sc->h[0xD] = cD; \
+		sc->h[0xE] = cE; \
+		sc->h[0xF] = cF; \
+	} while (0)
+
+#define s00   m0
+#define s01   m1
+#define s02   c0
+#define s03   c1
+#define s04   m2
+#define s05   m3
+#define s06   c2
+#define s07   c3
+#define s08   c4
+#define s09   c5
+#define s0A   m4
+#define s0B   m5
+#define s0C   c6
+#define s0D   c7
+#define s0E   m6
+#define s0F   m7
+#define s10   m8
+#define s11   m9
+#define s12   c8
+#define s13   c9
+#define s14   mA
+#define s15   mB
+#define s16   cA
+#define s17   cB
+#define s18   cC
+#define s19   cD
+#define s1A   mC
+#define s1B   mD
+#define s1C   cE
+#define s1D   cF
+#define s1E   mE
+#define s1F   mF
+
+#define ROUND_BIG(rc, alpha)   do { \
+		s00 ^= alpha[0x00]; \
+		s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
+		s02 ^= alpha[0x02]; \
+		s03 ^= alpha[0x03]; \
+		s04 ^= alpha[0x04]; \
+		s05 ^= alpha[0x05]; \
+		s06 ^= alpha[0x06]; \
+		s07 ^= alpha[0x07]; \
+		s08 ^= alpha[0x08]; \
+		s09 ^= alpha[0x09]; \
+		s0A ^= alpha[0x0A]; \
+		s0B ^= alpha[0x0B]; \
+		s0C ^= alpha[0x0C]; \
+		s0D ^= alpha[0x0D]; \
+		s0E ^= alpha[0x0E]; \
+		s0F ^= alpha[0x0F]; \
+		s10 ^= alpha[0x10]; \
+		s11 ^= alpha[0x11]; \
+		s12 ^= alpha[0x12]; \
+		s13 ^= alpha[0x13]; \
+		s14 ^= alpha[0x14]; \
+		s15 ^= alpha[0x15]; \
+		s16 ^= alpha[0x16]; \
+		s17 ^= alpha[0x17]; \
+		s18 ^= alpha[0x18]; \
+		s19 ^= alpha[0x19]; \
+		s1A ^= alpha[0x1A]; \
+		s1B ^= alpha[0x1B]; \
+		s1C ^= alpha[0x1C]; \
+		s1D ^= alpha[0x1D]; \
+		s1E ^= alpha[0x1E]; \
+		s1F ^= alpha[0x1F]; \
+		SBOX(s00, s08, s10, s18); \
+		SBOX(s01, s09, s11, s19); \
+		SBOX(s02, s0A, s12, s1A); \
+		SBOX(s03, s0B, s13, s1B); \
+		SBOX(s04, s0C, s14, s1C); \
+		SBOX(s05, s0D, s15, s1D); \
+		SBOX(s06, s0E, s16, s1E); \
+		SBOX(s07, s0F, s17, s1F); \
+		L(s00, s09, s12, s1B); \
+		L(s01, s0A, s13, s1C); \
+		L(s02, s0B, s14, s1D); \
+		L(s03, s0C, s15, s1E); \
+		L(s04, s0D, s16, s1F); \
+		L(s05, s0E, s17, s18); \
+		L(s06, s0F, s10, s19); \
+		L(s07, s08, s11, s1A); \
+		L(s00, s02, s05, s07); \
+		L(s10, s13, s15, s16); \
+		L(s09, s0B, s0C, s0E); \
+		L(s19, s1A, s1C, s1F); \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_HAMSI
+
+#define P_BIG   do { \
+		unsigned r; \
+		for (r = 0; r < 6; r ++) \
+			ROUND_BIG(r, alpha_n); \
+	} while (0)
+
+#define PF_BIG   do { \
+		unsigned r; \
+		for (r = 0; r < 12; r ++) \
+			ROUND_BIG(r, alpha_f); \
+	} while (0)
+
+#else
+
+#define P_BIG   do { \
+		ROUND_BIG(0, alpha_n); \
+		ROUND_BIG(1, alpha_n); \
+		ROUND_BIG(2, alpha_n); \
+		ROUND_BIG(3, alpha_n); \
+		ROUND_BIG(4, alpha_n); \
+		ROUND_BIG(5, alpha_n); \
+	} while (0)
+
+#define PF_BIG   do { \
+		ROUND_BIG(0, alpha_f); \
+		ROUND_BIG(1, alpha_f); \
+		ROUND_BIG(2, alpha_f); \
+		ROUND_BIG(3, alpha_f); \
+		ROUND_BIG(4, alpha_f); \
+		ROUND_BIG(5, alpha_f); \
+		ROUND_BIG(6, alpha_f); \
+		ROUND_BIG(7, alpha_f); \
+		ROUND_BIG(8, alpha_f); \
+		ROUND_BIG(9, alpha_f); \
+		ROUND_BIG(10, alpha_f); \
+		ROUND_BIG(11, alpha_f); \
+	} while (0)
+
+#endif
+
+#define T_BIG   do { \
+		/* order is important */ \
+		cF = (sc->h[0xF] ^= s17); \
+		cE = (sc->h[0xE] ^= s16); \
+		cD = (sc->h[0xD] ^= s15); \
+		cC = (sc->h[0xC] ^= s14); \
+		cB = (sc->h[0xB] ^= s13); \
+		cA = (sc->h[0xA] ^= s12); \
+		c9 = (sc->h[0x9] ^= s11); \
+		c8 = (sc->h[0x8] ^= s10); \
+		c7 = (sc->h[0x7] ^= s07); \
+		c6 = (sc->h[0x6] ^= s06); \
+		c5 = (sc->h[0x5] ^= s05); \
+		c4 = (sc->h[0x4] ^= s04); \
+		c3 = (sc->h[0x3] ^= s03); \
+		c2 = (sc->h[0x2] ^= s02); \
+		c1 = (sc->h[0x1] ^= s01); \
+		c0 = (sc->h[0x0] ^= s00); \
+	} while (0)
+
+static void
+hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
+{
+	DECL_STATE_BIG
+#if !SPH_64
+	sph_u32 tmp;
+#endif
+
+#if SPH_64
+	sc->count += (sph_u64)num << 6;
+#else
+	tmp = SPH_T32((sph_u32)num << 6);
+	sc->count_low = SPH_T32(sc->count_low + tmp);
+	sc->count_high += (sph_u32)((num >> 13) >> 13);
+	if (sc->count_low < tmp)
+		sc->count_high ++;
+#endif
+	READ_STATE_BIG(sc);
+	while (num -- > 0) {
+		sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+		sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
+
+		INPUT_BIG;
+		P_BIG;
+		T_BIG;
+		buf += 8;
+	}
+	WRITE_STATE_BIG(sc);
+}
+
+static void
+hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
+{
+	sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+	sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
+	DECL_STATE_BIG
+
+	READ_STATE_BIG(sc);
+	INPUT_BIG;
+	PF_BIG;
+	T_BIG;
+	WRITE_STATE_BIG(sc);
+}
+
+static void
+hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
+{
+	sc->partial_len = 0;
+	memcpy(sc->h, iv, sizeof sc->h);
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_high = sc->count_low = 0;
+#endif
+}
+
+static void
+hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
+{
+	if (sc->partial_len != 0) {
+		size_t mlen;
+
+		mlen = 8 - sc->partial_len;
+		if (len < mlen) {
+			memcpy(sc->partial + sc->partial_len, data, len);
+			sc->partial_len += len;
+			return;
+		} else {
+			memcpy(sc->partial + sc->partial_len, data, mlen);
+			len -= mlen;
+			data = (const unsigned char *)data + mlen;
+			hamsi_big(sc, sc->partial, 1);
+			sc->partial_len = 0;
+		}
+	}
+
+	hamsi_big(sc, data, (len >> 3));
+	data = (const unsigned char *)data + (len & ~(size_t)7);
+	len &= (size_t)7;
+	memcpy(sc->partial, data, len);
+	sc->partial_len = len;
+}
+
+static void
+hamsi_big_close(sph_hamsi_big_context *sc,
+	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+	unsigned char pad[8];
+	size_t ptr, u;
+	unsigned z;
+	unsigned char *out;
+
+	ptr = sc->partial_len;
+#if SPH_64
+	sph_enc64be(pad, sc->count + (ptr << 3) + n);
+#else
+	sph_enc32be(pad, sc->count_high);
+	sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
+#endif
+	z = 0x80 >> n;
+	sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
+	while (ptr < 8)
+		sc->partial[ptr ++] = 0;
+	hamsi_big(sc, sc->partial, 1);
+	hamsi_big_final(sc, pad);
+	out = dst;
+	if (out_size_w32 == 12) {
+		sph_enc32be(out +  0, sc->h[ 0]);
+		sph_enc32be(out +  4, sc->h[ 1]);
+		sph_enc32be(out +  8, sc->h[ 3]);
+		sph_enc32be(out + 12, sc->h[ 4]);
+		sph_enc32be(out + 16, sc->h[ 5]);
+		sph_enc32be(out + 20, sc->h[ 6]);
+		sph_enc32be(out + 24, sc->h[ 8]);
+		sph_enc32be(out + 28, sc->h[ 9]);
+		sph_enc32be(out + 32, sc->h[10]);
+		sph_enc32be(out + 36, sc->h[12]);
+		sph_enc32be(out + 40, sc->h[13]);
+		sph_enc32be(out + 44, sc->h[15]);
+	} else {
+		for (u = 0; u < 16; u ++)
+			sph_enc32be(out + (u << 2), sc->h[u]);
+	}
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi224_init(void *cc)
+{
+	hamsi_small_init(cc, IV224);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi224(void *cc, const void *data, size_t len)
+{
+	hamsi_small_core(cc, data, len);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi224_close(void *cc, void *dst)
+{
+	hamsi_small_close(cc, 0, 0, dst, 7);
+	hamsi_small_init(cc, IV224);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_small_close(cc, ub, n, dst, 7);
+	hamsi_small_init(cc, IV224);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi256_init(void *cc)
+{
+	hamsi_small_init(cc, IV256);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi256(void *cc, const void *data, size_t len)
+{
+	hamsi_small_core(cc, data, len);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi256_close(void *cc, void *dst)
+{
+	hamsi_small_close(cc, 0, 0, dst, 8);
+	hamsi_small_init(cc, IV256);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_small_close(cc, ub, n, dst, 8);
+	hamsi_small_init(cc, IV256);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi384_init(void *cc)
+{
+	hamsi_big_init(cc, IV384);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi384(void *cc, const void *data, size_t len)
+{
+	hamsi_big_core(cc, data, len);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi384_close(void *cc, void *dst)
+{
+	hamsi_big_close(cc, 0, 0, dst, 12);
+	hamsi_big_init(cc, IV384);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_big_close(cc, ub, n, dst, 12);
+	hamsi_big_init(cc, IV384);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi512_init(void *cc)
+{
+	hamsi_big_init(cc, IV512);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi512(void *cc, const void *data, size_t len)
+{
+	hamsi_big_core(cc, data, len);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi512_close(void *cc, void *dst)
+{
+	hamsi_big_close(cc, 0, 0, dst, 16);
+	hamsi_big_init(cc, IV512);
+}
+
+/* see sph_hamsi.h */
+void
+sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_big_close(cc, ub, n, dst, 16);
+	hamsi_big_init(cc, IV512);
+}
--- a/sph/hamsi_helper.c
+++ b/sph/hamsi_helper.c
--- a/sph/panama.c
+++ b/sph/panama.c
@ -0,0 +1,334 @@
				@@ -0,0 +1,334 @@
+/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * PANAMA implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_panama.h"
+
+#define LVAR17(b)  sph_u32 \
+	b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
+	b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
+	b ## 12, b ## 13, b ## 14, b ## 15, b ## 16;
+
+#define LVARS   \
+	LVAR17(a) \
+	LVAR17(g) \
+	LVAR17(p) \
+	LVAR17(t)
+
+#define M17(macro)   do { \
+		macro( 0,  1,  2,  4); \
+		macro( 1,  2,  3,  5); \
+		macro( 2,  3,  4,  6); \
+		macro( 3,  4,  5,  7); \
+		macro( 4,  5,  6,  8); \
+		macro( 5,  6,  7,  9); \
+		macro( 6,  7,  8, 10); \
+		macro( 7,  8,  9, 11); \
+		macro( 8,  9, 10, 12); \
+		macro( 9, 10, 11, 13); \
+		macro(10, 11, 12, 14); \
+		macro(11, 12, 13, 15); \
+		macro(12, 13, 14, 16); \
+		macro(13, 14, 15,  0); \
+		macro(14, 15, 16,  1); \
+		macro(15, 16,  0,  2); \
+		macro(16,  0,  1,  3); \
+	} while (0)
+
+#define BUPDATE1(n0, n2)   do { \
+		sc->buffer[ptr24][n0] ^= sc->buffer[ptr31][n2]; \
+		sc->buffer[ptr31][n2] ^= INW1(n2); \
+	} while (0)
+
+#define BUPDATE   do { \
+		BUPDATE1(0, 2); \
+		BUPDATE1(1, 3); \
+		BUPDATE1(2, 4); \
+		BUPDATE1(3, 5); \
+		BUPDATE1(4, 6); \
+		BUPDATE1(5, 7); \
+		BUPDATE1(6, 0); \
+		BUPDATE1(7, 1); \
+	} while (0)
+
+#define RSTATE(n0, n1, n2, n4)    (a ## n0 = sc->state[n0])
+
+#define WSTATE(n0, n1, n2, n4)    (sc->state[n0] = a ## n0)
+
+#define GAMMA(n0, n1, n2, n4)   \
+	(g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2)))
+
+#define PI_ALL   do { \
+		p0  = g0; \
+		p1  = SPH_ROTL32( g7,  1); \
+		p2  = SPH_ROTL32(g14,  3); \
+		p3  = SPH_ROTL32( g4,  6); \
+		p4  = SPH_ROTL32(g11, 10); \
+		p5  = SPH_ROTL32( g1, 15); \
+		p6  = SPH_ROTL32( g8, 21); \
+		p7  = SPH_ROTL32(g15, 28); \
+		p8  = SPH_ROTL32( g5,  4); \
+		p9  = SPH_ROTL32(g12, 13); \
+		p10 = SPH_ROTL32( g2, 23); \
+		p11 = SPH_ROTL32( g9,  2); \
+		p12 = SPH_ROTL32(g16, 14); \
+		p13 = SPH_ROTL32( g6, 27); \
+		p14 = SPH_ROTL32(g13,  9); \
+		p15 = SPH_ROTL32( g3, 24); \
+		p16 = SPH_ROTL32(g10,  8); \
+	} while (0)
+
+#define THETA(n0, n1, n2, n4)   \
+	(t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4)
+
+#define SIGMA_ALL   do { \
+		a0 = t0 ^ 1; \
+		a1 = t1 ^ INW2(0); \
+		a2 = t2 ^ INW2(1); \
+		a3 = t3 ^ INW2(2); \
+		a4 = t4 ^ INW2(3); \
+		a5 = t5 ^ INW2(4); \
+		a6 = t6 ^ INW2(5); \
+		a7 = t7 ^ INW2(6); \
+		a8 = t8 ^ INW2(7); \
+		a9  =  t9 ^ sc->buffer[ptr16][0]; \
+		a10 = t10 ^ sc->buffer[ptr16][1]; \
+		a11 = t11 ^ sc->buffer[ptr16][2]; \
+		a12 = t12 ^ sc->buffer[ptr16][3]; \
+		a13 = t13 ^ sc->buffer[ptr16][4]; \
+		a14 = t14 ^ sc->buffer[ptr16][5]; \
+		a15 = t15 ^ sc->buffer[ptr16][6]; \
+		a16 = t16 ^ sc->buffer[ptr16][7]; \
+	} while (0)
+
+#define PANAMA_STEP   do { \
+		unsigned ptr16, ptr24, ptr31; \
+ \
+		ptr24 = (ptr0 - 8) & 31; \
+		ptr31 = (ptr0 - 1) & 31; \
+		BUPDATE; \
+		M17(GAMMA); \
+		PI_ALL; \
+		M17(THETA); \
+		ptr16 = ptr0 ^ 16; \
+		SIGMA_ALL; \
+		ptr0 = ptr31; \
+	} while (0)
+
+/*
+ * These macros are used to compute
+ */
+#define INC0     1
+#define INC1     2
+#define INC2     3
+#define INC3     4
+#define INC4     5
+#define INC5     6
+#define INC6     7
+#define INC7     8
+
+/*
+ * Push data by blocks of 32 bytes. "pbuf" must be 32-bit aligned. Each
+ * iteration processes 32 data bytes; "num" contains the number of
+ * iterations.
+ */
+static void
+panama_push(sph_panama_context *sc, const unsigned char *pbuf, size_t num)
+{
+	LVARS
+	unsigned ptr0;
+#if SPH_LITTLE_FAST
+#define INW1(i)   sph_dec32le_aligned(pbuf + 4 * (i))
+#else
+	sph_u32 X_var[8];
+#define INW1(i)   X_var[i]
+#endif
+#define INW2(i)   INW1(i)
+
+	M17(RSTATE);
+	ptr0 = sc->buffer_ptr;
+	while (num -- > 0) {
+#if !SPH_LITTLE_FAST
+		int i;
+
+		for (i = 0; i < 8; i ++)
+			X_var[i] = sph_dec32le_aligned(pbuf + 4 * (i));
+#endif
+		PANAMA_STEP;
+		pbuf = (const unsigned char *)pbuf + 32;
+	}
+	M17(WSTATE);
+	sc->buffer_ptr = ptr0;
+
+#undef INW1
+#undef INW2
+}
+
+/*
+ * Perform the "pull" operation repeatedly ("num" times). The hash output
+ * will be extracted from the state afterwards.
+ */
+static void
+panama_pull(sph_panama_context *sc, unsigned num)
+{
+	LVARS
+	unsigned ptr0;
+#define INW1(i)     INW_H1(INC ## i)
+#define INW_H1(i)   INW_H2(i)
+#define INW_H2(i)   a ## i
+#define INW2(i)     sc->buffer[ptr4][i]
+
+	M17(RSTATE);
+	ptr0 = sc->buffer_ptr;
+	while (num -- > 0) {
+		unsigned ptr4;
+
+		ptr4 = (ptr0 + 4) & 31;
+		PANAMA_STEP;
+	}
+	M17(WSTATE);
+
+#undef INW1
+#undef INW_H1
+#undef INW_H2
+#undef INW2
+}
+
+/* see sph_panama.h */
+void
+sph_panama_init(void *cc)
+{
+	sph_panama_context *sc;
+
+	sc = cc;
+	/*
+	 * This is not completely conformant, but "it will work
+	 * everywhere". Initial state consists of zeroes everywhere.
+	 * Conceptually, the sph_u32 type may have padding bits which
+	 * must not be set to 0; but such an architecture remains to
+	 * be seen.
+	 */
+	sc->data_ptr = 0;
+	memset(sc->buffer, 0, sizeof sc->buffer);
+	sc->buffer_ptr = 0;
+	memset(sc->state, 0, sizeof sc->state);
+}
+
+#ifdef SPH_UPTR
+static void
+panama_short(void *cc, const void *data, size_t len)
+#else
+void
+sph_panama(void *cc, const void *data, size_t len)
+#endif
+{
+	sph_panama_context *sc;
+	unsigned current;
+
+	sc = cc;
+	current = sc->data_ptr;
+	while (len > 0) {
+		unsigned clen;
+
+		clen = (sizeof sc->data) - current;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->data + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		current += clen;
+		if (current == sizeof sc->data) {
+			current = 0;
+			panama_push(sc, sc->data, 1);
+		}
+	}
+	sc->data_ptr = current;
+}
+
+#ifdef SPH_UPTR
+/* see sph_panama.h */
+void
+sph_panama(void *cc, const void *data, size_t len)
+{
+	sph_panama_context *sc;
+	unsigned current;
+	size_t rlen;
+
+	if (len < (2 * sizeof sc->data)) {
+		panama_short(cc, data, len);
+		return;
+	}
+	sc = cc;
+	current = sc->data_ptr;
+	if (current > 0) {
+		unsigned t;
+
+		t = (sizeof sc->data) - current;
+		panama_short(sc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 3) != 0) {
+		panama_short(sc, data, len);
+		return;
+	}
+#endif
+	panama_push(sc, data, len >> 5);
+	rlen = len & 31;
+	if (rlen > 0)
+		memcpy(sc->data,
+			(const unsigned char *)data + len - rlen, rlen);
+	sc->data_ptr = rlen;
+}
+#endif
+
+/* see sph_panama.h */
+void
+sph_panama_close(void *cc, void *dst)
+{
+	sph_panama_context *sc;
+	unsigned current;
+	int i;
+
+	sc = cc;
+	current = sc->data_ptr;
+	sc->data[current ++] = 0x01;
+	memset(sc->data + current, 0, (sizeof sc->data) - current);
+	panama_push(sc, sc->data, 1);
+	panama_pull(sc, 32);
+	for (i = 0; i < 8; i ++)
+		sph_enc32le((unsigned char *)dst + 4 * i, sc->state[i + 9]);
+	sph_panama_init(sc);
+}
--- a/sph/sph_hamsi.h
+++ b/sph/sph_hamsi.h
@ -0,0 +1,311 @@
				@@ -0,0 +1,311 @@
+/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Hamsi interface. This code implements Hamsi with the recommended
+ * parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_hamsi.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_HAMSI_H__
+#define SPH_HAMSI_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for Hamsi-224.
+ */
+#define SPH_SIZE_hamsi224   224
+
+/**
+ * Output size (in bits) for Hamsi-256.
+ */
+#define SPH_SIZE_hamsi256   256
+
+/**
+ * Output size (in bits) for Hamsi-384.
+ */
+#define SPH_SIZE_hamsi384   384
+
+/**
+ * Output size (in bits) for Hamsi-512.
+ */
+#define SPH_SIZE_hamsi512   512
+
+/**
+ * This structure is a context for Hamsi-224 and Hamsi-256 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Hamsi computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running Hamsi
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char partial[4];
+	size_t partial_len;
+	sph_u32 h[8];
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_hamsi_small_context;
+
+/**
+ * This structure is a context for Hamsi-224 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_small_context sph_hamsi224_context;
+
+/**
+ * This structure is a context for Hamsi-256 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_small_context sph_hamsi256_context;
+
+/**
+ * This structure is a context for Hamsi-384 and Hamsi-512 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Hamsi computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running Hamsi
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char partial[8];
+	size_t partial_len;
+	sph_u32 h[16];
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_hamsi_big_context;
+
+/**
+ * This structure is a context for Hamsi-384 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_big_context sph_hamsi384_context;
+
+/**
+ * This structure is a context for Hamsi-512 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_big_context sph_hamsi512_context;
+
+/**
+ * Initialize a Hamsi-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-224 context (pointer to a
+ *             <code>sph_hamsi224_context</code>)
+ */
+void sph_hamsi224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Hamsi-224 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Hamsi-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-256 context (pointer to a
+ *             <code>sph_hamsi256_context</code>)
+ */
+void sph_hamsi256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Hamsi-256 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Hamsi-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-384 context (pointer to a
+ *             <code>sph_hamsi384_context</code>)
+ */
+void sph_hamsi384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Hamsi-384 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Hamsi-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-512 context (pointer to a
+ *             <code>sph_hamsi512_context</code>)
+ */
+void sph_hamsi512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Hamsi-512 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Hamsi-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif
--- a/sph/sph_panama.h
+++ b/sph/sph_panama.h
@ -0,0 +1,118 @@
				@@ -0,0 +1,118 @@
+/* $Id: sph_panama.h 154 2010-04-26 17:00:24Z tp $ */
+/**
+ * PANAMA interface.
+ *
+ * PANAMA has been published in: J. Daemen and C. Clapp, "Fast Hashing
+ * and Stream Encryption with PANAMA", Fast Software Encryption -
+ * FSE'98, LNCS 1372, Springer (1998), pp. 60--74.
+ *
+ * PANAMA is not fully defined with regards to endianness and related
+ * topics. This implementation follows strict little-endian conventions:
+ * <ul>
+ * <li>Each 32-byte input block is split into eight 32-bit words, the
+ * first (leftmost) word being numbered 0.</li>
+ * <li>Each such 32-bit word is decoded from memory in little-endian
+ * convention.</li>
+ * <li>The additional padding bit equal to "1" is added by considering
+ * the least significant bit in a byte to come first; practically, this
+ * means that a single byte of value 0x01 is appended to the (byte-oriented)
+ * message, and then 0 to 31 bytes of value 0x00.</li>
+ * <li>The output consists of eight 32-bit words; the word numbered 0 is
+ * written first (in leftmost position) and it is encoded in little-endian
+ * convention.
+ * </ul>
+ * With these conventions, PANAMA is sometimes known as "PANAMA-LE". The
+ * PANAMA reference implementation uses our conventions for input, but
+ * prescribes no convention for output.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_panama.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_PANAMA_H__
+#define SPH_PANAMA_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for PANAMA.
+ */
+#define SPH_SIZE_panama   256
+
+/**
+ * This structure is a context for PANAMA computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a PANAMA computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running PANAMA computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char data[32];   /* first field, for alignment */
+	unsigned data_ptr;
+
+	sph_u32 buffer[32][8];
+	unsigned buffer_ptr;
+
+	sph_u32 state[17];
+#endif
+} sph_panama_context;
+
+/**
+ * Initialize a PANAMA context. This process performs no memory allocation.
+ *
+ * @param cc   the PANAMA context (pointer to a <code>sph_panama_context</code>)
+ */
+void sph_panama_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the PANAMA context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_panama(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current PANAMA computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the PANAMA context
+ * @param dst   the destination buffer
+ */
+void sph_panama_close(void *cc, void *dst);
+
+#endif
--- a/twecoin.c
+++ b/twecoin.c
@ -0,0 +1,166 @@
				@@ -0,0 +1,166 @@
+/*-
+ * Copyright 2009 Colin Percival, 2014 phm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+#include "config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "sph/sph_fugue.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_hamsi.h"
+#include "sph/sph_panama.h"
+
+/*
+ * Encode a length len/4 vector of (uint32_t) into a length len vector of
+ * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
+ */
+static inline void
+be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+{
+	uint32_t i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = htobe32(src[i]);
+}
+
+
+inline void twehash(void *state, const void *input)
+{
+    sph_fugue256_context     ctx_fugue;
+    sph_shavite256_context   ctx_shavite;
+    sph_hamsi256_context     ctx_hamsi;
+    sph_panama_context       ctx_panama;
+
+    unsigned char hash[4][64];
+    memset(hash, 0, sizeof(hash));
+
+    sph_fugue256_init(&ctx_fugue);
+    sph_fugue256 (&ctx_fugue, input, 80);
+    sph_fugue256_close(&ctx_fugue, &hash[0]);
+        
+    sph_shavite256_init(&ctx_shavite);
+    sph_shavite256(&ctx_shavite, &hash[0], 64);
+    sph_shavite256_close(&ctx_shavite, &hash[1]);
+
+    sph_hamsi256_init(&ctx_hamsi);
+    sph_hamsi256(&ctx_hamsi, &hash[1], 64);
+    sph_hamsi256_close(&ctx_hamsi, &hash[2]);
+
+    sph_panama_init(&ctx_panama);
+    sph_panama(&ctx_panama, &hash[2], 64);
+    sph_panama_close(&ctx_panama, &hash[3]);
+
+    memcpy(state, hash[3], 32);
+}
+
+static const uint32_t diff1targ = 0x0000ffff;
+
+
+/* Used externally as confirmation of correct OCL code */
+int twecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
+{
+	uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	uint32_t data[20], ohash[8];
+	//char *scratchbuf;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+	data[19] = htobe32(nonce);
+	//scratchbuf = alloca(SCRATCHBUF_SIZE);
+	twehash(ohash, data);
+	tmp_hash7 = be32toh(ohash[7]);
+
+	applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
+				(long unsigned int)Htarg,
+				(long unsigned int)diff1targ,
+				(long unsigned int)tmp_hash7);
+	if (tmp_hash7 > diff1targ)
+		return -1;
+	if (tmp_hash7 > Htarg)
+		return 0;
+	return 1;
+}
+
+void twecoin_regenhash(struct work *work)
+{
+        uint32_t data[20];
+        char *scratchbuf;
+        uint32_t *nonce = (uint32_t *)(work->data + 76);
+        uint32_t *ohash = (uint32_t *)(work->hash);
+
+        be32enc_vect(data, (const uint32_t *)work->data, 19);
+        data[19] = htobe32(*nonce);
+        twehash(ohash, data);
+}
+
+bool scanhash_twecoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
+		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
+		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
+{
+	uint32_t *nonce = (uint32_t *)(pdata + 76);
+	char *scratchbuf;
+	uint32_t data[20];
+	uint32_t tmp_hash7;
+	uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
+	bool ret = false;
+
+	be32enc_vect(data, (const uint32_t *)pdata, 19);
+
+	while(1) {
+		uint32_t ostate[8];
+
+		*nonce = ++n;
+		data[19] = (n);
+		twehash(ostate, data);
+		tmp_hash7 = (ostate[7]);
+
+		applog(LOG_INFO, "data7 %08lx",
+					(long unsigned int)data[7]);
+
+		if (unlikely(tmp_hash7 <= Htarg)) {
+			((uint32_t *)pdata)[19] = htobe32(n);
+			*last_nonce = n;
+			ret = true;
+			break;
+		}
+
+		if (unlikely((n >= max_nonce) || thr->work_restart)) {
+			*last_nonce = n;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+
+
--- a/twecoin.h
+++ b/twecoin.h
@ -0,0 +1,10 @@
				@@ -0,0 +1,10 @@
+#ifndef TWECOIN_H
+#define TWECOIN_H
+
+#include "miner.h"
+
+extern int twecoin_test(unsigned char *pdata, const unsigned char *ptarget,
+			uint32_t nonce);
+extern void twecoin_regenhash(struct work *work);
+
+#endif /* TWECOIN_H */