Browse Source

Merge branch 'twecoin'

Conflicts:
	Makefile.am
	configure.ac
	driver-opencl.c
	miner.h
	ocl.c
	sgminer.c
	sph/Makefile.am
djm34
phm 11 years ago
parent
commit
769c9f8036
  1. 1
      Makefile.am
  2. 1
      configure.ac
  3. 6
      driver-opencl.c
  4. 309
      kernel/hamsi.cl
  5. 39640
      kernel/hamsi_helper.cl
  6. 155
      kernel/panama.cl
  7. 406
      kernel/shavite.cl
  8. 428
      kernel/twecoin.cl
  9. 1
      miner.h
  10. 5
      ocl.c
  11. 6
      sgminer.c
  12. 2
      sph/Makefile.am
  13. 859
      sph/hamsi.c
  14. 39640
      sph/hamsi_helper.c
  15. 334
      sph/panama.c
  16. 311
      sph/sph_hamsi.h
  17. 118
      sph/sph_panama.h
  18. 166
      twecoin.c
  19. 10
      twecoin.h

1
Makefile.am

@ -51,6 +51,7 @@ sgminer_SOURCES += inkcoin.c inkcoin.h @@ -51,6 +51,7 @@ sgminer_SOURCES += inkcoin.c inkcoin.h
sgminer_SOURCES += animecoin.c animecoin.h
sgminer_SOURCES += groestlcoin.c groestlcoin.h
sgminer_SOURCES += sifcoin.c sifcoin.h
sgminer_SOURCES += twecoin.c twecoin.h
sgminer_SOURCES += kernel/*.cl
bin_SCRIPTS = $(top_srcdir)/kernel/*.cl

1
configure.ac

@ -354,6 +354,7 @@ AC_DEFINE_UNQUOTED([INKCOIN_KERNNAME], ["inkcoin"], [Filename for InkCoin optimi @@ -354,6 +354,7 @@ AC_DEFINE_UNQUOTED([INKCOIN_KERNNAME], ["inkcoin"], [Filename for InkCoin optimi
AC_DEFINE_UNQUOTED([ANIMECOIN_KERNNAME], ["animecoin"], [Filename for AnimeCoin optimised kernel])
AC_DEFINE_UNQUOTED([GROESTLCOIN_KERNNAME], ["groestlcoin"], [Filename for GroestlCoin optimised kernel])
AC_DEFINE_UNQUOTED([SIFCOIN_KERNNAME], ["sifcoin"], [Filename for Sifcoin optimised kernel])
AC_DEFINE_UNQUOTED([TWECOIN_KERNNAME], ["twecoin"], [Filename for Twecoin optimised kernel])
AC_SUBST(OPENCL_LIBS)
AC_SUBST(OPENCL_FLAGS)

6
driver-opencl.c

@ -225,6 +225,8 @@ static enum cl_kernels select_kernel(char *arg) @@ -225,6 +225,8 @@ static enum cl_kernels select_kernel(char *arg)
return KL_GROESTLCOIN;
if (!strcmp(arg, SIFCOIN_KERNNAME))
return KL_SIFCOIN;
if (!strcmp(arg, TWECOIN_KERNNAME))
return KL_TWECOIN;
return KL_NONE;
}
@ -1392,6 +1394,9 @@ static bool opencl_thread_prepare(struct thr_info *thr) @@ -1392,6 +1394,9 @@ static bool opencl_thread_prepare(struct thr_info *thr)
case KL_SIFCOIN:
cgpu->kname = SIFCOIN_KERNNAME;
break;
case KL_TWECOIN:
cgpu->kname = TWECOIN_KERNNAME;
break;
default:
break;
}
@ -1436,6 +1441,7 @@ static bool opencl_thread_init(struct thr_info *thr) @@ -1436,6 +1441,7 @@ static bool opencl_thread_init(struct thr_info *thr)
case KL_ANIMECOIN:
case KL_GROESTLCOIN:
case KL_SIFCOIN:
case KL_TWECOIN:
thrdata->queue_kernel_parameters = &queue_sph_kernel;
break;
default:

309
kernel/hamsi.cl

@ -0,0 +1,309 @@ @@ -0,0 +1,309 @@
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
* Hamsi implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_SMALL_FOOTPRINT_HAMSI 1
#endif
/*
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
* table lookup during message expansion (1 to 8, inclusive). If we note
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
* then we will get t tables (where t=ceil(w/n)) of individual size
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
* n=5, there are 7 tables, but the last one uses only two bits on
* input, not five).
*
* Also, we read t rows of r words from RAM. Words in a given row are
* concatenated in RAM in that order, so most of the cost is about
* reading the first row word; comparatively, cache misses are thus
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
*
* When n=1, tables are "special" in that we omit the first entry of
* each table (which always contains 0), so that total table size is
* halved.
*
* We thus have the following (size1 is the cumulative table size of
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
*
* n size1 size2 t1 t2
* ---------------------------------------
* 1 1024 4096 32 64
* 2 2048 8192 16 32
* 3 2688 10880 11 22
* 4 4096 16384 8 16
* 5 6272 25600 7 13
* 6 10368 41984 6 11
* 7 16896 73856 5 10
* 8 32768 131072 4 8
*
* So there is a trade-off: a lower n makes the tables fit better in
* L1 cache, but increases the number of memory accesses. The optimal
* value depends on the amount of available L1 cache and the relative
* impact of a cache miss.
*
* Experimentally, in ideal benchmark conditions (which are not necessarily
* realistic with regards to L1 cache contention), it seems that n=8 is
* the best value on "big" architectures (those with 32 kB or more of L1
* cache), while n=4 is better on "small" architectures. This was tested
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
* (8 kB L1 cache).
*
* Note: with n=1, the 32 tables (actually implemented as one big table)
* are read entirely and sequentially, regardless of the input data,
* thus avoiding any data-dependent table access pattern.
*/
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#endif
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 8
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
#include "hamsi_helper.cl"
__constant static const sph_u32 HAMSI_IV224[] = {
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
/*
* This version is the one used in the Hamsi submission package for
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
* shall soon be corrected in the official Hamsi specification.
*
__constant static const sph_u32 HAMSI_IV224[] = {
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
*/
__constant static const sph_u32 HAMSI_IV256[] = {
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
};
__constant static const sph_u32 HAMSI_IV384[] = {
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
SPH_C32(0x2c204b61)
};
__constant static const sph_u32 HAMSI_IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
__constant static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
__constant static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define HAMSI_DECL_STATE_SMALL \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
#define HAMSI_READ_STATE_SMALL(sc) do { \
c0 = h[0x0]; \
c1 = h[0x1]; \
c2 = h[0x2]; \
c3 = h[0x3]; \
c4 = h[0x4]; \
c5 = h[0x5]; \
c6 = h[0x6]; \
c7 = h[0x7]; \
} while (0)
#define HAMSI_WRITE_STATE_SMALL(sc) do { \
h[0x0] = c0; \
h[0x1] = c1; \
h[0x2] = c2; \
h[0x3] = c3; \
h[0x4] = c4; \
h[0x5] = c5; \
h[0x6] = c6; \
h[0x7] = c7; \
} while (0)
#define s0 m0
#define s1 m1
#define s2 c0
#define s3 c1
#define s4 c2
#define s5 c3
#define s6 m2
#define s7 m3
#define s8 m4
#define s9 m5
#define sA c4
#define sB c5
#define sC c6
#define sD c7
#define sE m6
#define sF m7
#define SBOX(a, b, c, d) do { \
sph_u32 t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
} while (0)
#define L(a, b, c, d) do { \
(a) = SPH_ROTL32(a, 13); \
(c) = SPH_ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = SPH_ROTL32(b, 1); \
(d) = SPH_ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = SPH_ROTL32(a, 5); \
(c) = SPH_ROTL32(c, 22); \
} while (0)
#define ROUND_SMALL(rc, alpha) do { \
s0 ^= alpha[0x00]; \
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
s2 ^= alpha[0x02]; \
s3 ^= alpha[0x03]; \
s4 ^= alpha[0x08]; \
s5 ^= alpha[0x09]; \
s6 ^= alpha[0x0A]; \
s7 ^= alpha[0x0B]; \
s8 ^= alpha[0x10]; \
s9 ^= alpha[0x11]; \
sA ^= alpha[0x12]; \
sB ^= alpha[0x13]; \
sC ^= alpha[0x18]; \
sD ^= alpha[0x19]; \
sE ^= alpha[0x1A]; \
sF ^= alpha[0x1B]; \
SBOX(s0, s4, s8, sC); \
SBOX(s1, s5, s9, sD); \
SBOX(s2, s6, sA, sE); \
SBOX(s3, s7, sB, sF); \
L(s0, s5, sA, sF); \
L(s1, s6, sB, sC); \
L(s2, s7, s8, sD); \
L(s3, s4, s9, sE); \
} while (0)
#define P_SMALL do { \
ROUND_SMALL(0, alpha_n); \
ROUND_SMALL(1, alpha_n); \
ROUND_SMALL(2, alpha_n); \
} while (0)
#define PF_SMALL do { \
ROUND_SMALL(0, alpha_f); \
ROUND_SMALL(1, alpha_f); \
ROUND_SMALL(2, alpha_f); \
ROUND_SMALL(3, alpha_f); \
ROUND_SMALL(4, alpha_f); \
ROUND_SMALL(5, alpha_f); \
} while (0)
#define T_SMALL do { \
/* order is important */ \
c7 = (h[7] ^= sB); \
c6 = (h[6] ^= sA); \
c5 = (h[5] ^= s9); \
c4 = (h[4] ^= s8); \
c3 = (h[3] ^= s3); \
c2 = (h[2] ^= s2); \
c1 = (h[1] ^= s1); \
c0 = (h[0] ^= s0); \
} while (0)

39640
kernel/hamsi_helper.cl

File diff suppressed because it is too large Load Diff

155
kernel/panama.cl

@ -0,0 +1,155 @@ @@ -0,0 +1,155 @@
/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */
/*
* PANAMA implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#define LVAR17(b) sph_u32 \
b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
b ## 12, b ## 13, b ## 14, b ## 15, b ## 16;
#define LVARS \
LVAR17(a) \
LVAR17(g) \
LVAR17(p) \
LVAR17(t)
#define M17(macro) do { \
macro( 0, 1, 2, 4); \
macro( 1, 2, 3, 5); \
macro( 2, 3, 4, 6); \
macro( 3, 4, 5, 7); \
macro( 4, 5, 6, 8); \
macro( 5, 6, 7, 9); \
macro( 6, 7, 8, 10); \
macro( 7, 8, 9, 11); \
macro( 8, 9, 10, 12); \
macro( 9, 10, 11, 13); \
macro(10, 11, 12, 14); \
macro(11, 12, 13, 15); \
macro(12, 13, 14, 16); \
macro(13, 14, 15, 0); \
macro(14, 15, 16, 1); \
macro(15, 16, 0, 2); \
macro(16, 0, 1, 3); \
} while (0)
#define BUPDATE1(n0, n2) do { \
buffer[ptr24][n0] ^= buffer[ptr31][n2]; \
buffer[ptr31][n2] ^= INW1(n2); \
} while (0)
#define BUPDATE do { \
BUPDATE1(0, 2); \
BUPDATE1(1, 3); \
BUPDATE1(2, 4); \
BUPDATE1(3, 5); \
BUPDATE1(4, 6); \
BUPDATE1(5, 7); \
BUPDATE1(6, 0); \
BUPDATE1(7, 1); \
} while (0)
#define RSTATE(n0, n1, n2, n4) (a ## n0 = state[n0])
#define WSTATE(n0, n1, n2, n4) (state[n0] = a ## n0)
#define GAMMA(n0, n1, n2, n4) \
(g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2)))
#define PI_ALL do { \
p0 = g0; \
p1 = SPH_ROTL32( g7, 1); \
p2 = SPH_ROTL32(g14, 3); \
p3 = SPH_ROTL32( g4, 6); \
p4 = SPH_ROTL32(g11, 10); \
p5 = SPH_ROTL32( g1, 15); \
p6 = SPH_ROTL32( g8, 21); \
p7 = SPH_ROTL32(g15, 28); \
p8 = SPH_ROTL32( g5, 4); \
p9 = SPH_ROTL32(g12, 13); \
p10 = SPH_ROTL32( g2, 23); \
p11 = SPH_ROTL32( g9, 2); \
p12 = SPH_ROTL32(g16, 14); \
p13 = SPH_ROTL32( g6, 27); \
p14 = SPH_ROTL32(g13, 9); \
p15 = SPH_ROTL32( g3, 24); \
p16 = SPH_ROTL32(g10, 8); \
} while (0)
#define THETA(n0, n1, n2, n4) \
(t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4)
#define SIGMA_ALL do { \
a0 = t0 ^ 1; \
a1 = t1 ^ INW2(0); \
a2 = t2 ^ INW2(1); \
a3 = t3 ^ INW2(2); \
a4 = t4 ^ INW2(3); \
a5 = t5 ^ INW2(4); \
a6 = t6 ^ INW2(5); \
a7 = t7 ^ INW2(6); \
a8 = t8 ^ INW2(7); \
a9 = t9 ^ buffer[ptr16][0]; \
a10 = t10 ^ buffer[ptr16][1]; \
a11 = t11 ^ buffer[ptr16][2]; \
a12 = t12 ^ buffer[ptr16][3]; \
a13 = t13 ^ buffer[ptr16][4]; \
a14 = t14 ^ buffer[ptr16][5]; \
a15 = t15 ^ buffer[ptr16][6]; \
a16 = t16 ^ buffer[ptr16][7]; \
} while (0)
#define PANAMA_STEP do { \
unsigned ptr16, ptr24, ptr31; \
\
ptr24 = (ptr0 - 8) & 31; \
ptr31 = (ptr0 - 1) & 31; \
BUPDATE; \
M17(GAMMA); \
PI_ALL; \
M17(THETA); \
ptr16 = ptr0 ^ 16; \
SIGMA_ALL; \
ptr0 = ptr31; \
} while (0)
/*
* These macros are used to compute
*/
#define INC0 1
#define INC1 2
#define INC2 3
#define INC3 4
#define INC4 5
#define INC5 6
#define INC6 7
#define INC7 8

406
kernel/shavite.cl

@ -605,3 +605,409 @@ @@ -605,3 +605,409 @@
hF ^= p7; \
} while (0)
#define c256(msg) do { \
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \
sph_u32 x0, x1, x2, x3; \
\
p0 = h[0x0]; \
p1 = h[0x1]; \
p2 = h[0x2]; \
p3 = h[0x3]; \
p4 = h[0x4]; \
p5 = h[0x5]; \
p6 = h[0x6]; \
p7 = h[0x7]; \
/* round 0 */ \
x0 = p4 ^ rk0; \
x1 = p5 ^ rk1; \
x2 = p6 ^ rk2; \
x3 = p7 ^ rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk4; \
x1 ^= rk5; \
x2 ^= rk6; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk8; \
x1 ^= rk9; \
x2 ^= rkA; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 1 */ \
x0 = p0 ^ rkC; \
x1 = p1 ^ rkD; \
x2 = p2 ^ rkE; \
x3 = p3 ^ rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
rk0 ^= rkC ^ count0; \
rk1 ^= rkD ^ SPH_T32(~count1); \
rk2 ^= rkE; \
rk3 ^= rkF; \
x0 ^= rk0; \
x1 ^= rk1; \
x2 ^= rk2; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
rk4 ^= rk0; \
rk5 ^= rk1; \
rk6 ^= rk2; \
rk7 ^= rk3; \
x0 ^= rk4; \
x1 ^= rk5; \
x2 ^= rk6; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 2 */ \
KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
rk8 ^= rk4; \
rk9 ^= rk5; \
rkA ^= rk6; \
rkB ^= rk7; \
x0 = p4 ^ rk8; \
x1 = p5 ^ rk9; \
x2 = p6 ^ rkA; \
x3 = p7 ^ rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
rkC ^= rk8; \
rkD ^= rk9; \
rkE ^= rkA; \
rkF ^= rkB; \
x0 ^= rkC; \
x1 ^= rkD; \
x2 ^= rkE; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk0 ^= rkD; \
x0 ^= rk0; \
rk1 ^= rkE; \
x1 ^= rk1; \
rk2 ^= rkF; \
x2 ^= rk2; \
rk3 ^= rk0; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 3 */ \
rk4 ^= rk1; \
x0 = p0 ^ rk4; \
rk5 ^= rk2; \
x1 = p1 ^ rk5; \
rk6 ^= rk3; \
x2 = p2 ^ rk6; \
rk7 ^= rk4; \
x3 = p3 ^ rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk8 ^= rk5; \
x0 ^= rk8; \
rk9 ^= rk6; \
x1 ^= rk9; \
rkA ^= rk7; \
x2 ^= rkA; \
rkB ^= rk8; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rkC ^= rk9; \
x0 ^= rkC; \
rkD ^= rkA; \
x1 ^= rkD; \
rkE ^= rkB; \
x2 ^= rkE; \
rkF ^= rkC; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 4 */ \
KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
rk0 ^= rkC; \
rk1 ^= rkD; \
rk2 ^= rkE; \
rk3 ^= rkF; \
x0 = p4 ^ rk0; \
x1 = p5 ^ rk1; \
x2 = p6 ^ rk2; \
x3 = p7 ^ rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
rk4 ^= rk0; \
rk5 ^= rk1; \
rk6 ^= rk2; \
rk7 ^= rk3; \
x0 ^= rk4; \
x1 ^= rk5; \
x2 ^= rk6; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
rk8 ^= rk4; \
rk9 ^= rk5 ^ count1; \
rkA ^= rk6 ^ SPH_T32(~count0); \
rkB ^= rk7; \
x0 ^= rk8; \
x1 ^= rk9; \
x2 ^= rkA; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 5 */ \
KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
rkC ^= rk8; \
rkD ^= rk9; \
rkE ^= rkA; \
rkF ^= rkB; \
x0 = p0 ^ rkC; \
x1 = p1 ^ rkD; \
x2 = p2 ^ rkE; \
x3 = p3 ^ rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk0 ^= rkD; \
x0 ^= rk0; \
rk1 ^= rkE; \
x1 ^= rk1; \
rk2 ^= rkF; \
x2 ^= rk2; \
rk3 ^= rk0; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk4 ^= rk1; \
x0 ^= rk4; \
rk5 ^= rk2; \
x1 ^= rk5; \
rk6 ^= rk3; \
x2 ^= rk6; \
rk7 ^= rk4; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 6 */ \
rk8 ^= rk5; \
x0 = p4 ^ rk8; \
rk9 ^= rk6; \
x1 = p5 ^ rk9; \
rkA ^= rk7; \
x2 = p6 ^ rkA; \
rkB ^= rk8; \
x3 = p7 ^ rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rkC ^= rk9; \
x0 ^= rkC; \
rkD ^= rkA; \
x1 ^= rkD; \
rkE ^= rkB; \
x2 ^= rkE; \
rkF ^= rkC; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
rk0 ^= rkC; \
rk1 ^= rkD; \
rk2 ^= rkE; \
rk3 ^= rkF; \
x0 ^= rk0; \
x1 ^= rk1; \
x2 ^= rk2; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 7 */ \
KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
rk4 ^= rk0; \
rk5 ^= rk1; \
rk6 ^= rk2 ^ count1; \
rk7 ^= rk3 ^ SPH_T32(~count0); \
x0 = p0 ^ rk4; \
x1 = p1 ^ rk5; \
x2 = p2 ^ rk6; \
x3 = p3 ^ rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
rk8 ^= rk4; \
rk9 ^= rk5; \
rkA ^= rk6; \
rkB ^= rk7; \
x0 ^= rk8; \
x1 ^= rk9; \
x2 ^= rkA; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
rkC ^= rk8; \
rkD ^= rk9; \
rkE ^= rkA; \
rkF ^= rkB; \
x0 ^= rkC; \
x1 ^= rkD; \
x2 ^= rkE; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 8 */ \
rk0 ^= rkD; \
x0 = p4 ^ rk0; \
rk1 ^= rkE; \
x1 = p5 ^ rk1; \
rk2 ^= rkF; \
x2 = p6 ^ rk2; \
rk3 ^= rk0; \
x3 = p7 ^ rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk4 ^= rk1; \
x0 ^= rk4; \
rk5 ^= rk2; \
x1 ^= rk5; \
rk6 ^= rk3; \
x2 ^= rk6; \
rk7 ^= rk4; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk8 ^= rk5; \
x0 ^= rk8; \
rk9 ^= rk6; \
x1 ^= rk9; \
rkA ^= rk7; \
x2 ^= rkA; \
rkB ^= rk8; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 9 */ \
rkC ^= rk9; \
x0 = p0 ^ rkC; \
rkD ^= rkA; \
x1 = p1 ^ rkD; \
rkE ^= rkB; \
x2 = p2 ^ rkE; \
rkF ^= rkC; \
x3 = p3 ^ rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
rk0 ^= rkC; \
rk1 ^= rkD; \
rk2 ^= rkE; \
rk3 ^= rkF; \
x0 ^= rk0; \
x1 ^= rk1; \
x2 ^= rk2; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
rk4 ^= rk0; \
rk5 ^= rk1; \
rk6 ^= rk2; \
rk7 ^= rk3; \
x0 ^= rk4; \
x1 ^= rk5; \
x2 ^= rk6; \
x3 ^= rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 10 */ \
KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
rk8 ^= rk4; \
rk9 ^= rk5; \
rkA ^= rk6; \
rkB ^= rk7; \
x0 = p4 ^ rk8; \
x1 = p5 ^ rk9; \
x2 = p6 ^ rkA; \
x3 = p7 ^ rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
rkC ^= rk8 ^ count0; \
rkD ^= rk9; \
rkE ^= rkA; \
rkF ^= rkB ^ SPH_T32(~count1); \
x0 ^= rkC; \
x1 ^= rkD; \
x2 ^= rkE; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk0 ^= rkD; \
x0 ^= rk0; \
rk1 ^= rkE; \
x1 ^= rk1; \
rk2 ^= rkF; \
x2 ^= rk2; \
rk3 ^= rk0; \
x3 ^= rk3; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 11 */ \
rk4 ^= rk1; \
x0 = p0 ^ rk4; \
rk5 ^= rk2; \
x1 = p1 ^ rk5; \
rk6 ^= rk3; \
x2 = p2 ^ rk6; \
rk7 ^= rk4; \
x3 = p3 ^ rk7; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk8 ^= rk5; \
x0 ^= rk8; \
rk9 ^= rk6; \
x1 ^= rk9; \
rkA ^= rk7; \
x2 ^= rkA; \
rkB ^= rk8; \
x3 ^= rkB; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rkC ^= rk9; \
x0 ^= rkC; \
rkD ^= rkA; \
x1 ^= rkD; \
rkE ^= rkB; \
x2 ^= rkE; \
rkF ^= rkC; \
x3 ^= rkF; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
h[0x0] ^= p0; \
h[0x1] ^= p1; \
h[0x2] ^= p2; \
h[0x3] ^= p3; \
h[0x4] ^= p4; \
h[0x5] ^= p5; \
h[0x6] ^= p6; \
h[0x7] ^= p7; \
} while(0)

428
kernel/twecoin.cl

@ -0,0 +1,428 @@ @@ -0,0 +1,428 @@
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
#define SPH_BIG_ENDIAN 1
#endif
#define SPH_UPTR sph_u64
typedef unsigned int sph_u32;
typedef int sph_s32;
#ifndef __OPENCL_VERSION__
typedef unsigned long long sph_u64;
typedef long long sph_s64;
#else
typedef unsigned long sph_u64;
typedef long sph_s64;
#endif
#define SPH_64 1
#define SPH_64_TRUE 1
#define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
#define SPH_HAMSI_EXPAND_SMALL 1
#include "fugue.cl"
#include "shavite.cl"
#include "hamsi.cl"
#include "panama.cl"
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
#if SPH_BIG_ENDIAN
#define DEC32BE(x) (*(const __global sph_u32 *) (x))
#else
#define DEC32BE(x) SWAP4(*(const __global sph_u32 *) (x))
#endif
#define sph_bswap32(x) SWAP4(x)
static void sph_enc32be(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
#if SPH_LITTLE_ENDIAN
val = sph_bswap32(val);
#endif
*(sph_u32 *)dst = val;
#else
if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_LITTLE_ENDIAN
val = sph_bswap32(val);
#endif
*(sph_u32 *)dst = val;
} else {
((unsigned char *)dst)[0] = (val >> 24);
((unsigned char *)dst)[1] = (val >> 16);
((unsigned char *)dst)[2] = (val >> 8);
((unsigned char *)dst)[3] = val;
}
#endif
#else
((unsigned char *)dst)[0] = (val >> 24);
((unsigned char *)dst)[1] = (val >> 16);
((unsigned char *)dst)[2] = (val >> 8);
((unsigned char *)dst)[3] = val;
#endif
}
static void sph_enc32le(void *dst, sph_u32 val)
{
#if defined SPH_UPTR
#if SPH_UNALIGNED
#if SPH_BIG_ENDIAN
val = sph_bswap32(val);
#endif
*(sph_u32 *)dst = val;
#else
if (((SPH_UPTR)dst & 3) == 0) {
#if SPH_BIG_ENDIAN
val = sph_bswap32(val);
#endif
*(sph_u32 *)dst = val;
} else {
((unsigned char *)dst)[0] = val;
((unsigned char *)dst)[1] = (val >> 8);
((unsigned char *)dst)[2] = (val >> 16);
((unsigned char *)dst)[3] = (val >> 24);
}
#endif
#else
((unsigned char *)dst)[0] = val;
((unsigned char *)dst)[1] = (val >> 8);
((unsigned char *)dst)[2] = (val >> 16);
((unsigned char *)dst)[3] = (val >> 24);
#endif
}
static sph_u32 sph_dec32le_aligned(const void *src)
{
#if SPH_LITTLE_ENDIAN
return *(const sph_u32 *)src;
#elif SPH_BIG_ENDIAN
return sph_bswap32(*(const sph_u32 *)src);
#else
return (sph_u32)(((const unsigned char *)src)[0])
| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
#endif
}
__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target)
{
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
AES0[i] = AES0_C[i];
AES1[i] = AES1_C[i];
AES2[i] = AES2_C[i];
AES3[i] = AES3_C[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
unsigned char hash[64];
for(unsigned j = 0; j < 64; j++)
hash[j] = 0;
sph_u32 gid = get_global_id(0);
// fugue
{
sph_u32 S00 = 0, S01 = 0, S02 = 0, S03 = 0, S04 = 0, S05 = 0, S06 = 0, S07 = 0, S08 = 0, S09 = 0; \
sph_u32 S10 = 0, S11 = 0, S12 = 0, S13 = 0, S14 = 0, S15 = 0, S16 = 0, S17 = 0, S18 = 0, S19 = 0; \
sph_u32 S20 = 0, S21 = 0, S22 = IV256[0], S23 = IV256[1], S24 = IV256[2], S25 = IV256[3], S26 = IV256[4], S27 = IV256[5], S28 = IV256[6], S29 = IV256[7];
FUGUE256_5(DEC32BE(block + 0x0), DEC32BE(block + 0x4), DEC32BE(block + 0x8), DEC32BE(block + 0xc), DEC32BE(block + 0x10));
FUGUE256_5(DEC32BE(block + 0x14), DEC32BE(block + 0x18), DEC32BE(block + 0x1c), DEC32BE(block + 0x20), DEC32BE(block + 0x24));
FUGUE256_5(DEC32BE(block + 0x28), DEC32BE(block + 0x2c), DEC32BE(block + 0x30), DEC32BE(block + 0x34), DEC32BE(block + 0x38));
FUGUE256_4(DEC32BE(block + 0x3c), DEC32BE(block + 0x40), DEC32BE(block + 0x44), DEC32BE(block + 0x48));
TIX2(SWAP4(gid), S06, S07, S14, S16, S00);
CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
SMIX(S03, S04, S05, S06);
CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
SMIX(S00, S01, S02, S03);
TIX2(0, S00, S01, S08, S10, S24);
CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
SMIX(S27, S28, S29, S00);
CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
SMIX(S24, S25, S26, S27);
TIX2(0x280, S24, S25, S02, S04, S18);
CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
SMIX(S21, S22, S23, S24);
CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
SMIX(S18, S19, S20, S21);
CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02);
SMIX(S15, S16, S17, S18);
CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29);
SMIX(S12, S13, S14, S15);
CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26);
SMIX(S09, S10, S11, S12);
CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23);
SMIX(S06, S07, S08, S09);
CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
SMIX(S03, S04, S05, S06);
CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
SMIX(S00, S01, S02, S03);
CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
SMIX(S27, S28, S29, S00);
CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
SMIX(S24, S25, S26, S27);
CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
SMIX(S21, S22, S23, S24);
CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
SMIX(S18, S19, S20, S21);
S22 ^= S18;
S03 ^= S18;
SMIX(S03, S04, S05, S06);
S07 ^= S03;
S19 ^= S03;
SMIX(S19, S20, S21, S22);
S23 ^= S19;
S04 ^= S19;
SMIX(S04, S05, S06, S07);
S08 ^= S04;
S20 ^= S04;
SMIX(S20, S21, S22, S23);
S24 ^= S20;
S05 ^= S20;
SMIX(S05, S06, S07, S08);
S09 ^= S05;
S21 ^= S05;
SMIX(S21, S22, S23, S24);
S25 ^= S21;
S06 ^= S21;
SMIX(S06, S07, S08, S09);
S10 ^= S06;
S22 ^= S06;
SMIX(S22, S23, S24, S25);
S26 ^= S22;
S07 ^= S22;
SMIX(S07, S08, S09, S10);
S11 ^= S07;
S23 ^= S07;
SMIX(S23, S24, S25, S26);
S27 ^= S23;
S08 ^= S23;
SMIX(S08, S09, S10, S11);
S12 ^= S08;
S24 ^= S08;
SMIX(S24, S25, S26, S27);
S28 ^= S24;
S09 ^= S24;
SMIX(S09, S10, S11, S12);
S13 ^= S09;
S25 ^= S09;
SMIX(S25, S26, S27, S28);
S29 ^= S25;
S10 ^= S25;
SMIX(S10, S11, S12, S13);
S14 ^= S10;
S26 ^= S10;
SMIX(S26, S27, S28, S29);
S00 ^= S26;
S11 ^= S26;
SMIX(S11, S12, S13, S14);
S15 ^= S11;
S27 ^= S11;
SMIX(S27, S28, S29, S00);
S01 ^= S27;
S12 ^= S27;
SMIX(S12, S13, S14, S15);
S16 ^= S12;
S28 ^= S12;
SMIX(S28, S29, S00, S01);
S02 ^= S28;
S13 ^= S28;
SMIX(S13, S14, S15, S16);
S17 ^= S13;
S29 ^= S13;
SMIX(S29, S00, S01, S02);
S03 ^= S29;
S14 ^= S29;
SMIX(S14, S15, S16, S17);
S18 ^= S14;
S00 ^= S14;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S15 ^= S00;
SMIX(S15, S16, S17, S18);
S19 ^= S15;
S01 ^= S15;
SMIX(S01, S02, S03, S04);
S05 ^= S01;
S16 ^= S01;
sph_enc32be((unsigned char*) &hash + 0, S02);
sph_enc32be((unsigned char*) &hash + 4, S03);
sph_enc32be((unsigned char*) &hash + 8, S04);
sph_enc32be((unsigned char*) &hash + 12, S05);
sph_enc32be((unsigned char*) &hash + 16, S16);
sph_enc32be((unsigned char*) &hash + 20, S17);
sph_enc32be((unsigned char*) &hash + 24, S18);
sph_enc32be((unsigned char*) &hash + 28, S19);
}
// shavite
{
sph_u32 h[] = { SPH_C32(0x49BB3E47), SPH_C32(0x2674860D), SPH_C32(0xA8B392AC), SPH_C32(0x021AC4E6), SPH_C32(0x409283CF), SPH_C32(0x620E5D86), SPH_C32(0x6D929DCB), SPH_C32(0x96CC2A8B) };
sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;
sph_u32 count0, count1;
rk0 = sph_dec32le_aligned((const unsigned char *)&hash + 0);
rk1 = sph_dec32le_aligned((const unsigned char *)&hash + 4);
rk2 = sph_dec32le_aligned((const unsigned char *)&hash + 8);
rk3 = sph_dec32le_aligned((const unsigned char *)&hash + 12);
rk4 = sph_dec32le_aligned((const unsigned char *)&hash + 16);
rk5 = sph_dec32le_aligned((const unsigned char *)&hash + 20);
rk6 = sph_dec32le_aligned((const unsigned char *)&hash + 24);
rk7 = sph_dec32le_aligned((const unsigned char *)&hash + 28);
rk8 = sph_dec32le_aligned((const unsigned char *)&hash + 32);
rk9 = sph_dec32le_aligned((const unsigned char *)&hash + 36);
rkA = sph_dec32le_aligned((const unsigned char *)&hash + 40);
rkB = sph_dec32le_aligned((const unsigned char *)&hash + 44);
rkC = sph_dec32le_aligned((const unsigned char *)&hash + 48);
rkD = sph_dec32le_aligned((const unsigned char *)&hash + 52);
rkE = sph_dec32le_aligned((const unsigned char *)&hash + 56);
rkF = sph_dec32le_aligned((const unsigned char *)&hash + 60);
count0 = 0x200;
count1 = 0;
c256(buf);
rk0 = 0x80;
rk1 = 0;
rk2 = 0;
rk3 = 0;
rk4 = 0;
rk5 = 0;
rk6 = 0;
rk7 = 0;
rk8 = 0;
rk9 = 0;
rkA = 0;
rkB = 0;
rkC = 0;
rkD = 0x2000000;
rkE = 0;
rkF = 0x1000000;
count0 = 0;
count1 = 0;
c256(buf);
for (unsigned u = 0; u < 8; u ++)
sph_enc32le((unsigned char *)&hash + (u << 2), h[u]);
}
// hamsi
{
sph_u32 c0 = HAMSI_IV256[0], c1 = HAMSI_IV256[1], c2 = HAMSI_IV256[2], c3 = HAMSI_IV256[3];
sph_u32 c4 = HAMSI_IV256[4], c5 = HAMSI_IV256[5], c6 = HAMSI_IV256[6], c7 = HAMSI_IV256[7];
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 h[8] = { c0, c1, c2, c3, c4, c5, c6, c7 };
#define buf(u) hash[i + u]
for(int i = 0; i < 64; i += 4) {
INPUT_SMALL;
P_SMALL;
T_SMALL;
}
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_SMALL;
P_SMALL;
T_SMALL;
#undef buf
#define buf(u) 0
INPUT_SMALL;
P_SMALL;
T_SMALL;
#undef buf
#define buf(u) (u == 2 ? 2 : 0)
INPUT_SMALL;
PF_SMALL;
T_SMALL;
for (unsigned u = 0; u < 8; u ++)
sph_enc32be((unsigned char*) &hash + (u << 2), h[u]);
}
// panama
{
sph_u32 buffer[32][8];
sph_u32 state[17];
int i, j;
for(i = 0; i < 32; i++)
for(j = 0; j < 8; j++)
buffer[i][j] = 0;
for(i = 0; i < 17; i++)
state[i] = 0;
LVARS
unsigned ptr0 = 0;
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 4 * (i))
#define INW2(i) INW1(i)
M17(RSTATE);
PANAMA_STEP;
#undef INW1
#undef INW2
#define INW1(i) sph_dec32le_aligned((unsigned char*) &hash + 32 + 4 * (i))
#define INW2(i) INW1(i)
PANAMA_STEP;
M17(WSTATE);
#undef INW1
#undef INW2
#define INW1(i) (sph_u32) (i == 0)
#define INW2(i) INW1(i)
M17(RSTATE);
PANAMA_STEP;
M17(WSTATE);
#undef INW1
#undef INW2
#define INW1(i) INW_H1(INC ## i)
#define INW_H1(i) INW_H2(i)
#define INW_H2(i) a ## i
#define INW2(i) buffer[ptr4][i]
M17(RSTATE);
for(i = 0; i < 32; i++) {
unsigned ptr4 = (ptr0 + 4) & 31;
PANAMA_STEP;
}
M17(WSTATE);
#undef INW1
#undef INW_H1
#undef INW_H2
#undef INW2
bool result = ((((sph_u64) state[16] << 32) | state[15]) <= target);
if (result)
output[output[0xFF]++] = SWAP4(gid);
}
}

1
miner.h

@ -390,6 +390,7 @@ enum cl_kernels { @@ -390,6 +390,7 @@ enum cl_kernels {
KL_MYRIADCOIN_GROESTL,
KL_FUGUECOIN,
KL_GROESTLCOIN,
KL_TWECOIN,
};
enum dev_reason {

5
ocl.c

@ -499,6 +499,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) @@ -499,6 +499,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
strcpy(filename, SIFCOIN_KERNNAME".cl");
strcpy(binaryfilename, SIFCOIN_KERNNAME);
break;
case KL_TWECOIN:
applog(LOG_WARNING, "Kernel twecoin is experimental.");
strcpy(filename, TWECOIN_KERNNAME".cl");
strcpy(binaryfilename, TWECOIN_KERNNAME);
break;
case KL_NONE: /* Shouldn't happen */
break;
}

6
sgminer.c

@ -4254,6 +4254,9 @@ void write_config(FILE *fcfg) @@ -4254,6 +4254,9 @@ void write_config(FILE *fcfg)
case KL_SIFCOIN:
fprintf(fcfg, SIFCOIN_KERNNAME);
break;
case KL_TWECOIN:
fprintf(fcfg, TWECOIN_KERNNAME);
break;
}
}
@ -6085,6 +6088,9 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) @@ -6085,6 +6088,9 @@ static void rebuild_nonce(struct work *work, uint32_t nonce)
case KL_SIFCOIN:
sifcoin_regenhash(work);
break;
case KL_TWECOIN:
twecoin_regenhash(work);
break;
default:
scrypt_regenhash(work);
break;

2
sph/Makefile.am

@ -1,3 +1,3 @@ @@ -1,3 +1,3 @@
noinst_LIBRARIES = libsph.a
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c
libsph_a_SOURCES = bmw.c echo.c jh.c luffa.c simd.c blake.c cubehash.c groestl.c keccak.c shavite.c skein.c sha2.c sha2big.c fugue.c hamsi.c panama.c

859
sph/hamsi.c

@ -0,0 +1,859 @@ @@ -0,0 +1,859 @@
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
/*
* Hamsi implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_hamsi.h"
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_SMALL_FOOTPRINT_HAMSI 1
#endif
/*
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
* table lookup during message expansion (1 to 8, inclusive). If we note
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
* then we will get t tables (where t=ceil(w/n)) of individual size
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
* n=5, there are 7 tables, but the last one uses only two bits on
* input, not five).
*
* Also, we read t rows of r words from RAM. Words in a given row are
* concatenated in RAM in that order, so most of the cost is about
* reading the first row word; comparatively, cache misses are thus
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
*
* When n=1, tables are "special" in that we omit the first entry of
* each table (which always contains 0), so that total table size is
* halved.
*
* We thus have the following (size1 is the cumulative table size of
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
*
* n size1 size2 t1 t2
* ---------------------------------------
* 1 1024 4096 32 64
* 2 2048 8192 16 32
* 3 2688 10880 11 22
* 4 4096 16384 8 16
* 5 6272 25600 7 13
* 6 10368 41984 6 11
* 7 16896 73856 5 10
* 8 32768 131072 4 8
*
* So there is a trade-off: a lower n makes the tables fit better in
* L1 cache, but increases the number of memory accesses. The optimal
* value depends on the amount of available L1 cache and the relative
* impact of a cache miss.
*
* Experimentally, in ideal benchmark conditions (which are not necessarily
* realistic with regards to L1 cache contention), it seems that n=8 is
* the best value on "big" architectures (those with 32 kB or more of L1
* cache), while n=4 is better on "small" architectures. This was tested
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
* (8 kB L1 cache).
*
* Note: with n=1, the 32 tables (actually implemented as one big table)
* are read entirely and sequentially, regardless of the input data,
* thus avoiding any data-dependent table access pattern.
*/
#if !defined SPH_HAMSI_EXPAND_SMALL
#if SPH_SMALL_FOOTPRINT_HAMSI
#define SPH_HAMSI_EXPAND_SMALL 4
#else
#define SPH_HAMSI_EXPAND_SMALL 8
#endif
#endif
#if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 8
#endif
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
#include "hamsi_helper.c"
static const sph_u32 IV224[] = {
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
/*
* This version is the one used in the Hamsi submission package for
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
* shall soon be corrected in the official Hamsi specification.
*
static const sph_u32 IV224[] = {
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
};
*/
static const sph_u32 IV256[] = {
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
};
static const sph_u32 IV384[] = {
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
SPH_C32(0x2c204b61)
};
static const sph_u32 IV512[] = {
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
SPH_C32(0x6769756d)
};
static const sph_u32 alpha_n[] = {
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
};
static const sph_u32 alpha_f[] = {
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
};
#define DECL_STATE_SMALL \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
#define READ_STATE_SMALL(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
} while (0)
#define WRITE_STATE_SMALL(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
} while (0)
#define s0 m0
#define s1 m1
#define s2 c0
#define s3 c1
#define s4 c2
#define s5 c3
#define s6 m2
#define s7 m3
#define s8 m4
#define s9 m5
#define sA c4
#define sB c5
#define sC c6
#define sD c7
#define sE m6
#define sF m7
#define SBOX(a, b, c, d) do { \
sph_u32 t; \
t = (a); \
(a) &= (c); \
(a) ^= (d); \
(c) ^= (b); \
(c) ^= (a); \
(d) |= t; \
(d) ^= (b); \
t ^= (c); \
(b) = (d); \
(d) |= t; \
(d) ^= (a); \
(a) &= (b); \
t ^= (a); \
(b) ^= (d); \
(b) ^= t; \
(a) = (c); \
(c) = (b); \
(b) = (d); \
(d) = SPH_T32(~t); \
} while (0)
#define L(a, b, c, d) do { \
(a) = SPH_ROTL32(a, 13); \
(c) = SPH_ROTL32(c, 3); \
(b) ^= (a) ^ (c); \
(d) ^= (c) ^ SPH_T32((a) << 3); \
(b) = SPH_ROTL32(b, 1); \
(d) = SPH_ROTL32(d, 7); \
(a) ^= (b) ^ (d); \
(c) ^= (d) ^ SPH_T32((b) << 7); \
(a) = SPH_ROTL32(a, 5); \
(c) = SPH_ROTL32(c, 22); \
} while (0)
#define ROUND_SMALL(rc, alpha) do { \
s0 ^= alpha[0x00]; \
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
s2 ^= alpha[0x02]; \
s3 ^= alpha[0x03]; \
s4 ^= alpha[0x08]; \
s5 ^= alpha[0x09]; \
s6 ^= alpha[0x0A]; \
s7 ^= alpha[0x0B]; \
s8 ^= alpha[0x10]; \
s9 ^= alpha[0x11]; \
sA ^= alpha[0x12]; \
sB ^= alpha[0x13]; \
sC ^= alpha[0x18]; \
sD ^= alpha[0x19]; \
sE ^= alpha[0x1A]; \
sF ^= alpha[0x1B]; \
SBOX(s0, s4, s8, sC); \
SBOX(s1, s5, s9, sD); \
SBOX(s2, s6, sA, sE); \
SBOX(s3, s7, sB, sF); \
L(s0, s5, sA, sF); \
L(s1, s6, sB, sC); \
L(s2, s7, s8, sD); \
L(s3, s4, s9, sE); \
} while (0)
#define P_SMALL do { \
ROUND_SMALL(0, alpha_n); \
ROUND_SMALL(1, alpha_n); \
ROUND_SMALL(2, alpha_n); \
} while (0)
#define PF_SMALL do { \
ROUND_SMALL(0, alpha_f); \
ROUND_SMALL(1, alpha_f); \
ROUND_SMALL(2, alpha_f); \
ROUND_SMALL(3, alpha_f); \
ROUND_SMALL(4, alpha_f); \
ROUND_SMALL(5, alpha_f); \
} while (0)
#define T_SMALL do { \
/* order is important */ \
c7 = (sc->h[7] ^= sB); \
c6 = (sc->h[6] ^= sA); \
c5 = (sc->h[5] ^= s9); \
c4 = (sc->h[4] ^= s8); \
c3 = (sc->h[3] ^= s3); \
c2 = (sc->h[2] ^= s2); \
c1 = (sc->h[1] ^= s1); \
c0 = (sc->h[0] ^= s0); \
} while (0)
static void
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_SMALL
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 5;
#else
tmp = SPH_T32((sph_u32)num << 5);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 14);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_SMALL(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
INPUT_SMALL;
P_SMALL;
T_SMALL;
buf += 4;
}
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
DECL_STATE_SMALL
READ_STATE_SMALL(sc);
INPUT_SMALL;
PF_SMALL;
T_SMALL;
WRITE_STATE_SMALL(sc);
}
static void
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 4 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_small(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_small(sc, data, (len >> 2));
data = (const unsigned char *)data + (len & ~(size_t)3);
len &= (size_t)3;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_small_close(sph_hamsi_small_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[12];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
memcpy(pad, sc->partial, ptr);
#if SPH_64
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad + 4, sc->count_high);
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 4)
pad[ptr ++] = 0;
hamsi_small(sc, pad, 2);
hamsi_small_final(sc, pad + 8);
out = dst;
for (u = 0; u < out_size_w32; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
#define DECL_STATE_BIG \
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
#define READ_STATE_BIG(sc) do { \
c0 = sc->h[0x0]; \
c1 = sc->h[0x1]; \
c2 = sc->h[0x2]; \
c3 = sc->h[0x3]; \
c4 = sc->h[0x4]; \
c5 = sc->h[0x5]; \
c6 = sc->h[0x6]; \
c7 = sc->h[0x7]; \
c8 = sc->h[0x8]; \
c9 = sc->h[0x9]; \
cA = sc->h[0xA]; \
cB = sc->h[0xB]; \
cC = sc->h[0xC]; \
cD = sc->h[0xD]; \
cE = sc->h[0xE]; \
cF = sc->h[0xF]; \
} while (0)
#define WRITE_STATE_BIG(sc) do { \
sc->h[0x0] = c0; \
sc->h[0x1] = c1; \
sc->h[0x2] = c2; \
sc->h[0x3] = c3; \
sc->h[0x4] = c4; \
sc->h[0x5] = c5; \
sc->h[0x6] = c6; \
sc->h[0x7] = c7; \
sc->h[0x8] = c8; \
sc->h[0x9] = c9; \
sc->h[0xA] = cA; \
sc->h[0xB] = cB; \
sc->h[0xC] = cC; \
sc->h[0xD] = cD; \
sc->h[0xE] = cE; \
sc->h[0xF] = cF; \
} while (0)
#define s00 m0
#define s01 m1
#define s02 c0
#define s03 c1
#define s04 m2
#define s05 m3
#define s06 c2
#define s07 c3
#define s08 c4
#define s09 c5
#define s0A m4
#define s0B m5
#define s0C c6
#define s0D c7
#define s0E m6
#define s0F m7
#define s10 m8
#define s11 m9
#define s12 c8
#define s13 c9
#define s14 mA
#define s15 mB
#define s16 cA
#define s17 cB
#define s18 cC
#define s19 cD
#define s1A mC
#define s1B mD
#define s1C cE
#define s1D cF
#define s1E mE
#define s1F mF
#define ROUND_BIG(rc, alpha) do { \
s00 ^= alpha[0x00]; \
s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
s02 ^= alpha[0x02]; \
s03 ^= alpha[0x03]; \
s04 ^= alpha[0x04]; \
s05 ^= alpha[0x05]; \
s06 ^= alpha[0x06]; \
s07 ^= alpha[0x07]; \
s08 ^= alpha[0x08]; \
s09 ^= alpha[0x09]; \
s0A ^= alpha[0x0A]; \
s0B ^= alpha[0x0B]; \
s0C ^= alpha[0x0C]; \
s0D ^= alpha[0x0D]; \
s0E ^= alpha[0x0E]; \
s0F ^= alpha[0x0F]; \
s10 ^= alpha[0x10]; \
s11 ^= alpha[0x11]; \
s12 ^= alpha[0x12]; \
s13 ^= alpha[0x13]; \
s14 ^= alpha[0x14]; \
s15 ^= alpha[0x15]; \
s16 ^= alpha[0x16]; \
s17 ^= alpha[0x17]; \
s18 ^= alpha[0x18]; \
s19 ^= alpha[0x19]; \
s1A ^= alpha[0x1A]; \
s1B ^= alpha[0x1B]; \
s1C ^= alpha[0x1C]; \
s1D ^= alpha[0x1D]; \
s1E ^= alpha[0x1E]; \
s1F ^= alpha[0x1F]; \
SBOX(s00, s08, s10, s18); \
SBOX(s01, s09, s11, s19); \
SBOX(s02, s0A, s12, s1A); \
SBOX(s03, s0B, s13, s1B); \
SBOX(s04, s0C, s14, s1C); \
SBOX(s05, s0D, s15, s1D); \
SBOX(s06, s0E, s16, s1E); \
SBOX(s07, s0F, s17, s1F); \
L(s00, s09, s12, s1B); \
L(s01, s0A, s13, s1C); \
L(s02, s0B, s14, s1D); \
L(s03, s0C, s15, s1E); \
L(s04, s0D, s16, s1F); \
L(s05, s0E, s17, s18); \
L(s06, s0F, s10, s19); \
L(s07, s08, s11, s1A); \
L(s00, s02, s05, s07); \
L(s10, s13, s15, s16); \
L(s09, s0B, s0C, s0E); \
L(s19, s1A, s1C, s1F); \
} while (0)
#if SPH_SMALL_FOOTPRINT_HAMSI
#define P_BIG do { \
unsigned r; \
for (r = 0; r < 6; r ++) \
ROUND_BIG(r, alpha_n); \
} while (0)
#define PF_BIG do { \
unsigned r; \
for (r = 0; r < 12; r ++) \
ROUND_BIG(r, alpha_f); \
} while (0)
#else
#define P_BIG do { \
ROUND_BIG(0, alpha_n); \
ROUND_BIG(1, alpha_n); \
ROUND_BIG(2, alpha_n); \
ROUND_BIG(3, alpha_n); \
ROUND_BIG(4, alpha_n); \
ROUND_BIG(5, alpha_n); \
} while (0)
#define PF_BIG do { \
ROUND_BIG(0, alpha_f); \
ROUND_BIG(1, alpha_f); \
ROUND_BIG(2, alpha_f); \
ROUND_BIG(3, alpha_f); \
ROUND_BIG(4, alpha_f); \
ROUND_BIG(5, alpha_f); \
ROUND_BIG(6, alpha_f); \
ROUND_BIG(7, alpha_f); \
ROUND_BIG(8, alpha_f); \
ROUND_BIG(9, alpha_f); \
ROUND_BIG(10, alpha_f); \
ROUND_BIG(11, alpha_f); \
} while (0)
#endif
#define T_BIG do { \
/* order is important */ \
cF = (sc->h[0xF] ^= s17); \
cE = (sc->h[0xE] ^= s16); \
cD = (sc->h[0xD] ^= s15); \
cC = (sc->h[0xC] ^= s14); \
cB = (sc->h[0xB] ^= s13); \
cA = (sc->h[0xA] ^= s12); \
c9 = (sc->h[0x9] ^= s11); \
c8 = (sc->h[0x8] ^= s10); \
c7 = (sc->h[0x7] ^= s07); \
c6 = (sc->h[0x6] ^= s06); \
c5 = (sc->h[0x5] ^= s05); \
c4 = (sc->h[0x4] ^= s04); \
c3 = (sc->h[0x3] ^= s03); \
c2 = (sc->h[0x2] ^= s02); \
c1 = (sc->h[0x1] ^= s01); \
c0 = (sc->h[0x0] ^= s00); \
} while (0)
static void
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
{
DECL_STATE_BIG
#if !SPH_64
sph_u32 tmp;
#endif
#if SPH_64
sc->count += (sph_u64)num << 6;
#else
tmp = SPH_T32((sph_u32)num << 6);
sc->count_low = SPH_T32(sc->count_low + tmp);
sc->count_high += (sph_u32)((num >> 13) >> 13);
if (sc->count_low < tmp)
sc->count_high ++;
#endif
READ_STATE_BIG(sc);
while (num -- > 0) {
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
INPUT_BIG;
P_BIG;
T_BIG;
buf += 8;
}
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
{
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
DECL_STATE_BIG
READ_STATE_BIG(sc);
INPUT_BIG;
PF_BIG;
T_BIG;
WRITE_STATE_BIG(sc);
}
static void
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
{
sc->partial_len = 0;
memcpy(sc->h, iv, sizeof sc->h);
#if SPH_64
sc->count = 0;
#else
sc->count_high = sc->count_low = 0;
#endif
}
static void
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
{
if (sc->partial_len != 0) {
size_t mlen;
mlen = 8 - sc->partial_len;
if (len < mlen) {
memcpy(sc->partial + sc->partial_len, data, len);
sc->partial_len += len;
return;
} else {
memcpy(sc->partial + sc->partial_len, data, mlen);
len -= mlen;
data = (const unsigned char *)data + mlen;
hamsi_big(sc, sc->partial, 1);
sc->partial_len = 0;
}
}
hamsi_big(sc, data, (len >> 3));
data = (const unsigned char *)data + (len & ~(size_t)7);
len &= (size_t)7;
memcpy(sc->partial, data, len);
sc->partial_len = len;
}
static void
hamsi_big_close(sph_hamsi_big_context *sc,
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
{
unsigned char pad[8];
size_t ptr, u;
unsigned z;
unsigned char *out;
ptr = sc->partial_len;
#if SPH_64
sph_enc64be(pad, sc->count + (ptr << 3) + n);
#else
sph_enc32be(pad, sc->count_high);
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
#endif
z = 0x80 >> n;
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
while (ptr < 8)
sc->partial[ptr ++] = 0;
hamsi_big(sc, sc->partial, 1);
hamsi_big_final(sc, pad);
out = dst;
if (out_size_w32 == 12) {
sph_enc32be(out + 0, sc->h[ 0]);
sph_enc32be(out + 4, sc->h[ 1]);
sph_enc32be(out + 8, sc->h[ 3]);
sph_enc32be(out + 12, sc->h[ 4]);
sph_enc32be(out + 16, sc->h[ 5]);
sph_enc32be(out + 20, sc->h[ 6]);
sph_enc32be(out + 24, sc->h[ 8]);
sph_enc32be(out + 28, sc->h[ 9]);
sph_enc32be(out + 32, sc->h[10]);
sph_enc32be(out + 36, sc->h[12]);
sph_enc32be(out + 40, sc->h[13]);
sph_enc32be(out + 44, sc->h[15]);
} else {
for (u = 0; u < 16; u ++)
sph_enc32be(out + (u << 2), sc->h[u]);
}
}
/* see sph_hamsi.h */
void
sph_hamsi224_init(void *cc)
{
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi224_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 7);
hamsi_small_init(cc, IV224);
}
/* see sph_hamsi.h */
void
sph_hamsi256_init(void *cc)
{
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256(void *cc, const void *data, size_t len)
{
hamsi_small_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi256_close(void *cc, void *dst)
{
hamsi_small_close(cc, 0, 0, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_small_close(cc, ub, n, dst, 8);
hamsi_small_init(cc, IV256);
}
/* see sph_hamsi.h */
void
sph_hamsi384_init(void *cc)
{
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi384_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 12);
hamsi_big_init(cc, IV384);
}
/* see sph_hamsi.h */
void
sph_hamsi512_init(void *cc)
{
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512(void *cc, const void *data, size_t len)
{
hamsi_big_core(cc, data, len);
}
/* see sph_hamsi.h */
void
sph_hamsi512_close(void *cc, void *dst)
{
hamsi_big_close(cc, 0, 0, dst, 16);
hamsi_big_init(cc, IV512);
}
/* see sph_hamsi.h */
void
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
hamsi_big_close(cc, ub, n, dst, 16);
hamsi_big_init(cc, IV512);
}

39640
sph/hamsi_helper.c

File diff suppressed because it is too large Load Diff

334
sph/panama.c

@ -0,0 +1,334 @@ @@ -0,0 +1,334 @@
/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */
/*
* PANAMA implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_panama.h"
#define LVAR17(b) sph_u32 \
b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \
b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \
b ## 12, b ## 13, b ## 14, b ## 15, b ## 16;
#define LVARS \
LVAR17(a) \
LVAR17(g) \
LVAR17(p) \
LVAR17(t)
#define M17(macro) do { \
macro( 0, 1, 2, 4); \
macro( 1, 2, 3, 5); \
macro( 2, 3, 4, 6); \
macro( 3, 4, 5, 7); \
macro( 4, 5, 6, 8); \
macro( 5, 6, 7, 9); \
macro( 6, 7, 8, 10); \
macro( 7, 8, 9, 11); \
macro( 8, 9, 10, 12); \
macro( 9, 10, 11, 13); \
macro(10, 11, 12, 14); \
macro(11, 12, 13, 15); \
macro(12, 13, 14, 16); \
macro(13, 14, 15, 0); \
macro(14, 15, 16, 1); \
macro(15, 16, 0, 2); \
macro(16, 0, 1, 3); \
} while (0)
#define BUPDATE1(n0, n2) do { \
sc->buffer[ptr24][n0] ^= sc->buffer[ptr31][n2]; \
sc->buffer[ptr31][n2] ^= INW1(n2); \
} while (0)
#define BUPDATE do { \
BUPDATE1(0, 2); \
BUPDATE1(1, 3); \
BUPDATE1(2, 4); \
BUPDATE1(3, 5); \
BUPDATE1(4, 6); \
BUPDATE1(5, 7); \
BUPDATE1(6, 0); \
BUPDATE1(7, 1); \
} while (0)
#define RSTATE(n0, n1, n2, n4) (a ## n0 = sc->state[n0])
#define WSTATE(n0, n1, n2, n4) (sc->state[n0] = a ## n0)
#define GAMMA(n0, n1, n2, n4) \
(g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2)))
#define PI_ALL do { \
p0 = g0; \
p1 = SPH_ROTL32( g7, 1); \
p2 = SPH_ROTL32(g14, 3); \
p3 = SPH_ROTL32( g4, 6); \
p4 = SPH_ROTL32(g11, 10); \
p5 = SPH_ROTL32( g1, 15); \
p6 = SPH_ROTL32( g8, 21); \
p7 = SPH_ROTL32(g15, 28); \
p8 = SPH_ROTL32( g5, 4); \
p9 = SPH_ROTL32(g12, 13); \
p10 = SPH_ROTL32( g2, 23); \
p11 = SPH_ROTL32( g9, 2); \
p12 = SPH_ROTL32(g16, 14); \
p13 = SPH_ROTL32( g6, 27); \
p14 = SPH_ROTL32(g13, 9); \
p15 = SPH_ROTL32( g3, 24); \
p16 = SPH_ROTL32(g10, 8); \
} while (0)
#define THETA(n0, n1, n2, n4) \
(t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4)
#define SIGMA_ALL do { \
a0 = t0 ^ 1; \
a1 = t1 ^ INW2(0); \
a2 = t2 ^ INW2(1); \
a3 = t3 ^ INW2(2); \
a4 = t4 ^ INW2(3); \
a5 = t5 ^ INW2(4); \
a6 = t6 ^ INW2(5); \
a7 = t7 ^ INW2(6); \
a8 = t8 ^ INW2(7); \
a9 = t9 ^ sc->buffer[ptr16][0]; \
a10 = t10 ^ sc->buffer[ptr16][1]; \
a11 = t11 ^ sc->buffer[ptr16][2]; \
a12 = t12 ^ sc->buffer[ptr16][3]; \
a13 = t13 ^ sc->buffer[ptr16][4]; \
a14 = t14 ^ sc->buffer[ptr16][5]; \
a15 = t15 ^ sc->buffer[ptr16][6]; \
a16 = t16 ^ sc->buffer[ptr16][7]; \
} while (0)
#define PANAMA_STEP do { \
unsigned ptr16, ptr24, ptr31; \
\
ptr24 = (ptr0 - 8) & 31; \
ptr31 = (ptr0 - 1) & 31; \
BUPDATE; \
M17(GAMMA); \
PI_ALL; \
M17(THETA); \
ptr16 = ptr0 ^ 16; \
SIGMA_ALL; \
ptr0 = ptr31; \
} while (0)
/*
* These macros are used to compute
*/
#define INC0 1
#define INC1 2
#define INC2 3
#define INC3 4
#define INC4 5
#define INC5 6
#define INC6 7
#define INC7 8
/*
* Push data by blocks of 32 bytes. "pbuf" must be 32-bit aligned. Each
* iteration processes 32 data bytes; "num" contains the number of
* iterations.
*/
static void
panama_push(sph_panama_context *sc, const unsigned char *pbuf, size_t num)
{
LVARS
unsigned ptr0;
#if SPH_LITTLE_FAST
#define INW1(i) sph_dec32le_aligned(pbuf + 4 * (i))
#else
sph_u32 X_var[8];
#define INW1(i) X_var[i]
#endif
#define INW2(i) INW1(i)
M17(RSTATE);
ptr0 = sc->buffer_ptr;
while (num -- > 0) {
#if !SPH_LITTLE_FAST
int i;
for (i = 0; i < 8; i ++)
X_var[i] = sph_dec32le_aligned(pbuf + 4 * (i));
#endif
PANAMA_STEP;
pbuf = (const unsigned char *)pbuf + 32;
}
M17(WSTATE);
sc->buffer_ptr = ptr0;
#undef INW1
#undef INW2
}
/*
* Perform the "pull" operation repeatedly ("num" times). The hash output
* will be extracted from the state afterwards.
*/
static void
panama_pull(sph_panama_context *sc, unsigned num)
{
LVARS
unsigned ptr0;
#define INW1(i) INW_H1(INC ## i)
#define INW_H1(i) INW_H2(i)
#define INW_H2(i) a ## i
#define INW2(i) sc->buffer[ptr4][i]
M17(RSTATE);
ptr0 = sc->buffer_ptr;
while (num -- > 0) {
unsigned ptr4;
ptr4 = (ptr0 + 4) & 31;
PANAMA_STEP;
}
M17(WSTATE);
#undef INW1
#undef INW_H1
#undef INW_H2
#undef INW2
}
/* see sph_panama.h */
void
sph_panama_init(void *cc)
{
sph_panama_context *sc;
sc = cc;
/*
* This is not completely conformant, but "it will work
* everywhere". Initial state consists of zeroes everywhere.
* Conceptually, the sph_u32 type may have padding bits which
* must not be set to 0; but such an architecture remains to
* be seen.
*/
sc->data_ptr = 0;
memset(sc->buffer, 0, sizeof sc->buffer);
sc->buffer_ptr = 0;
memset(sc->state, 0, sizeof sc->state);
}
#ifdef SPH_UPTR
static void
panama_short(void *cc, const void *data, size_t len)
#else
void
sph_panama(void *cc, const void *data, size_t len)
#endif
{
sph_panama_context *sc;
unsigned current;
sc = cc;
current = sc->data_ptr;
while (len > 0) {
unsigned clen;
clen = (sizeof sc->data) - current;
if (clen > len)
clen = len;
memcpy(sc->data + current, data, clen);
data = (const unsigned char *)data + clen;
len -= clen;
current += clen;
if (current == sizeof sc->data) {
current = 0;
panama_push(sc, sc->data, 1);
}
}
sc->data_ptr = current;
}
#ifdef SPH_UPTR
/* see sph_panama.h */
void
sph_panama(void *cc, const void *data, size_t len)
{
sph_panama_context *sc;
unsigned current;
size_t rlen;
if (len < (2 * sizeof sc->data)) {
panama_short(cc, data, len);
return;
}
sc = cc;
current = sc->data_ptr;
if (current > 0) {
unsigned t;
t = (sizeof sc->data) - current;
panama_short(sc, data, t);
data = (const unsigned char *)data + t;
len -= t;
}
#if !SPH_UNALIGNED
if (((SPH_UPTR)data & 3) != 0) {
panama_short(sc, data, len);
return;
}
#endif
panama_push(sc, data, len >> 5);
rlen = len & 31;
if (rlen > 0)
memcpy(sc->data,
(const unsigned char *)data + len - rlen, rlen);
sc->data_ptr = rlen;
}
#endif
/* see sph_panama.h */
void
sph_panama_close(void *cc, void *dst)
{
sph_panama_context *sc;
unsigned current;
int i;
sc = cc;
current = sc->data_ptr;
sc->data[current ++] = 0x01;
memset(sc->data + current, 0, (sizeof sc->data) - current);
panama_push(sc, sc->data, 1);
panama_pull(sc, 32);
for (i = 0; i < 8; i ++)
sph_enc32le((unsigned char *)dst + 4 * i, sc->state[i + 9]);
sph_panama_init(sc);
}

311
sph/sph_hamsi.h

@ -0,0 +1,311 @@ @@ -0,0 +1,311 @@
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
/**
* Hamsi interface. This code implements Hamsi with the recommended
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_hamsi.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_HAMSI_H__
#define SPH_HAMSI_H__
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for Hamsi-224.
*/
#define SPH_SIZE_hamsi224 224
/**
* Output size (in bits) for Hamsi-256.
*/
#define SPH_SIZE_hamsi256 256
/**
* Output size (in bits) for Hamsi-384.
*/
#define SPH_SIZE_hamsi384 384
/**
* Output size (in bits) for Hamsi-512.
*/
#define SPH_SIZE_hamsi512 512
/**
* This structure is a context for Hamsi-224 and Hamsi-256 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[4];
size_t partial_len;
sph_u32 h[8];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_small_context;
/**
* This structure is a context for Hamsi-224 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi224_context;
/**
* This structure is a context for Hamsi-256 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_small_context sph_hamsi256_context;
/**
* This structure is a context for Hamsi-384 and Hamsi-512 computations:
* it contains the intermediate values and some data from the last
* entered block. Once a Hamsi computation has been performed, the
* context can be reused for another computation.
*
* The contents of this structure are private. A running Hamsi
* computation can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char partial[8];
size_t partial_len;
sph_u32 h[16];
#if SPH_64
sph_u64 count;
#else
sph_u32 count_high, count_low;
#endif
#endif
} sph_hamsi_big_context;
/**
* This structure is a context for Hamsi-384 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi384_context;
/**
* This structure is a context for Hamsi-512 computations. It is
* identical to the common <code>sph_hamsi_small_context</code>.
*/
typedef sph_hamsi_big_context sph_hamsi512_context;
/**
* Initialize a Hamsi-224 context. This process performs no memory allocation.
*
* @param cc the Hamsi-224 context (pointer to a
* <code>sph_hamsi224_context</code>)
*/
void sph_hamsi224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi224(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-224 context
* @param dst the destination buffer
*/
void sph_hamsi224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-256 context. This process performs no memory allocation.
*
* @param cc the Hamsi-256 context (pointer to a
* <code>sph_hamsi256_context</code>)
*/
void sph_hamsi256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi256(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-256 context
* @param dst the destination buffer
*/
void sph_hamsi256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-384 context. This process performs no memory allocation.
*
* @param cc the Hamsi-384 context (pointer to a
* <code>sph_hamsi384_context</code>)
*/
void sph_hamsi384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi384(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-384 context
* @param dst the destination buffer
*/
void sph_hamsi384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize a Hamsi-512 context. This process performs no memory allocation.
*
* @param cc the Hamsi-512 context (pointer to a
* <code>sph_hamsi512_context</code>)
*/
void sph_hamsi512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Hamsi-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_hamsi512(void *cc, const void *data, size_t len);
/**
* Terminate the current Hamsi-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the Hamsi-512 context
* @param dst the destination buffer
*/
void sph_hamsi512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the Hamsi-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_hamsi512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#endif

118
sph/sph_panama.h

@ -0,0 +1,118 @@ @@ -0,0 +1,118 @@
/* $Id: sph_panama.h 154 2010-04-26 17:00:24Z tp $ */
/**
* PANAMA interface.
*
* PANAMA has been published in: J. Daemen and C. Clapp, "Fast Hashing
* and Stream Encryption with PANAMA", Fast Software Encryption -
* FSE'98, LNCS 1372, Springer (1998), pp. 60--74.
*
* PANAMA is not fully defined with regards to endianness and related
* topics. This implementation follows strict little-endian conventions:
* <ul>
* <li>Each 32-byte input block is split into eight 32-bit words, the
* first (leftmost) word being numbered 0.</li>
* <li>Each such 32-bit word is decoded from memory in little-endian
* convention.</li>
* <li>The additional padding bit equal to "1" is added by considering
* the least significant bit in a byte to come first; practically, this
* means that a single byte of value 0x01 is appended to the (byte-oriented)
* message, and then 0 to 31 bytes of value 0x00.</li>
* <li>The output consists of eight 32-bit words; the word numbered 0 is
* written first (in leftmost position) and it is encoded in little-endian
* convention.
* </ul>
* With these conventions, PANAMA is sometimes known as "PANAMA-LE". The
* PANAMA reference implementation uses our conventions for input, but
* prescribes no convention for output.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_panama.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_PANAMA_H__
#define SPH_PANAMA_H__
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for PANAMA.
*/
#define SPH_SIZE_panama 256
/**
* This structure is a context for PANAMA computations: it contains the
* intermediate values and some data from the last entered block. Once
* a PANAMA computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running PANAMA computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char data[32]; /* first field, for alignment */
unsigned data_ptr;
sph_u32 buffer[32][8];
unsigned buffer_ptr;
sph_u32 state[17];
#endif
} sph_panama_context;
/**
* Initialize a PANAMA context. This process performs no memory allocation.
*
* @param cc the PANAMA context (pointer to a <code>sph_panama_context</code>)
*/
void sph_panama_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the PANAMA context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_panama(void *cc, const void *data, size_t len);
/**
* Terminate the current PANAMA computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the PANAMA context
* @param dst the destination buffer
*/
void sph_panama_close(void *cc, void *dst);
#endif

166
twecoin.c

@ -0,0 +1,166 @@ @@ -0,0 +1,166 @@
/*-
* Copyright 2009 Colin Percival, 2014 phm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "sph/sph_fugue.h"
#include "sph/sph_shavite.h"
#include "sph/sph_hamsi.h"
#include "sph/sph_panama.h"
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
static inline void
be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
{
uint32_t i;
for (i = 0; i < len; i++)
dst[i] = htobe32(src[i]);
}
inline void twehash(void *state, const void *input)
{
sph_fugue256_context ctx_fugue;
sph_shavite256_context ctx_shavite;
sph_hamsi256_context ctx_hamsi;
sph_panama_context ctx_panama;
unsigned char hash[4][64];
memset(hash, 0, sizeof(hash));
sph_fugue256_init(&ctx_fugue);
sph_fugue256 (&ctx_fugue, input, 80);
sph_fugue256_close(&ctx_fugue, &hash[0]);
sph_shavite256_init(&ctx_shavite);
sph_shavite256(&ctx_shavite, &hash[0], 64);
sph_shavite256_close(&ctx_shavite, &hash[1]);
sph_hamsi256_init(&ctx_hamsi);
sph_hamsi256(&ctx_hamsi, &hash[1], 64);
sph_hamsi256_close(&ctx_hamsi, &hash[2]);
sph_panama_init(&ctx_panama);
sph_panama(&ctx_panama, &hash[2], 64);
sph_panama_close(&ctx_panama, &hash[3]);
memcpy(state, hash[3], 32);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
int twecoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
uint32_t data[20], ohash[8];
//char *scratchbuf;
be32enc_vect(data, (const uint32_t *)pdata, 19);
data[19] = htobe32(nonce);
//scratchbuf = alloca(SCRATCHBUF_SIZE);
twehash(ohash, data);
tmp_hash7 = be32toh(ohash[7]);
applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx",
(long unsigned int)Htarg,
(long unsigned int)diff1targ,
(long unsigned int)tmp_hash7);
if (tmp_hash7 > diff1targ)
return -1;
if (tmp_hash7 > Htarg)
return 0;
return 1;
}
void twecoin_regenhash(struct work *work)
{
uint32_t data[20];
char *scratchbuf;
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
twehash(ohash, data);
}
bool scanhash_twecoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
unsigned char *pdata, unsigned char __maybe_unused *phash1,
unsigned char __maybe_unused *phash, const unsigned char *ptarget,
uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
{
uint32_t *nonce = (uint32_t *)(pdata + 76);
char *scratchbuf;
uint32_t data[20];
uint32_t tmp_hash7;
uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]);
bool ret = false;
be32enc_vect(data, (const uint32_t *)pdata, 19);
while(1) {
uint32_t ostate[8];
*nonce = ++n;
data[19] = (n);
twehash(ostate, data);
tmp_hash7 = (ostate[7]);
applog(LOG_INFO, "data7 %08lx",
(long unsigned int)data[7]);
if (unlikely(tmp_hash7 <= Htarg)) {
((uint32_t *)pdata)[19] = htobe32(n);
*last_nonce = n;
ret = true;
break;
}
if (unlikely((n >= max_nonce) || thr->work_restart)) {
*last_nonce = n;
break;
}
}
return ret;
}

10
twecoin.h

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
#ifndef TWECOIN_H
#define TWECOIN_H
#include "miner.h"
extern int twecoin_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void twecoin_regenhash(struct work *work);
#endif /* TWECOIN_H */
Loading…
Cancel
Save