diff --git a/.gitignore b/.gitignore
index 43042b3..904341b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,7 @@ config.sub
mingw32-config.cache
-*/.dirstamp
+.dirstamp
.DS_Store
Desktop.ini
Thumbs.db
diff --git a/Makefile.am b/Makefile.am
index afd5390..fc7f075 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,6 +40,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
Algo256/blake2s.cu sph/blake2s.c \
Algo256/bmw.cu Algo256/cuda_bmw.cu \
crypto/xmr-rpc.cpp crypto/wildkeccak-cpu.cpp crypto/wildkeccak.cu \
+ crypto/cryptonight.cu crypto/cuda_cryptonight_core.cu crypto/cuda_cryptonight_extra.cu \
+ crypto/cryptonight-cpu.cpp crypto/oaes_lib.cpp crypto/aesb.cpp crypto/cpu/c_keccak.c \
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
@@ -105,6 +107,9 @@ Algo256/blake256.o: Algo256/blake256.cu
Algo256/cuda_bmw.o: Algo256/cuda_bmw.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=76 -o $@ -c $<
+crypto/cuda_cryptonight_extra.o: crypto/cuda_cryptonight_extra.cu
+ $(NVCC) $(nvcc_FLAGS) -o $@ -c $<
+
heavy/cuda_hefty1.o: heavy/cuda_hefty1.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
diff --git a/algos.h b/algos.h
index f960b25..9db1a13 100644
--- a/algos.h
+++ b/algos.h
@@ -10,6 +10,7 @@ enum sha_algos {
ALGO_BLAKE2S,
ALGO_BMW,
ALGO_C11,
+ ALGO_CRYPTONIGHT,
ALGO_DEEP,
ALGO_DECRED,
ALGO_DMD_GR,
@@ -62,6 +63,7 @@ static const char *algo_names[] = {
"blake2s",
"bmw",
"c11",
+ "cryptonight",
"deep",
"decred",
"dmd-gr",
diff --git a/bench.cpp b/bench.cpp
index 1aa3611..7e045dc 100644
--- a/bench.cpp
+++ b/bench.cpp
@@ -48,6 +48,7 @@ void algo_free_all(int thr_id)
free_blake2s(thr_id);
free_bmw(thr_id);
free_c11(thr_id);
+ free_cryptonight(thr_id);
free_decred(thr_id);
free_deep(thr_id);
free_keccak256(thr_id);
diff --git a/ccminer.cpp b/ccminer.cpp
index de22c51..1e6d104 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -620,6 +620,7 @@ static bool work_decode(const json_t *val, struct work *work)
data_size = 80;
adata_sz = data_size / 4;
break;
+ case ALGO_CRYPTONIGHT:
case ALGO_WILDKECCAK:
return rpc2_job_decode(val, work);
default:
@@ -1766,12 +1767,15 @@ static void *miner_thread(void *userdata)
nonceptr = (uint32_t*) (((char*)work.data) + 1);
wcmpoft = 2;
wcmplen = 32;
+ } else if (opt_algo == ALGO_CRYPTONIGHT) {
+ nonceptr = (uint32_t*) (((char*)work.data) + 39);
+ wcmplen = 39;
}
if (have_stratum) {
uint32_t sleeptime = 0;
- if (opt_algo == ALGO_DECRED || stratum.rpc2)
+ if (opt_algo == ALGO_DECRED || opt_algo == ALGO_WILDKECCAK /* getjob */)
work_done = true; // force "regen" hash
while (!work_done && time(NULL) >= (g_work_time + opt_scantime)) {
usleep(100*1000);
@@ -1798,6 +1802,8 @@ static void *miner_thread(void *userdata)
extrajob = false;
if (stratum_gen_work(&stratum, &g_work))
g_work_time = time(NULL);
+ if (opt_algo == ALGO_CRYPTONIGHT)
+ nonceptr[0] += 0x100000;
}
} else {
uint32_t secs = 0;
@@ -1839,7 +1845,20 @@ static void *miner_thread(void *userdata)
wcmplen -= 4;
}
- if (memcmp(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen)) {
+ if (opt_algo == ALGO_CRYPTONIGHT) {
+ uint32_t oldpos = nonceptr[0];
+ if (memcmp(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen)) {
+ memcpy(&work, &g_work, sizeof(struct work));
+ nonceptr[0] = (UINT32_MAX / opt_n_threads) * thr_id; // reset cursor
+ }
+ // also check the end, nonce in the middle
+ else if (memcmp(&work.data[44/4], &g_work.data[0], 76-44)) {
+ memcpy(&work, &g_work, sizeof(struct work));
+ }
+ if (oldpos & 0xFFFF) nonceptr[0] = oldpos + 0x100000;
+ }
+
+ else if (memcmp(&work.data[wcmpoft], &g_work.data[wcmpoft], wcmplen)) {
#if 0
if (opt_debug) {
for (int n=0; n <= (wcmplen-8); n+=8) {
@@ -1916,7 +1935,7 @@ static void *miner_thread(void *userdata)
gpulog(LOG_DEBUG, thr_id, "no data");
continue;
}
- if (stratum.rpc2 && !scratchpad_size) {
+ if (opt_algo == ALGO_WILDKECCAK && !scratchpad_size) {
sleep(1);
if (!thr_id) pools[cur_pooln].wait_time += 1;
continue;
@@ -2078,6 +2097,7 @@ static void *miner_thread(void *userdata)
case ALGO_VELTOR:
minmax = 0x80000;
break;
+ case ALGO_CRYPTONIGHT:
case ALGO_SCRYPT_JANE:
minmax = 0x1000;
break;
@@ -2140,6 +2160,9 @@ static void *miner_thread(void *userdata)
case ALGO_C11:
rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done);
break;
+ case ALGO_CRYPTONIGHT:
+ rc = scanhash_cryptonight(thr_id, &work, max_nonce, &hashes_done);
+ break;
case ALGO_DECRED:
//applog(LOG_BLUE, "version %x, nbits %x, ntime %x extra %x",
// work.data[0], work.data[29], work.data[34], work.data[38]);
@@ -2283,6 +2306,7 @@ static void *miner_thread(void *userdata)
// todo: update all algos to use work->nonces and pdata[19] as counter
switch (opt_algo) {
case ALGO_BLAKE2S:
+ case ALGO_CRYPTONIGHT:
case ALGO_DECRED:
case ALGO_LBRY:
case ALGO_SIA:
@@ -2710,7 +2734,7 @@ wait_stratum_url:
}
}
- if (opt_algo == ALGO_WILDKECCAK) {
+ if (stratum.rpc2) {
rpc2_stratum_thread_stuff(pool);
}
@@ -2806,7 +2830,7 @@ static void show_usage_and_exit(int status)
if (opt_algo == ALGO_SCRYPT || opt_algo == ALGO_SCRYPT_JANE) {
printf(scrypt_usage);
}
- if (opt_algo == ALGO_WILDKECCAK) {
+ if (opt_algo == ALGO_WILDKECCAK || opt_algo == ALGO_CRYPTONIGHT) {
printf(xmr_usage);
}
proper_exit(status);
@@ -3661,9 +3685,14 @@ int main(int argc, char *argv[])
allow_mininginfo = false;
}
+ if (opt_algo == ALGO_CRYPTONIGHT) {
+ rpc2_init();
+ if (!opt_quiet) applog(LOG_INFO, "Using JSON-RPC 2.0");
+ }
+
if (opt_algo == ALGO_WILDKECCAK) {
rpc2_init();
- applog(LOG_INFO, "Using CryptoNote JSON-RPC 2.0");
+ if (!opt_quiet) applog(LOG_INFO, "Using JSON-RPC 2.0");
GetScratchpad();
}
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 9a27e9e..72e42a8 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -233,6 +233,10 @@
+
+
+
+
@@ -251,9 +255,6 @@
-
-
-
@@ -264,8 +265,17 @@
76
+
+ 128
+
+
+ 128
+
+
+ 255
+
- 128
+ 128
160
@@ -319,6 +329,10 @@
+
+
+
+
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 6cfc1d4..17f81c6 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -91,6 +91,12 @@
{fea0fce3-c0fe-42f7-aa37-0cbba10b008a}
+
+ {af52b078-ed91-4c6e-b07a-e9243acc85d2}
+
+
+ {af387eac-e9e6-4e91-a5e8-637b1e7a8d93}
+
@@ -285,11 +291,23 @@
Source Files\crypto
+
+ Source Files\crypto\xmr
+
+
+ Source Files\crypto\xmr
+
+
+ Source Files\crypto\xmr
+
+
+ Source Files\crypto\xmr
+
- Source Files\crypto
+ Source Files\crypto\bbr
- Source Files\crypto
+ Source Files\crypto\bbr
@@ -494,14 +512,17 @@
Source Files\sia
-
+
Source Files\crypto
-
- Source Files\crypto
+
+ Source Files\crypto\xmr
-
- Source Files\crypto
+
+ Source Files\crypto\bbr
+
+
+ Source Files\crypto\bbr
@@ -784,6 +805,15 @@
Source Files\crypto
+
+ Source Files\crypto
+
+
+ Source Files\crypto
+
+
+ Source Files\crypto
+
@@ -800,5 +830,4 @@
Ressources
-
-
+
\ No newline at end of file
diff --git a/crypto/aesb.cpp b/crypto/aesb.cpp
new file mode 100644
index 0000000..2e18b2b
--- /dev/null
+++ b/crypto/aesb.cpp
@@ -0,0 +1,170 @@
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+ source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#include
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define TABLE_ALIGN 32
+#define WPOLY 0x011b
+#define N_COLS 4
+#define AES_BLOCK_SIZE 16
+#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
+
+#if defined(_MSC_VER)
+#define ALIGN __declspec(align(TABLE_ALIGN))
+#elif defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(16)))
+#else
+#define ALIGN
+#endif
+
+#define rf1(r,c) (r)
+#define word_in(x,c) (*((uint32_t*)(x)+(c)))
+#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
+
+#define s(x,c) x[c]
+#define si(y,x,c) (s(y,c) = word_in(x, c))
+#define so(y,x,c) word_out(y, c, s(x,c))
+#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
+#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
+#define round(y,x,k) \
+y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
+y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
+y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
+y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
+#define to_byte(x) ((x) & 0xff)
+#define bval(x,n) to_byte((x) >> (8 * (n)))
+
+#define fwd_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
+
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
+
+#define sb_data(w) {\
+ w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+ w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+ w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+ w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+ w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+ w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+ w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+ w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+ w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+ w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+ w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+ w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+ w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+ w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+ w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+ w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+ w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+ w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+ w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+ w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+ w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+ w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+ w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+ w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+ w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+ w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+ w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+ w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+ w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+ w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+ w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+ w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+
+#define rc_data(w) {\
+ w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
+ w(0x1b), w(0x36) }
+
+#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
+ ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
+
+#define h0(x) (x)
+#define w0(p) bytes2word(p, 0, 0, 0)
+#define w1(p) bytes2word(0, p, 0, 0)
+#define w2(p) bytes2word(0, 0, p, 0)
+#define w3(p) bytes2word(0, 0, 0, p)
+
+#define u0(p) bytes2word(f2(p), p, p, f3(p))
+#define u1(p) bytes2word(f3(p), f2(p), p, p)
+#define u2(p) bytes2word(p, f3(p), f2(p), p)
+#define u3(p) bytes2word(p, p, f3(p), f2(p))
+
+#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
+#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
+#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
+#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
+
+#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
+#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
+#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
+#define f3(x) (f2(x) ^ x)
+#define f9(x) (f8(x) ^ x)
+#define fb(x) (f8(x) ^ f2(x) ^ x)
+#define fd(x) (f8(x) ^ f4(x) ^ x)
+#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
+
+#define t_dec(m,n) t_##m##n
+#define t_set(m,n) t_##m##n
+#define t_use(m,n) t_##m##n
+
+#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
+
+#define four_tables(x,tab,vf,rf,c) \
+ (tab[0][bval(vf(x,0,c),rf(0,c))] \
+ ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
+ ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
+ ^ tab[3][bval(vf(x,3,c),rf(3,c))])
+
+d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
+
+void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
+{
+ round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey));
+}
+
+void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey)
+{
+ uint32_t b1[4];
+ round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey));
+ round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS);
+ round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS);
+ round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS);
+ round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS);
+ round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS);
+ round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS);
+ round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS);
+ round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS);
+ round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS);
+}
+
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/crypto/cn_aes.cuh b/crypto/cn_aes.cuh
new file mode 100644
index 0000000..74ca290
--- /dev/null
+++ b/crypto/cn_aes.cuh
@@ -0,0 +1,319 @@
+
+#define N_COLS 4
+#define WPOLY 0x011b
+
+static __constant__ uint32_t d_t_fn[1024] =
+{
+ 0xa56363c6U, 0x847c7cf8U, 0x997777eeU, 0x8d7b7bf6U,
+ 0x0df2f2ffU, 0xbd6b6bd6U, 0xb16f6fdeU, 0x54c5c591U,
+ 0x50303060U, 0x03010102U, 0xa96767ceU, 0x7d2b2b56U,
+ 0x19fefee7U, 0x62d7d7b5U, 0xe6abab4dU, 0x9a7676ecU,
+ 0x45caca8fU, 0x9d82821fU, 0x40c9c989U, 0x877d7dfaU,
+ 0x15fafaefU, 0xeb5959b2U, 0xc947478eU, 0x0bf0f0fbU,
+ 0xecadad41U, 0x67d4d4b3U, 0xfda2a25fU, 0xeaafaf45U,
+ 0xbf9c9c23U, 0xf7a4a453U, 0x967272e4U, 0x5bc0c09bU,
+ 0xc2b7b775U, 0x1cfdfde1U, 0xae93933dU, 0x6a26264cU,
+ 0x5a36366cU, 0x413f3f7eU, 0x02f7f7f5U, 0x4fcccc83U,
+ 0x5c343468U, 0xf4a5a551U, 0x34e5e5d1U, 0x08f1f1f9U,
+ 0x937171e2U, 0x73d8d8abU, 0x53313162U, 0x3f15152aU,
+ 0x0c040408U, 0x52c7c795U, 0x65232346U, 0x5ec3c39dU,
+ 0x28181830U, 0xa1969637U, 0x0f05050aU, 0xb59a9a2fU,
+ 0x0907070eU, 0x36121224U, 0x9b80801bU, 0x3de2e2dfU,
+ 0x26ebebcdU, 0x6927274eU, 0xcdb2b27fU, 0x9f7575eaU,
+ 0x1b090912U, 0x9e83831dU, 0x742c2c58U, 0x2e1a1a34U,
+ 0x2d1b1b36U, 0xb26e6edcU, 0xee5a5ab4U, 0xfba0a05bU,
+ 0xf65252a4U, 0x4d3b3b76U, 0x61d6d6b7U, 0xceb3b37dU,
+ 0x7b292952U, 0x3ee3e3ddU, 0x712f2f5eU, 0x97848413U,
+ 0xf55353a6U, 0x68d1d1b9U, 0x00000000U, 0x2cededc1U,
+ 0x60202040U, 0x1ffcfce3U, 0xc8b1b179U, 0xed5b5bb6U,
+ 0xbe6a6ad4U, 0x46cbcb8dU, 0xd9bebe67U, 0x4b393972U,
+ 0xde4a4a94U, 0xd44c4c98U, 0xe85858b0U, 0x4acfcf85U,
+ 0x6bd0d0bbU, 0x2aefefc5U, 0xe5aaaa4fU, 0x16fbfbedU,
+ 0xc5434386U, 0xd74d4d9aU, 0x55333366U, 0x94858511U,
+ 0xcf45458aU, 0x10f9f9e9U, 0x06020204U, 0x817f7ffeU,
+ 0xf05050a0U, 0x443c3c78U, 0xba9f9f25U, 0xe3a8a84bU,
+ 0xf35151a2U, 0xfea3a35dU, 0xc0404080U, 0x8a8f8f05U,
+ 0xad92923fU, 0xbc9d9d21U, 0x48383870U, 0x04f5f5f1U,
+ 0xdfbcbc63U, 0xc1b6b677U, 0x75dadaafU, 0x63212142U,
+ 0x30101020U, 0x1affffe5U, 0x0ef3f3fdU, 0x6dd2d2bfU,
+ 0x4ccdcd81U, 0x140c0c18U, 0x35131326U, 0x2fececc3U,
+ 0xe15f5fbeU, 0xa2979735U, 0xcc444488U, 0x3917172eU,
+ 0x57c4c493U, 0xf2a7a755U, 0x827e7efcU, 0x473d3d7aU,
+ 0xac6464c8U, 0xe75d5dbaU, 0x2b191932U, 0x957373e6U,
+ 0xa06060c0U, 0x98818119U, 0xd14f4f9eU, 0x7fdcdca3U,
+ 0x66222244U, 0x7e2a2a54U, 0xab90903bU, 0x8388880bU,
+ 0xca46468cU, 0x29eeeec7U, 0xd3b8b86bU, 0x3c141428U,
+ 0x79dedea7U, 0xe25e5ebcU, 0x1d0b0b16U, 0x76dbdbadU,
+ 0x3be0e0dbU, 0x56323264U, 0x4e3a3a74U, 0x1e0a0a14U,
+ 0xdb494992U, 0x0a06060cU, 0x6c242448U, 0xe45c5cb8U,
+ 0x5dc2c29fU, 0x6ed3d3bdU, 0xefacac43U, 0xa66262c4U,
+ 0xa8919139U, 0xa4959531U, 0x37e4e4d3U, 0x8b7979f2U,
+ 0x32e7e7d5U, 0x43c8c88bU, 0x5937376eU, 0xb76d6ddaU,
+ 0x8c8d8d01U, 0x64d5d5b1U, 0xd24e4e9cU, 0xe0a9a949U,
+ 0xb46c6cd8U, 0xfa5656acU, 0x07f4f4f3U, 0x25eaeacfU,
+ 0xaf6565caU, 0x8e7a7af4U, 0xe9aeae47U, 0x18080810U,
+ 0xd5baba6fU, 0x887878f0U, 0x6f25254aU, 0x722e2e5cU,
+ 0x241c1c38U, 0xf1a6a657U, 0xc7b4b473U, 0x51c6c697U,
+ 0x23e8e8cbU, 0x7cdddda1U, 0x9c7474e8U, 0x211f1f3eU,
+ 0xdd4b4b96U, 0xdcbdbd61U, 0x868b8b0dU, 0x858a8a0fU,
+ 0x907070e0U, 0x423e3e7cU, 0xc4b5b571U, 0xaa6666ccU,
+ 0xd8484890U, 0x05030306U, 0x01f6f6f7U, 0x120e0e1cU,
+ 0xa36161c2U, 0x5f35356aU, 0xf95757aeU, 0xd0b9b969U,
+ 0x91868617U, 0x58c1c199U, 0x271d1d3aU, 0xb99e9e27U,
+ 0x38e1e1d9U, 0x13f8f8ebU, 0xb398982bU, 0x33111122U,
+ 0xbb6969d2U, 0x70d9d9a9U, 0x898e8e07U, 0xa7949433U,
+ 0xb69b9b2dU, 0x221e1e3cU, 0x92878715U, 0x20e9e9c9U,
+ 0x49cece87U, 0xff5555aaU, 0x78282850U, 0x7adfdfa5U,
+ 0x8f8c8c03U, 0xf8a1a159U, 0x80898909U, 0x170d0d1aU,
+ 0xdabfbf65U, 0x31e6e6d7U, 0xc6424284U, 0xb86868d0U,
+ 0xc3414182U, 0xb0999929U, 0x772d2d5aU, 0x110f0f1eU,
+ 0xcbb0b07bU, 0xfc5454a8U, 0xd6bbbb6dU, 0x3a16162cU,
+ 0x6363c6a5U, 0x7c7cf884U, 0x7777ee99U, 0x7b7bf68dU,
+ 0xf2f2ff0dU, 0x6b6bd6bdU, 0x6f6fdeb1U, 0xc5c59154U,
+ 0x30306050U, 0x01010203U, 0x6767cea9U, 0x2b2b567dU,
+ 0xfefee719U, 0xd7d7b562U, 0xabab4de6U, 0x7676ec9aU,
+ 0xcaca8f45U, 0x82821f9dU, 0xc9c98940U, 0x7d7dfa87U,
+ 0xfafaef15U, 0x5959b2ebU, 0x47478ec9U, 0xf0f0fb0bU,
+ 0xadad41ecU, 0xd4d4b367U, 0xa2a25ffdU, 0xafaf45eaU,
+ 0x9c9c23bfU, 0xa4a453f7U, 0x7272e496U, 0xc0c09b5bU,
+ 0xb7b775c2U, 0xfdfde11cU, 0x93933daeU, 0x26264c6aU,
+ 0x36366c5aU, 0x3f3f7e41U, 0xf7f7f502U, 0xcccc834fU,
+ 0x3434685cU, 0xa5a551f4U, 0xe5e5d134U, 0xf1f1f908U,
+ 0x7171e293U, 0xd8d8ab73U, 0x31316253U, 0x15152a3fU,
+ 0x0404080cU, 0xc7c79552U, 0x23234665U, 0xc3c39d5eU,
+ 0x18183028U, 0x969637a1U, 0x05050a0fU, 0x9a9a2fb5U,
+ 0x07070e09U, 0x12122436U, 0x80801b9bU, 0xe2e2df3dU,
+ 0xebebcd26U, 0x27274e69U, 0xb2b27fcdU, 0x7575ea9fU,
+ 0x0909121bU, 0x83831d9eU, 0x2c2c5874U, 0x1a1a342eU,
+ 0x1b1b362dU, 0x6e6edcb2U, 0x5a5ab4eeU, 0xa0a05bfbU,
+ 0x5252a4f6U, 0x3b3b764dU, 0xd6d6b761U, 0xb3b37dceU,
+ 0x2929527bU, 0xe3e3dd3eU, 0x2f2f5e71U, 0x84841397U,
+ 0x5353a6f5U, 0xd1d1b968U, 0x00000000U, 0xededc12cU,
+ 0x20204060U, 0xfcfce31fU, 0xb1b179c8U, 0x5b5bb6edU,
+ 0x6a6ad4beU, 0xcbcb8d46U, 0xbebe67d9U, 0x3939724bU,
+ 0x4a4a94deU, 0x4c4c98d4U, 0x5858b0e8U, 0xcfcf854aU,
+ 0xd0d0bb6bU, 0xefefc52aU, 0xaaaa4fe5U, 0xfbfbed16U,
+ 0x434386c5U, 0x4d4d9ad7U, 0x33336655U, 0x85851194U,
+ 0x45458acfU, 0xf9f9e910U, 0x02020406U, 0x7f7ffe81U,
+ 0x5050a0f0U, 0x3c3c7844U, 0x9f9f25baU, 0xa8a84be3U,
+ 0x5151a2f3U, 0xa3a35dfeU, 0x404080c0U, 0x8f8f058aU,
+ 0x92923fadU, 0x9d9d21bcU, 0x38387048U, 0xf5f5f104U,
+ 0xbcbc63dfU, 0xb6b677c1U, 0xdadaaf75U, 0x21214263U,
+ 0x10102030U, 0xffffe51aU, 0xf3f3fd0eU, 0xd2d2bf6dU,
+ 0xcdcd814cU, 0x0c0c1814U, 0x13132635U, 0xececc32fU,
+ 0x5f5fbee1U, 0x979735a2U, 0x444488ccU, 0x17172e39U,
+ 0xc4c49357U, 0xa7a755f2U, 0x7e7efc82U, 0x3d3d7a47U,
+ 0x6464c8acU, 0x5d5dbae7U, 0x1919322bU, 0x7373e695U,
+ 0x6060c0a0U, 0x81811998U, 0x4f4f9ed1U, 0xdcdca37fU,
+ 0x22224466U, 0x2a2a547eU, 0x90903babU, 0x88880b83U,
+ 0x46468ccaU, 0xeeeec729U, 0xb8b86bd3U, 0x1414283cU,
+ 0xdedea779U, 0x5e5ebce2U, 0x0b0b161dU, 0xdbdbad76U,
+ 0xe0e0db3bU, 0x32326456U, 0x3a3a744eU, 0x0a0a141eU,
+ 0x494992dbU, 0x06060c0aU, 0x2424486cU, 0x5c5cb8e4U,
+ 0xc2c29f5dU, 0xd3d3bd6eU, 0xacac43efU, 0x6262c4a6U,
+ 0x919139a8U, 0x959531a4U, 0xe4e4d337U, 0x7979f28bU,
+ 0xe7e7d532U, 0xc8c88b43U, 0x37376e59U, 0x6d6ddab7U,
+ 0x8d8d018cU, 0xd5d5b164U, 0x4e4e9cd2U, 0xa9a949e0U,
+ 0x6c6cd8b4U, 0x5656acfaU, 0xf4f4f307U, 0xeaeacf25U,
+ 0x6565caafU, 0x7a7af48eU, 0xaeae47e9U, 0x08081018U,
+ 0xbaba6fd5U, 0x7878f088U, 0x25254a6fU, 0x2e2e5c72U,
+ 0x1c1c3824U, 0xa6a657f1U, 0xb4b473c7U, 0xc6c69751U,
+ 0xe8e8cb23U, 0xdddda17cU, 0x7474e89cU, 0x1f1f3e21U,
+ 0x4b4b96ddU, 0xbdbd61dcU, 0x8b8b0d86U, 0x8a8a0f85U,
+ 0x7070e090U, 0x3e3e7c42U, 0xb5b571c4U, 0x6666ccaaU,
+ 0x484890d8U, 0x03030605U, 0xf6f6f701U, 0x0e0e1c12U,
+ 0x6161c2a3U, 0x35356a5fU, 0x5757aef9U, 0xb9b969d0U,
+ 0x86861791U, 0xc1c19958U, 0x1d1d3a27U, 0x9e9e27b9U,
+ 0xe1e1d938U, 0xf8f8eb13U, 0x98982bb3U, 0x11112233U,
+ 0x6969d2bbU, 0xd9d9a970U, 0x8e8e0789U, 0x949433a7U,
+ 0x9b9b2db6U, 0x1e1e3c22U, 0x87871592U, 0xe9e9c920U,
+ 0xcece8749U, 0x5555aaffU, 0x28285078U, 0xdfdfa57aU,
+ 0x8c8c038fU, 0xa1a159f8U, 0x89890980U, 0x0d0d1a17U,
+ 0xbfbf65daU, 0xe6e6d731U, 0x424284c6U, 0x6868d0b8U,
+ 0x414182c3U, 0x999929b0U, 0x2d2d5a77U, 0x0f0f1e11U,
+ 0xb0b07bcbU, 0x5454a8fcU, 0xbbbb6dd6U, 0x16162c3aU,
+ 0x63c6a563U, 0x7cf8847cU, 0x77ee9977U, 0x7bf68d7bU,
+ 0xf2ff0df2U, 0x6bd6bd6bU, 0x6fdeb16fU, 0xc59154c5U,
+ 0x30605030U, 0x01020301U, 0x67cea967U, 0x2b567d2bU,
+ 0xfee719feU, 0xd7b562d7U, 0xab4de6abU, 0x76ec9a76U,
+ 0xca8f45caU, 0x821f9d82U, 0xc98940c9U, 0x7dfa877dU,
+ 0xfaef15faU, 0x59b2eb59U, 0x478ec947U, 0xf0fb0bf0U,
+ 0xad41ecadU, 0xd4b367d4U, 0xa25ffda2U, 0xaf45eaafU,
+ 0x9c23bf9cU, 0xa453f7a4U, 0x72e49672U, 0xc09b5bc0U,
+ 0xb775c2b7U, 0xfde11cfdU, 0x933dae93U, 0x264c6a26U,
+ 0x366c5a36U, 0x3f7e413fU, 0xf7f502f7U, 0xcc834fccU,
+ 0x34685c34U, 0xa551f4a5U, 0xe5d134e5U, 0xf1f908f1U,
+ 0x71e29371U, 0xd8ab73d8U, 0x31625331U, 0x152a3f15U,
+ 0x04080c04U, 0xc79552c7U, 0x23466523U, 0xc39d5ec3U,
+ 0x18302818U, 0x9637a196U, 0x050a0f05U, 0x9a2fb59aU,
+ 0x070e0907U, 0x12243612U, 0x801b9b80U, 0xe2df3de2U,
+ 0xebcd26ebU, 0x274e6927U, 0xb27fcdb2U, 0x75ea9f75U,
+ 0x09121b09U, 0x831d9e83U, 0x2c58742cU, 0x1a342e1aU,
+ 0x1b362d1bU, 0x6edcb26eU, 0x5ab4ee5aU, 0xa05bfba0U,
+ 0x52a4f652U, 0x3b764d3bU, 0xd6b761d6U, 0xb37dceb3U,
+ 0x29527b29U, 0xe3dd3ee3U, 0x2f5e712fU, 0x84139784U,
+ 0x53a6f553U, 0xd1b968d1U, 0x00000000U, 0xedc12cedU,
+ 0x20406020U, 0xfce31ffcU, 0xb179c8b1U, 0x5bb6ed5bU,
+ 0x6ad4be6aU, 0xcb8d46cbU, 0xbe67d9beU, 0x39724b39U,
+ 0x4a94de4aU, 0x4c98d44cU, 0x58b0e858U, 0xcf854acfU,
+ 0xd0bb6bd0U, 0xefc52aefU, 0xaa4fe5aaU, 0xfbed16fbU,
+ 0x4386c543U, 0x4d9ad74dU, 0x33665533U, 0x85119485U,
+ 0x458acf45U, 0xf9e910f9U, 0x02040602U, 0x7ffe817fU,
+ 0x50a0f050U, 0x3c78443cU, 0x9f25ba9fU, 0xa84be3a8U,
+ 0x51a2f351U, 0xa35dfea3U, 0x4080c040U, 0x8f058a8fU,
+ 0x923fad92U, 0x9d21bc9dU, 0x38704838U, 0xf5f104f5U,
+ 0xbc63dfbcU, 0xb677c1b6U, 0xdaaf75daU, 0x21426321U,
+ 0x10203010U, 0xffe51affU, 0xf3fd0ef3U, 0xd2bf6dd2U,
+ 0xcd814ccdU, 0x0c18140cU, 0x13263513U, 0xecc32fecU,
+ 0x5fbee15fU, 0x9735a297U, 0x4488cc44U, 0x172e3917U,
+ 0xc49357c4U, 0xa755f2a7U, 0x7efc827eU, 0x3d7a473dU,
+ 0x64c8ac64U, 0x5dbae75dU, 0x19322b19U, 0x73e69573U,
+ 0x60c0a060U, 0x81199881U, 0x4f9ed14fU, 0xdca37fdcU,
+ 0x22446622U, 0x2a547e2aU, 0x903bab90U, 0x880b8388U,
+ 0x468cca46U, 0xeec729eeU, 0xb86bd3b8U, 0x14283c14U,
+ 0xdea779deU, 0x5ebce25eU, 0x0b161d0bU, 0xdbad76dbU,
+ 0xe0db3be0U, 0x32645632U, 0x3a744e3aU, 0x0a141e0aU,
+ 0x4992db49U, 0x060c0a06U, 0x24486c24U, 0x5cb8e45cU,
+ 0xc29f5dc2U, 0xd3bd6ed3U, 0xac43efacU, 0x62c4a662U,
+ 0x9139a891U, 0x9531a495U, 0xe4d337e4U, 0x79f28b79U,
+ 0xe7d532e7U, 0xc88b43c8U, 0x376e5937U, 0x6ddab76dU,
+ 0x8d018c8dU, 0xd5b164d5U, 0x4e9cd24eU, 0xa949e0a9U,
+ 0x6cd8b46cU, 0x56acfa56U, 0xf4f307f4U, 0xeacf25eaU,
+ 0x65caaf65U, 0x7af48e7aU, 0xae47e9aeU, 0x08101808U,
+ 0xba6fd5baU, 0x78f08878U, 0x254a6f25U, 0x2e5c722eU,
+ 0x1c38241cU, 0xa657f1a6U, 0xb473c7b4U, 0xc69751c6U,
+ 0xe8cb23e8U, 0xdda17cddU, 0x74e89c74U, 0x1f3e211fU,
+ 0x4b96dd4bU, 0xbd61dcbdU, 0x8b0d868bU, 0x8a0f858aU,
+ 0x70e09070U, 0x3e7c423eU, 0xb571c4b5U, 0x66ccaa66U,
+ 0x4890d848U, 0x03060503U, 0xf6f701f6U, 0x0e1c120eU,
+ 0x61c2a361U, 0x356a5f35U, 0x57aef957U, 0xb969d0b9U,
+ 0x86179186U, 0xc19958c1U, 0x1d3a271dU, 0x9e27b99eU,
+ 0xe1d938e1U, 0xf8eb13f8U, 0x982bb398U, 0x11223311U,
+ 0x69d2bb69U, 0xd9a970d9U, 0x8e07898eU, 0x9433a794U,
+ 0x9b2db69bU, 0x1e3c221eU, 0x87159287U, 0xe9c920e9U,
+ 0xce8749ceU, 0x55aaff55U, 0x28507828U, 0xdfa57adfU,
+ 0x8c038f8cU, 0xa159f8a1U, 0x89098089U, 0x0d1a170dU,
+ 0xbf65dabfU, 0xe6d731e6U, 0x4284c642U, 0x68d0b868U,
+ 0x4182c341U, 0x9929b099U, 0x2d5a772dU, 0x0f1e110fU,
+ 0xb07bcbb0U, 0x54a8fc54U, 0xbb6dd6bbU, 0x162c3a16U,
+ 0xc6a56363U, 0xf8847c7cU, 0xee997777U, 0xf68d7b7bU,
+ 0xff0df2f2U, 0xd6bd6b6bU, 0xdeb16f6fU, 0x9154c5c5U,
+ 0x60503030U, 0x02030101U, 0xcea96767U, 0x567d2b2bU,
+ 0xe719fefeU, 0xb562d7d7U, 0x4de6ababU, 0xec9a7676U,
+ 0x8f45cacaU, 0x1f9d8282U, 0x8940c9c9U, 0xfa877d7dU,
+ 0xef15fafaU, 0xb2eb5959U, 0x8ec94747U, 0xfb0bf0f0U,
+ 0x41ecadadU, 0xb367d4d4U, 0x5ffda2a2U, 0x45eaafafU,
+ 0x23bf9c9cU, 0x53f7a4a4U, 0xe4967272U, 0x9b5bc0c0U,
+ 0x75c2b7b7U, 0xe11cfdfdU, 0x3dae9393U, 0x4c6a2626U,
+ 0x6c5a3636U, 0x7e413f3fU, 0xf502f7f7U, 0x834fccccU,
+ 0x685c3434U, 0x51f4a5a5U, 0xd134e5e5U, 0xf908f1f1U,
+ 0xe2937171U, 0xab73d8d8U, 0x62533131U, 0x2a3f1515U,
+ 0x080c0404U, 0x9552c7c7U, 0x46652323U, 0x9d5ec3c3U,
+ 0x30281818U, 0x37a19696U, 0x0a0f0505U, 0x2fb59a9aU,
+ 0x0e090707U, 0x24361212U, 0x1b9b8080U, 0xdf3de2e2U,
+ 0xcd26ebebU, 0x4e692727U, 0x7fcdb2b2U, 0xea9f7575U,
+ 0x121b0909U, 0x1d9e8383U, 0x58742c2cU, 0x342e1a1aU,
+ 0x362d1b1bU, 0xdcb26e6eU, 0xb4ee5a5aU, 0x5bfba0a0U,
+ 0xa4f65252U, 0x764d3b3bU, 0xb761d6d6U, 0x7dceb3b3U,
+ 0x527b2929U, 0xdd3ee3e3U, 0x5e712f2fU, 0x13978484U,
+ 0xa6f55353U, 0xb968d1d1U, 0x00000000U, 0xc12cededU,
+ 0x40602020U, 0xe31ffcfcU, 0x79c8b1b1U, 0xb6ed5b5bU,
+ 0xd4be6a6aU, 0x8d46cbcbU, 0x67d9bebeU, 0x724b3939U,
+ 0x94de4a4aU, 0x98d44c4cU, 0xb0e85858U, 0x854acfcfU,
+ 0xbb6bd0d0U, 0xc52aefefU, 0x4fe5aaaaU, 0xed16fbfbU,
+ 0x86c54343U, 0x9ad74d4dU, 0x66553333U, 0x11948585U,
+ 0x8acf4545U, 0xe910f9f9U, 0x04060202U, 0xfe817f7fU,
+ 0xa0f05050U, 0x78443c3cU, 0x25ba9f9fU, 0x4be3a8a8U,
+ 0xa2f35151U, 0x5dfea3a3U, 0x80c04040U, 0x058a8f8fU,
+ 0x3fad9292U, 0x21bc9d9dU, 0x70483838U, 0xf104f5f5U,
+ 0x63dfbcbcU, 0x77c1b6b6U, 0xaf75dadaU, 0x42632121U,
+ 0x20301010U, 0xe51affffU, 0xfd0ef3f3U, 0xbf6dd2d2U,
+ 0x814ccdcdU, 0x18140c0cU, 0x26351313U, 0xc32fececU,
+ 0xbee15f5fU, 0x35a29797U, 0x88cc4444U, 0x2e391717U,
+ 0x9357c4c4U, 0x55f2a7a7U, 0xfc827e7eU, 0x7a473d3dU,
+ 0xc8ac6464U, 0xbae75d5dU, 0x322b1919U, 0xe6957373U,
+ 0xc0a06060U, 0x19988181U, 0x9ed14f4fU, 0xa37fdcdcU,
+ 0x44662222U, 0x547e2a2aU, 0x3bab9090U, 0x0b838888U,
+ 0x8cca4646U, 0xc729eeeeU, 0x6bd3b8b8U, 0x283c1414U,
+ 0xa779dedeU, 0xbce25e5eU, 0x161d0b0bU, 0xad76dbdbU,
+ 0xdb3be0e0U, 0x64563232U, 0x744e3a3aU, 0x141e0a0aU,
+ 0x92db4949U, 0x0c0a0606U, 0x486c2424U, 0xb8e45c5cU,
+ 0x9f5dc2c2U, 0xbd6ed3d3U, 0x43efacacU, 0xc4a66262U,
+ 0x39a89191U, 0x31a49595U, 0xd337e4e4U, 0xf28b7979U,
+ 0xd532e7e7U, 0x8b43c8c8U, 0x6e593737U, 0xdab76d6dU,
+ 0x018c8d8dU, 0xb164d5d5U, 0x9cd24e4eU, 0x49e0a9a9U,
+ 0xd8b46c6cU, 0xacfa5656U, 0xf307f4f4U, 0xcf25eaeaU,
+ 0xcaaf6565U, 0xf48e7a7aU, 0x47e9aeaeU, 0x10180808U,
+ 0x6fd5babaU, 0xf0887878U, 0x4a6f2525U, 0x5c722e2eU,
+ 0x38241c1cU, 0x57f1a6a6U, 0x73c7b4b4U, 0x9751c6c6U,
+ 0xcb23e8e8U, 0xa17cddddU, 0xe89c7474U, 0x3e211f1fU,
+ 0x96dd4b4bU, 0x61dcbdbdU, 0x0d868b8bU, 0x0f858a8aU,
+ 0xe0907070U, 0x7c423e3eU, 0x71c4b5b5U, 0xccaa6666U,
+ 0x90d84848U, 0x06050303U, 0xf701f6f6U, 0x1c120e0eU,
+ 0xc2a36161U, 0x6a5f3535U, 0xaef95757U, 0x69d0b9b9U,
+ 0x17918686U, 0x9958c1c1U, 0x3a271d1dU, 0x27b99e9eU,
+ 0xd938e1e1U, 0xeb13f8f8U, 0x2bb39898U, 0x22331111U,
+ 0xd2bb6969U, 0xa970d9d9U, 0x07898e8eU, 0x33a79494U,
+ 0x2db69b9bU, 0x3c221e1eU, 0x15928787U, 0xc920e9e9U,
+ 0x8749ceceU, 0xaaff5555U, 0x50782828U, 0xa57adfdfU,
+ 0x038f8c8cU, 0x59f8a1a1U, 0x09808989U, 0x1a170d0dU,
+ 0x65dabfbfU, 0xd731e6e6U, 0x84c64242U, 0xd0b86868U,
+ 0x82c34141U, 0x29b09999U, 0x5a772d2dU, 0x1e110f0fU,
+ 0x7bcbb0b0U, 0xa8fc5454U, 0x6dd6bbbbU, 0x2c3a1616U
+};
+
+#define t_fn0(x) (sharedMemory[ (x)])
+#define t_fn1(x) (sharedMemory[256 + (x)])
+#define t_fn2(x) (sharedMemory[512 + (x)])
+#define t_fn3(x) (sharedMemory[768 + (x)])
+
+
+#define round(dummy,y,x,k) \
+ y[0] = (k)[0] ^ (t_fn0(x[0] & 0xff) ^ t_fn1((x[1] >> 8) & 0xff) ^ t_fn2((x[2] >> 16) & 0xff) ^ t_fn3((x[3] >> 24) & 0xff)); \
+ y[1] = (k)[1] ^ (t_fn0(x[1] & 0xff) ^ t_fn1((x[2] >> 8) & 0xff) ^ t_fn2((x[3] >> 16) & 0xff) ^ t_fn3((x[0] >> 24) & 0xff)); \
+ y[2] = (k)[2] ^ (t_fn0(x[2] & 0xff) ^ t_fn1((x[3] >> 8) & 0xff) ^ t_fn2((x[0] >> 16) & 0xff) ^ t_fn3((x[1] >> 24) & 0xff)); \
+ y[3] = (k)[3] ^ (t_fn0(x[3] & 0xff) ^ t_fn1((x[0] >> 8) & 0xff) ^ t_fn2((x[1] >> 16) & 0xff) ^ t_fn3((x[2] >> 24) & 0xff));
+
+__device__ __forceinline__ static void cn_aes_single_round(uint32_t * __restrict__ sharedMemory, const uint32_t * __restrict__ in, uint32_t * __restrict__ out, const uint32_t * __restrict__ expandedKey)
+{
+ round(sharedMemory, out, in, expandedKey);
+}
+
+__device__ __forceinline__ static void cn_aes_pseudo_round_mut(const uint32_t * __restrict__ sharedMemory, uint32_t * __restrict__ val, const uint32_t * __restrict__ expandedKey)
+{
+ uint32_t b1[4];
+ round(sharedMemory, b1, val, expandedKey);
+ round(sharedMemory, val, b1, expandedKey + 1 * N_COLS);
+ round(sharedMemory, b1, val, expandedKey + 2 * N_COLS);
+ round(sharedMemory, val, b1, expandedKey + 3 * N_COLS);
+ round(sharedMemory, b1, val, expandedKey + 4 * N_COLS);
+ round(sharedMemory, val, b1, expandedKey + 5 * N_COLS);
+ round(sharedMemory, b1, val, expandedKey + 6 * N_COLS);
+ round(sharedMemory, val, b1, expandedKey + 7 * N_COLS);
+ round(sharedMemory, b1, val, expandedKey + 8 * N_COLS);
+ round(sharedMemory, val, b1, expandedKey + 9 * N_COLS);
+}
+
+__device__ __forceinline__ static void cn_aes_gpu_init(uint32_t *sharedMemory)
+{
+ if(blockDim.x >= 32)
+ {
+ if(threadIdx.x < 32)
+ {
+ for(int i = 0; i < 1024; i += 32)
+ {
+ sharedMemory[threadIdx.x + i] = d_t_fn[threadIdx.x + i];
+ }
+ }
+ }
+ else
+ {
+ if(threadIdx.x < 4)
+ {
+ for(int i = 0; i < 1024; i += 4)
+ {
+ sharedMemory[threadIdx.x + i] = d_t_fn[threadIdx.x + i];
+ }
+ }
+ }
+}
diff --git a/crypto/cn_blake.cuh b/crypto/cn_blake.cuh
new file mode 100644
index 0000000..5c0d09f
--- /dev/null
+++ b/crypto/cn_blake.cuh
@@ -0,0 +1,180 @@
+//#include
+
+typedef struct {
+ uint32_t h[8], s[4], t[2];
+ int buflen, nullt;
+ uint8_t buf[64];
+} blake_state;
+
+#define U8TO32(p) \
+ (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
+ ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
+
+#define U32TO8(p, v) \
+ (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
+ (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
+
+#define BLAKE_ROT(x,n) ROTR32(x, n)
+#define BLAKE_G(a,b,c,d,e) \
+ v[a] += (m[d_blake_sigma[i][e]] ^ d_blake_cst[d_blake_sigma[i][e+1]]) + v[b]; \
+ v[d] = BLAKE_ROT(v[d] ^ v[a],16); \
+ v[c] += v[d]; \
+ v[b] = BLAKE_ROT(v[b] ^ v[c],12); \
+ v[a] += (m[d_blake_sigma[i][e+1]] ^ d_blake_cst[d_blake_sigma[i][e]]) + v[b]; \
+ v[d] = BLAKE_ROT(v[d] ^ v[a], 8); \
+ v[c] += v[d]; \
+ v[b] = BLAKE_ROT(v[b] ^ v[c], 7);
+
+__constant__ uint8_t d_blake_sigma[14][16] = {
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+ {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+ {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+ {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+ {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+ {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+ {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+ {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+ {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+ {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+ {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}
+};
+
+__constant__ uint32_t d_blake_cst[16] = {
+ 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
+ 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
+ 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
+ 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
+};
+
+__device__
+void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block)
+{
+ uint32_t v[16], m[16], i;
+
+ for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
+ for (i = 0; i < 8; ++i) v[i] = S->h[i];
+ v[ 8] = S->s[0] ^ 0x243F6A88;
+ v[ 9] = S->s[1] ^ 0x85A308D3;
+ v[10] = S->s[2] ^ 0x13198A2E;
+ v[11] = S->s[3] ^ 0x03707344;
+ v[12] = 0xA4093822;
+ v[13] = 0x299F31D0;
+ v[14] = 0x082EFA98;
+ v[15] = 0xEC4E6C89;
+
+ if (S->nullt == 0) {
+ v[12] ^= S->t[0];
+ v[13] ^= S->t[0];
+ v[14] ^= S->t[1];
+ v[15] ^= S->t[1];
+ }
+
+ for (i = 0; i < 14; ++i) {
+ BLAKE_G(0, 4, 8, 12, 0);
+ BLAKE_G(1, 5, 9, 13, 2);
+ BLAKE_G(2, 6, 10, 14, 4);
+ BLAKE_G(3, 7, 11, 15, 6);
+ BLAKE_G(3, 4, 9, 14, 14);
+ BLAKE_G(2, 7, 8, 13, 12);
+ BLAKE_G(0, 5, 10, 15, 8);
+ BLAKE_G(1, 6, 11, 12, 10);
+ }
+
+ for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
+ for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4];
+}
+
+__device__ void cn_blake_update(blake_state * __restrict__ S, const uint8_t * __restrict__ data, uint64_t datalen)
+{
+ int left = S->buflen >> 3;
+ int fill = 64 - left;
+
+ if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
+ memcpy((void *) (S->buf + left), (void *) data, fill);
+ S->t[0] += 512;
+ if (S->t[0] == 0) S->t[1]++;
+ cn_blake_compress(S, S->buf);
+ data += fill;
+ datalen -= (fill << 3);
+ left = 0;
+ }
+
+ while (datalen >= 512) {
+ S->t[0] += 512;
+ if (S->t[0] == 0) S->t[1]++;
+ cn_blake_compress(S, data);
+ data += 64;
+ datalen -= 512;
+ }
+
+ if (datalen > 0) {
+ memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
+ S->buflen = (left << 3) + datalen;
+ } else {
+ S->buflen = 0;
+ }
+}
+
+__device__
+void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restrict__ digest)
+{
+ const uint8_t padding[] = {
+ 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+ };
+ uint8_t pa = 0x81, pb = 0x01;
+ uint8_t msglen[8];
+ uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
+ if (lo < (unsigned) S->buflen) hi++;
+ U32TO8(msglen + 0, hi);
+ U32TO8(msglen + 4, lo);
+
+ if (S->buflen == 440) {
+ S->t[0] -= 8;
+ cn_blake_update(S, &pa, 8);
+ } else {
+ if (S->buflen < 440) {
+ if (S->buflen == 0) S->nullt = 1;
+ S->t[0] -= 440 - S->buflen;
+ cn_blake_update(S, padding, 440 - S->buflen);
+ } else {
+ S->t[0] -= 512 - S->buflen;
+ cn_blake_update(S, padding, 512 - S->buflen);
+ S->t[0] -= 440;
+ cn_blake_update(S, padding + 1, 440);
+ S->nullt = 1;
+ }
+ cn_blake_update(S, &pb, 8);
+ S->t[0] -= 8;
+ }
+ S->t[0] -= 64;
+ cn_blake_update(S, msglen, 64);
+
+ U32TO8(digest + 0, S->h[0]);
+ U32TO8(digest + 4, S->h[1]);
+ U32TO8(digest + 8, S->h[2]);
+ U32TO8(digest + 12, S->h[3]);
+ U32TO8(digest + 16, S->h[4]);
+ U32TO8(digest + 20, S->h[5]);
+ U32TO8(digest + 24, S->h[6]);
+ U32TO8(digest + 28, S->h[7]);
+}
+
+__device__
+void cn_blake(const uint8_t * __restrict__ in, uint64_t inlen, uint8_t * __restrict__ out)
+{
+ blake_state bs;
+ blake_state *S = (blake_state *)&bs;
+
+ S->h[0] = 0x6A09E667; S->h[1] = 0xBB67AE85; S->h[2] = 0x3C6EF372;
+ S->h[3] = 0xA54FF53A; S->h[4] = 0x510E527F; S->h[5] = 0x9B05688C;
+ S->h[6] = 0x1F83D9AB; S->h[7] = 0x5BE0CD19;
+ S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+ S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+
+ cn_blake_update(S, (uint8_t *)in, inlen * 8);
+ cn_blake_final(S, (uint8_t *)out);
+}
diff --git a/crypto/cn_groestl.cuh b/crypto/cn_groestl.cuh
new file mode 100644
index 0000000..62530d4
--- /dev/null
+++ b/crypto/cn_groestl.cuh
@@ -0,0 +1,347 @@
+
+#define GROESTL_ROWS 8
+#define GROESTL_LENGTHFIELDLEN GROESTL_ROWS
+#define GROESTL_COLS512 8
+#define GROESTL_SIZE512 (GROESTL_ROWS*GROESTL_COLS512)
+#define GROESTL_ROUNDS512 10
+#define GROESTL_HASH_BIT_LEN 256
+
+#define GROESTL_ROTL32(v, n) ROTL32(v, n)
+
+#define li_32(h) 0x##h##u
+#define GROESTL_EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
+#define u32BIG(a) \
+ ((GROESTL_ROTL32(a,8) & li_32(00FF00FF)) | \
+ (GROESTL_ROTL32(a,24) & li_32(FF00FF00)))
+
+typedef struct {
+ uint32_t chaining[GROESTL_SIZE512/sizeof(uint32_t)];
+ uint32_t block_counter1, block_counter2;
+ BitSequence buffer[GROESTL_SIZE512];
+ int buf_ptr;
+ int bits_in_last_byte;
+} groestlHashState;
+
+__constant__ uint32_t d_groestl_T[512] = {
+ 0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0x0d17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc,
+ 0x50f09060, 0x6050c0f0, 0x03050702, 0x02030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5,
+ 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0x0b1de6fb, 0xfb0bed1d,
+ 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded,
+ 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x0206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1,
+ 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x0818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441,
+ 0x0c141c08, 0x080c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0x0f111b0a, 0x0a0f1411, 0xb5c4eb2f, 0x2fb55ec4,
+ 0x091b150e, 0x0e091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba,
+ 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616,
+ 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2,
+ 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0, 0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c,
+ 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de,
+ 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7,
+ 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x060a0e04, 0x0406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e,
+ 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x058a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x040cfdf1, 0xf104f90c,
+ 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0x0e12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7,
+ 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b,
+ 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4,
+ 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e,
+ 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a,
+ 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0x0a1e120c, 0x0c0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37,
+ 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86,
+ 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x018c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b,
+ 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x0709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028,
+ 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3,
+ 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0x0d861a91, 0x85949b0f, 0xf851e94,
+ 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x050f0906, 0x06050c0f, 0x0103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836,
+ 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0,
+ 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x07890e80, 0xa7f2c133, 0x33a766f2,
+ 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e,
+ 0x8f8a8903, 0x038f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x0980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3,
+ 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e
+};
+
+#define GROESTL_ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) { \
+ temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
+ v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
+ v1 = temp_var; \
+}
+
+#define GROESTL_COLUMN(x,y,i, c0,c1,c2,c3,c4,c5,c6,c7, tv1,tv2,tu,tl,t) \
+ tu = d_groestl_T[2*(uint32_t)x[4*c0+0]]; \
+ tl = d_groestl_T[2*(uint32_t)x[4*c0+0]+1]; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c1+1]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c1+1]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c2+2]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c2+2]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c3+3]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c3+3]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tl ^= d_groestl_T[2*(uint32_t)x[4*c4+0]]; \
+ tu ^= d_groestl_T[2*(uint32_t)x[4*c4+0]+1]; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c5+1]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c5+1]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c6+2]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c6+2]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ tv1 = d_groestl_T[2*(uint32_t)x[4*c7+3]]; \
+ tv2 = d_groestl_T[2*(uint32_t)x[4*c7+3]+1]; \
+ GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ y[i] = tu; \
+ y[i+1] = tl;
+
+__device__
+void cn_groestl_RND512P(uint8_t * __restrict__ x, uint32_t * __restrict__ y, uint32_t r)
+{
+ uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+ uint32_t* x32 = (uint32_t*)x;
+ x32[ 0] ^= 0x00000000^r;
+ x32[ 2] ^= 0x00000010^r;
+ x32[ 4] ^= 0x00000020^r;
+ x32[ 6] ^= 0x00000030^r;
+ x32[ 8] ^= 0x00000040^r;
+ x32[10] ^= 0x00000050^r;
+ x32[12] ^= 0x00000060^r;
+ x32[14] ^= 0x00000070^r;
+ GROESTL_COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+}
+
+__device__
+void cn_groestl_RND512Q(uint8_t * __restrict__ x, uint32_t * __restrict__ y, uint32_t r)
+{
+ uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+ uint32_t* x32 = (uint32_t*)x;
+ x32[ 0] = ~x32[ 0];
+ x32[ 1] ^= 0xffffffff^r;
+ x32[ 2] = ~x32[ 2];
+ x32[ 3] ^= 0xefffffff^r;
+ x32[ 4] = ~x32[ 4];
+ x32[ 5] ^= 0xdfffffff^r;
+ x32[ 6] = ~x32[ 6];
+ x32[ 7] ^= 0xcfffffff^r;
+ x32[ 8] = ~x32[ 8];
+ x32[ 9] ^= 0xbfffffff^r;
+ x32[10] = ~x32[10];
+ x32[11] ^= 0xafffffff^r;
+ x32[12] = ~x32[12];
+ x32[13] ^= 0x9fffffff^r;
+ x32[14] = ~x32[14];
+ x32[15] ^= 0x8fffffff^r;
+ GROESTL_COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ GROESTL_COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+}
+
+__device__
+void cn_groestl_F512(uint32_t * __restrict__ h, const uint32_t * __restrict__ m)
+{
+ int i;
+ uint32_t Ptmp[2*GROESTL_COLS512];
+ uint32_t Qtmp[2*GROESTL_COLS512];
+ uint32_t y[2*GROESTL_COLS512];
+ uint32_t z[2*GROESTL_COLS512];
+
+ for (i = 0; i < 2*GROESTL_COLS512; i++) {
+ z[i] = m[i];
+ Ptmp[i] = h[i]^m[i];
+ }
+
+ cn_groestl_RND512Q((uint8_t*)z, y, 0x00000000);
+ cn_groestl_RND512Q((uint8_t*)y, z, 0x01000000);
+ cn_groestl_RND512Q((uint8_t*)z, y, 0x02000000);
+ cn_groestl_RND512Q((uint8_t*)y, z, 0x03000000);
+ cn_groestl_RND512Q((uint8_t*)z, y, 0x04000000);
+ cn_groestl_RND512Q((uint8_t*)y, z, 0x05000000);
+ cn_groestl_RND512Q((uint8_t*)z, y, 0x06000000);
+ cn_groestl_RND512Q((uint8_t*)y, z, 0x07000000);
+ cn_groestl_RND512Q((uint8_t*)z, y, 0x08000000);
+ cn_groestl_RND512Q((uint8_t*)y, Qtmp, 0x09000000);
+
+ cn_groestl_RND512P((uint8_t*)Ptmp, y, 0x00000000);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000001);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000002);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000003);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000004);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000005);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000006);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000007);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000008);
+ cn_groestl_RND512P((uint8_t*)y, Ptmp, 0x00000009);
+
+ for (i = 0; i < 2*GROESTL_COLS512; i++)
+ h[i] ^= Ptmp[i]^Qtmp[i];
+}
+
+__device__
+void cn_groestl_outputtransformation(groestlHashState *ctx)
+{
+ int j;
+ uint32_t temp[2*GROESTL_COLS512];
+ uint32_t y[2*GROESTL_COLS512];
+ uint32_t z[2*GROESTL_COLS512];
+
+ for (j = 0; j < 2*GROESTL_COLS512; j++)
+ temp[j] = ctx->chaining[j];
+
+ cn_groestl_RND512P((uint8_t*)temp, y, 0x00000000);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000001);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000002);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000003);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000004);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000005);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000006);
+ cn_groestl_RND512P((uint8_t*)y, z, 0x00000007);
+ cn_groestl_RND512P((uint8_t*)z, y, 0x00000008);
+ cn_groestl_RND512P((uint8_t*)y, temp, 0x00000009);
+
+ for (j = 0; j < 2*GROESTL_COLS512; j++)
+ ctx->chaining[j] ^= temp[j];
+}
+
+__device__
+void cn_groestl_transform(groestlHashState * __restrict__ ctx,
+ const uint8_t * __restrict__ input, int msglen)
+{
+ for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512) {
+ cn_groestl_F512(ctx->chaining,(uint32_t*)input);
+ ctx->block_counter1++;
+ if (ctx->block_counter1 == 0) ctx->block_counter2++;
+ }
+}
+
+__device__
+void cn_groestl_final(groestlHashState* __restrict__ ctx, BitSequence* __restrict__ output)
+{
+ int i, j = 0, hashbytelen = GROESTL_HASH_BIT_LEN/8;
+ uint8_t *s = (BitSequence*)ctx->chaining;
+
+ if (ctx->bits_in_last_byte) {
+ ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<bits_in_last_byte)-1)<<(8-ctx->bits_in_last_byte);
+ ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-ctx->bits_in_last_byte);
+ ctx->bits_in_last_byte = 0;
+ }
+ else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
+
+ if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) {
+ while (ctx->buf_ptr < GROESTL_SIZE512) {
+ ctx->buffer[(int)ctx->buf_ptr++] = 0;
+ }
+ cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512);
+ ctx->buf_ptr = 0;
+ }
+ while (ctx->buf_ptr < GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) {
+ ctx->buffer[(int)ctx->buf_ptr++] = 0;
+ }
+
+ ctx->block_counter1++;
+ if (ctx->block_counter1 == 0) ctx->block_counter2++;
+ ctx->buf_ptr = GROESTL_SIZE512;
+
+ while (ctx->buf_ptr > GROESTL_SIZE512-(int)sizeof(uint32_t)) {
+ ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
+ ctx->block_counter1 >>= 8;
+ }
+ while (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) {
+ ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
+ ctx->block_counter2 >>= 8;
+ }
+ cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512);
+ cn_groestl_outputtransformation(ctx);
+
+ for (i = GROESTL_SIZE512-hashbytelen; i < GROESTL_SIZE512; i++,j++) {
+ output[j] = s[i];
+ }
+
+ for (i = 0; i < GROESTL_COLS512; i++) {
+ ctx->chaining[i] = 0;
+ }
+ for (i = 0; i < GROESTL_SIZE512; i++) {
+ ctx->buffer[i] = 0;
+ }
+}
+
+__device__
+void cn_groestl_update(groestlHashState* __restrict__ ctx,
+ const BitSequence* __restrict__ input, DataLength databitlen)
+{
+ int index = 0;
+ int msglen = (int)(databitlen/8);
+ int rem = (int)(databitlen%8);
+
+ if (ctx->buf_ptr) {
+ while (ctx->buf_ptr < GROESTL_SIZE512 && index < msglen) {
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+ }
+ if (ctx->buf_ptr < GROESTL_SIZE512) {
+ if (rem) {
+ ctx->bits_in_last_byte = rem;
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+ }
+ return;
+ }
+
+ ctx->buf_ptr = 0;
+ cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512);
+ }
+
+ cn_groestl_transform(ctx, input+index, msglen-index);
+ index += ((msglen-index)/GROESTL_SIZE512)*GROESTL_SIZE512;
+
+ while (index < msglen) {
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+ }
+
+ if (rem) {
+ ctx->bits_in_last_byte = rem;
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+ }
+}
+
+__device__
+void cn_groestl_init(groestlHashState* ctx)
+{
+ int i = 0;
+
+ for(;i<(GROESTL_SIZE512/sizeof(uint32_t));i++)
+ ctx->chaining[i] = 0;
+
+ ctx->chaining[2*GROESTL_COLS512-1] = u32BIG((uint32_t)GROESTL_HASH_BIT_LEN);
+ ctx->buf_ptr = 0;
+ ctx->block_counter1 = 0;
+ ctx->block_counter2 = 0;
+ ctx->bits_in_last_byte = 0;
+}
+
+__device__
+void cn_groestl(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval)
+{
+ DataLength databitlen = len << 3;
+ groestlHashState context;
+
+ cn_groestl_init(&context);
+ cn_groestl_update(&context, data, databitlen);
+ cn_groestl_final(&context, hashval);
+}
diff --git a/crypto/cn_jh.cuh b/crypto/cn_jh.cuh
new file mode 100644
index 0000000..4bcb64b
--- /dev/null
+++ b/crypto/cn_jh.cuh
@@ -0,0 +1,293 @@
+
+typedef struct {
+ int hashbitlen;
+ unsigned long long databitlen;
+ unsigned long long datasize_in_buffer;
+ uint64_t x[8][2];
+ unsigned char buffer[64];
+} jhHashState;
+
+__constant__ unsigned char d_JH256_H0[512] = {
+ 0xeb, 0x98, 0xa3, 0x41, 0x2c, 0x20, 0xd3, 0xeb, 0x92, 0xcd, 0xbe, 0x7b, 0x9c, 0xb2, 0x45, 0xc1,
+ 0x1c, 0x93, 0x51, 0x91, 0x60, 0xd4, 0xc7, 0xfa, 0x26, 0x0, 0x82, 0xd6, 0x7e, 0x50, 0x8a, 0x3,
+ 0xa4, 0x23, 0x9e, 0x26, 0x77, 0x26, 0xb9, 0x45, 0xe0, 0xfb, 0x1a, 0x48, 0xd4, 0x1a, 0x94, 0x77,
+ 0xcd, 0xb5, 0xab, 0x26, 0x2, 0x6b, 0x17, 0x7a, 0x56, 0xf0, 0x24, 0x42, 0xf, 0xff, 0x2f, 0xa8,
+ 0x71, 0xa3, 0x96, 0x89, 0x7f, 0x2e, 0x4d, 0x75, 0x1d, 0x14, 0x49, 0x8, 0xf7, 0x7d, 0xe2, 0x62,
+ 0x27, 0x76, 0x95, 0xf7, 0x76, 0x24, 0x8f, 0x94, 0x87, 0xd5, 0xb6, 0x57, 0x47, 0x80, 0x29, 0x6c,
+ 0x5c, 0x5e, 0x27, 0x2d, 0xac, 0x8e, 0xd, 0x6c, 0x51, 0x84, 0x50, 0xc6, 0x57, 0x5, 0x7a, 0xf,
+ 0x7b, 0xe4, 0xd3, 0x67, 0x70, 0x24, 0x12, 0xea, 0x89, 0xe3, 0xab, 0x13, 0xd3, 0x1c, 0xd7, 0x69
+};
+
+__constant__ unsigned char d_E8_rc[42][32] = {
+ {0x72, 0xd5, 0xde, 0xa2, 0xdf, 0x15, 0xf8, 0x67, 0x7b, 0x84, 0x15, 0xa, 0xb7, 0x23, 0x15, 0x57, 0x81, 0xab, 0xd6, 0x90, 0x4d, 0x5a, 0x87, 0xf6, 0x4e, 0x9f, 0x4f, 0xc5, 0xc3, 0xd1, 0x2b, 0x40},
+ {0xea, 0x98, 0x3a, 0xe0, 0x5c, 0x45, 0xfa, 0x9c, 0x3, 0xc5, 0xd2, 0x99, 0x66, 0xb2, 0x99, 0x9a, 0x66, 0x2, 0x96, 0xb4, 0xf2, 0xbb, 0x53, 0x8a, 0xb5, 0x56, 0x14, 0x1a, 0x88, 0xdb, 0xa2, 0x31},
+ {0x3, 0xa3, 0x5a, 0x5c, 0x9a, 0x19, 0xe, 0xdb, 0x40, 0x3f, 0xb2, 0xa, 0x87, 0xc1, 0x44, 0x10, 0x1c, 0x5, 0x19, 0x80, 0x84, 0x9e, 0x95, 0x1d, 0x6f, 0x33, 0xeb, 0xad, 0x5e, 0xe7, 0xcd, 0xdc},
+ {0x10, 0xba, 0x13, 0x92, 0x2, 0xbf, 0x6b, 0x41, 0xdc, 0x78, 0x65, 0x15, 0xf7, 0xbb, 0x27, 0xd0, 0xa, 0x2c, 0x81, 0x39, 0x37, 0xaa, 0x78, 0x50, 0x3f, 0x1a, 0xbf, 0xd2, 0x41, 0x0, 0x91, 0xd3},
+ {0x42, 0x2d, 0x5a, 0xd, 0xf6, 0xcc, 0x7e, 0x90, 0xdd, 0x62, 0x9f, 0x9c, 0x92, 0xc0, 0x97, 0xce, 0x18, 0x5c, 0xa7, 0xb, 0xc7, 0x2b, 0x44, 0xac, 0xd1, 0xdf, 0x65, 0xd6, 0x63, 0xc6, 0xfc, 0x23},
+ {0x97, 0x6e, 0x6c, 0x3, 0x9e, 0xe0, 0xb8, 0x1a, 0x21, 0x5, 0x45, 0x7e, 0x44, 0x6c, 0xec, 0xa8, 0xee, 0xf1, 0x3, 0xbb, 0x5d, 0x8e, 0x61, 0xfa, 0xfd, 0x96, 0x97, 0xb2, 0x94, 0x83, 0x81, 0x97},
+ {0x4a, 0x8e, 0x85, 0x37, 0xdb, 0x3, 0x30, 0x2f, 0x2a, 0x67, 0x8d, 0x2d, 0xfb, 0x9f, 0x6a, 0x95, 0x8a, 0xfe, 0x73, 0x81, 0xf8, 0xb8, 0x69, 0x6c, 0x8a, 0xc7, 0x72, 0x46, 0xc0, 0x7f, 0x42, 0x14},
+ {0xc5, 0xf4, 0x15, 0x8f, 0xbd, 0xc7, 0x5e, 0xc4, 0x75, 0x44, 0x6f, 0xa7, 0x8f, 0x11, 0xbb, 0x80, 0x52, 0xde, 0x75, 0xb7, 0xae, 0xe4, 0x88, 0xbc, 0x82, 0xb8, 0x0, 0x1e, 0x98, 0xa6, 0xa3, 0xf4},
+ {0x8e, 0xf4, 0x8f, 0x33, 0xa9, 0xa3, 0x63, 0x15, 0xaa, 0x5f, 0x56, 0x24, 0xd5, 0xb7, 0xf9, 0x89, 0xb6, 0xf1, 0xed, 0x20, 0x7c, 0x5a, 0xe0, 0xfd, 0x36, 0xca, 0xe9, 0x5a, 0x6, 0x42, 0x2c, 0x36},
+ {0xce, 0x29, 0x35, 0x43, 0x4e, 0xfe, 0x98, 0x3d, 0x53, 0x3a, 0xf9, 0x74, 0x73, 0x9a, 0x4b, 0xa7, 0xd0, 0xf5, 0x1f, 0x59, 0x6f, 0x4e, 0x81, 0x86, 0xe, 0x9d, 0xad, 0x81, 0xaf, 0xd8, 0x5a, 0x9f},
+ {0xa7, 0x5, 0x6, 0x67, 0xee, 0x34, 0x62, 0x6a, 0x8b, 0xb, 0x28, 0xbe, 0x6e, 0xb9, 0x17, 0x27, 0x47, 0x74, 0x7, 0x26, 0xc6, 0x80, 0x10, 0x3f, 0xe0, 0xa0, 0x7e, 0x6f, 0xc6, 0x7e, 0x48, 0x7b},
+ {0xd, 0x55, 0xa, 0xa5, 0x4a, 0xf8, 0xa4, 0xc0, 0x91, 0xe3, 0xe7, 0x9f, 0x97, 0x8e, 0xf1, 0x9e, 0x86, 0x76, 0x72, 0x81, 0x50, 0x60, 0x8d, 0xd4, 0x7e, 0x9e, 0x5a, 0x41, 0xf3, 0xe5, 0xb0, 0x62},
+ {0xfc, 0x9f, 0x1f, 0xec, 0x40, 0x54, 0x20, 0x7a, 0xe3, 0xe4, 0x1a, 0x0, 0xce, 0xf4, 0xc9, 0x84, 0x4f, 0xd7, 0x94, 0xf5, 0x9d, 0xfa, 0x95, 0xd8, 0x55, 0x2e, 0x7e, 0x11, 0x24, 0xc3, 0x54, 0xa5},
+ {0x5b, 0xdf, 0x72, 0x28, 0xbd, 0xfe, 0x6e, 0x28, 0x78, 0xf5, 0x7f, 0xe2, 0xf, 0xa5, 0xc4, 0xb2, 0x5, 0x89, 0x7c, 0xef, 0xee, 0x49, 0xd3, 0x2e, 0x44, 0x7e, 0x93, 0x85, 0xeb, 0x28, 0x59, 0x7f},
+ {0x70, 0x5f, 0x69, 0x37, 0xb3, 0x24, 0x31, 0x4a, 0x5e, 0x86, 0x28, 0xf1, 0x1d, 0xd6, 0xe4, 0x65, 0xc7, 0x1b, 0x77, 0x4, 0x51, 0xb9, 0x20, 0xe7, 0x74, 0xfe, 0x43, 0xe8, 0x23, 0xd4, 0x87, 0x8a},
+ {0x7d, 0x29, 0xe8, 0xa3, 0x92, 0x76, 0x94, 0xf2, 0xdd, 0xcb, 0x7a, 0x9, 0x9b, 0x30, 0xd9, 0xc1, 0x1d, 0x1b, 0x30, 0xfb, 0x5b, 0xdc, 0x1b, 0xe0, 0xda, 0x24, 0x49, 0x4f, 0xf2, 0x9c, 0x82, 0xbf},
+ {0xa4, 0xe7, 0xba, 0x31, 0xb4, 0x70, 0xbf, 0xff, 0xd, 0x32, 0x44, 0x5, 0xde, 0xf8, 0xbc, 0x48, 0x3b, 0xae, 0xfc, 0x32, 0x53, 0xbb, 0xd3, 0x39, 0x45, 0x9f, 0xc3, 0xc1, 0xe0, 0x29, 0x8b, 0xa0},
+ {0xe5, 0xc9, 0x5, 0xfd, 0xf7, 0xae, 0x9, 0xf, 0x94, 0x70, 0x34, 0x12, 0x42, 0x90, 0xf1, 0x34, 0xa2, 0x71, 0xb7, 0x1, 0xe3, 0x44, 0xed, 0x95, 0xe9, 0x3b, 0x8e, 0x36, 0x4f, 0x2f, 0x98, 0x4a},
+ {0x88, 0x40, 0x1d, 0x63, 0xa0, 0x6c, 0xf6, 0x15, 0x47, 0xc1, 0x44, 0x4b, 0x87, 0x52, 0xaf, 0xff, 0x7e, 0xbb, 0x4a, 0xf1, 0xe2, 0xa, 0xc6, 0x30, 0x46, 0x70, 0xb6, 0xc5, 0xcc, 0x6e, 0x8c, 0xe6},
+ {0xa4, 0xd5, 0xa4, 0x56, 0xbd, 0x4f, 0xca, 0x0, 0xda, 0x9d, 0x84, 0x4b, 0xc8, 0x3e, 0x18, 0xae, 0x73, 0x57, 0xce, 0x45, 0x30, 0x64, 0xd1, 0xad, 0xe8, 0xa6, 0xce, 0x68, 0x14, 0x5c, 0x25, 0x67},
+ {0xa3, 0xda, 0x8c, 0xf2, 0xcb, 0xe, 0xe1, 0x16, 0x33, 0xe9, 0x6, 0x58, 0x9a, 0x94, 0x99, 0x9a, 0x1f, 0x60, 0xb2, 0x20, 0xc2, 0x6f, 0x84, 0x7b, 0xd1, 0xce, 0xac, 0x7f, 0xa0, 0xd1, 0x85, 0x18},
+ {0x32, 0x59, 0x5b, 0xa1, 0x8d, 0xdd, 0x19, 0xd3, 0x50, 0x9a, 0x1c, 0xc0, 0xaa, 0xa5, 0xb4, 0x46, 0x9f, 0x3d, 0x63, 0x67, 0xe4, 0x4, 0x6b, 0xba, 0xf6, 0xca, 0x19, 0xab, 0xb, 0x56, 0xee, 0x7e},
+ {0x1f, 0xb1, 0x79, 0xea, 0xa9, 0x28, 0x21, 0x74, 0xe9, 0xbd, 0xf7, 0x35, 0x3b, 0x36, 0x51, 0xee, 0x1d, 0x57, 0xac, 0x5a, 0x75, 0x50, 0xd3, 0x76, 0x3a, 0x46, 0xc2, 0xfe, 0xa3, 0x7d, 0x70, 0x1},
+ {0xf7, 0x35, 0xc1, 0xaf, 0x98, 0xa4, 0xd8, 0x42, 0x78, 0xed, 0xec, 0x20, 0x9e, 0x6b, 0x67, 0x79, 0x41, 0x83, 0x63, 0x15, 0xea, 0x3a, 0xdb, 0xa8, 0xfa, 0xc3, 0x3b, 0x4d, 0x32, 0x83, 0x2c, 0x83},
+ {0xa7, 0x40, 0x3b, 0x1f, 0x1c, 0x27, 0x47, 0xf3, 0x59, 0x40, 0xf0, 0x34, 0xb7, 0x2d, 0x76, 0x9a, 0xe7, 0x3e, 0x4e, 0x6c, 0xd2, 0x21, 0x4f, 0xfd, 0xb8, 0xfd, 0x8d, 0x39, 0xdc, 0x57, 0x59, 0xef},
+ {0x8d, 0x9b, 0xc, 0x49, 0x2b, 0x49, 0xeb, 0xda, 0x5b, 0xa2, 0xd7, 0x49, 0x68, 0xf3, 0x70, 0xd, 0x7d, 0x3b, 0xae, 0xd0, 0x7a, 0x8d, 0x55, 0x84, 0xf5, 0xa5, 0xe9, 0xf0, 0xe4, 0xf8, 0x8e, 0x65},
+ {0xa0, 0xb8, 0xa2, 0xf4, 0x36, 0x10, 0x3b, 0x53, 0xc, 0xa8, 0x7, 0x9e, 0x75, 0x3e, 0xec, 0x5a, 0x91, 0x68, 0x94, 0x92, 0x56, 0xe8, 0x88, 0x4f, 0x5b, 0xb0, 0x5c, 0x55, 0xf8, 0xba, 0xbc, 0x4c},
+ {0xe3, 0xbb, 0x3b, 0x99, 0xf3, 0x87, 0x94, 0x7b, 0x75, 0xda, 0xf4, 0xd6, 0x72, 0x6b, 0x1c, 0x5d, 0x64, 0xae, 0xac, 0x28, 0xdc, 0x34, 0xb3, 0x6d, 0x6c, 0x34, 0xa5, 0x50, 0xb8, 0x28, 0xdb, 0x71},
+ {0xf8, 0x61, 0xe2, 0xf2, 0x10, 0x8d, 0x51, 0x2a, 0xe3, 0xdb, 0x64, 0x33, 0x59, 0xdd, 0x75, 0xfc, 0x1c, 0xac, 0xbc, 0xf1, 0x43, 0xce, 0x3f, 0xa2, 0x67, 0xbb, 0xd1, 0x3c, 0x2, 0xe8, 0x43, 0xb0},
+ {0x33, 0xa, 0x5b, 0xca, 0x88, 0x29, 0xa1, 0x75, 0x7f, 0x34, 0x19, 0x4d, 0xb4, 0x16, 0x53, 0x5c, 0x92, 0x3b, 0x94, 0xc3, 0xe, 0x79, 0x4d, 0x1e, 0x79, 0x74, 0x75, 0xd7, 0xb6, 0xee, 0xaf, 0x3f},
+ {0xea, 0xa8, 0xd4, 0xf7, 0xbe, 0x1a, 0x39, 0x21, 0x5c, 0xf4, 0x7e, 0x9, 0x4c, 0x23, 0x27, 0x51, 0x26, 0xa3, 0x24, 0x53, 0xba, 0x32, 0x3c, 0xd2, 0x44, 0xa3, 0x17, 0x4a, 0x6d, 0xa6, 0xd5, 0xad},
+ {0xb5, 0x1d, 0x3e, 0xa6, 0xaf, 0xf2, 0xc9, 0x8, 0x83, 0x59, 0x3d, 0x98, 0x91, 0x6b, 0x3c, 0x56, 0x4c, 0xf8, 0x7c, 0xa1, 0x72, 0x86, 0x60, 0x4d, 0x46, 0xe2, 0x3e, 0xcc, 0x8, 0x6e, 0xc7, 0xf6},
+ {0x2f, 0x98, 0x33, 0xb3, 0xb1, 0xbc, 0x76, 0x5e, 0x2b, 0xd6, 0x66, 0xa5, 0xef, 0xc4, 0xe6, 0x2a, 0x6, 0xf4, 0xb6, 0xe8, 0xbe, 0xc1, 0xd4, 0x36, 0x74, 0xee, 0x82, 0x15, 0xbc, 0xef, 0x21, 0x63},
+ {0xfd, 0xc1, 0x4e, 0xd, 0xf4, 0x53, 0xc9, 0x69, 0xa7, 0x7d, 0x5a, 0xc4, 0x6, 0x58, 0x58, 0x26, 0x7e, 0xc1, 0x14, 0x16, 0x6, 0xe0, 0xfa, 0x16, 0x7e, 0x90, 0xaf, 0x3d, 0x28, 0x63, 0x9d, 0x3f},
+ {0xd2, 0xc9, 0xf2, 0xe3, 0x0, 0x9b, 0xd2, 0xc, 0x5f, 0xaa, 0xce, 0x30, 0xb7, 0xd4, 0xc, 0x30, 0x74, 0x2a, 0x51, 0x16, 0xf2, 0xe0, 0x32, 0x98, 0xd, 0xeb, 0x30, 0xd8, 0xe3, 0xce, 0xf8, 0x9a},
+ {0x4b, 0xc5, 0x9e, 0x7b, 0xb5, 0xf1, 0x79, 0x92, 0xff, 0x51, 0xe6, 0x6e, 0x4, 0x86, 0x68, 0xd3, 0x9b, 0x23, 0x4d, 0x57, 0xe6, 0x96, 0x67, 0x31, 0xcc, 0xe6, 0xa6, 0xf3, 0x17, 0xa, 0x75, 0x5},
+ {0xb1, 0x76, 0x81, 0xd9, 0x13, 0x32, 0x6c, 0xce, 0x3c, 0x17, 0x52, 0x84, 0xf8, 0x5, 0xa2, 0x62, 0xf4, 0x2b, 0xcb, 0xb3, 0x78, 0x47, 0x15, 0x47, 0xff, 0x46, 0x54, 0x82, 0x23, 0x93, 0x6a, 0x48},
+ {0x38, 0xdf, 0x58, 0x7, 0x4e, 0x5e, 0x65, 0x65, 0xf2, 0xfc, 0x7c, 0x89, 0xfc, 0x86, 0x50, 0x8e, 0x31, 0x70, 0x2e, 0x44, 0xd0, 0xb, 0xca, 0x86, 0xf0, 0x40, 0x9, 0xa2, 0x30, 0x78, 0x47, 0x4e},
+ {0x65, 0xa0, 0xee, 0x39, 0xd1, 0xf7, 0x38, 0x83, 0xf7, 0x5e, 0xe9, 0x37, 0xe4, 0x2c, 0x3a, 0xbd, 0x21, 0x97, 0xb2, 0x26, 0x1, 0x13, 0xf8, 0x6f, 0xa3, 0x44, 0xed, 0xd1, 0xef, 0x9f, 0xde, 0xe7},
+ {0x8b, 0xa0, 0xdf, 0x15, 0x76, 0x25, 0x92, 0xd9, 0x3c, 0x85, 0xf7, 0xf6, 0x12, 0xdc, 0x42, 0xbe, 0xd8, 0xa7, 0xec, 0x7c, 0xab, 0x27, 0xb0, 0x7e, 0x53, 0x8d, 0x7d, 0xda, 0xaa, 0x3e, 0xa8, 0xde},
+ {0xaa, 0x25, 0xce, 0x93, 0xbd, 0x2, 0x69, 0xd8, 0x5a, 0xf6, 0x43, 0xfd, 0x1a, 0x73, 0x8, 0xf9, 0xc0, 0x5f, 0xef, 0xda, 0x17, 0x4a, 0x19, 0xa5, 0x97, 0x4d, 0x66, 0x33, 0x4c, 0xfd, 0x21, 0x6a},
+ {0x35, 0xb4, 0x98, 0x31, 0xdb, 0x41, 0x15, 0x70, 0xea, 0x1e, 0xf, 0xbb, 0xed, 0xcd, 0x54, 0x9b, 0x9a, 0xd0, 0x63, 0xa1, 0x51, 0x97, 0x40, 0x72, 0xf6, 0x75, 0x9d, 0xbf, 0x91, 0x47, 0x6f, 0xe2}
+};
+
+#define JH_SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
+#define JH_SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
+#define JH_SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
+#define JH_SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
+#define JH_SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
+#define JH_SWAP32(x) (x) = (((x) << 32) | ((x) >> 32));
+
+#define JH_L(m0,m1,m2,m3,m4,m5,m6,m7) \
+ (m4) ^= (m1); \
+ (m5) ^= (m2); \
+ (m6) ^= (m0) ^ (m3); \
+ (m7) ^= (m0); \
+ (m0) ^= (m5); \
+ (m1) ^= (m6); \
+ (m2) ^= (m4) ^ (m7); \
+ (m3) ^= (m4);
+
+#define JH_SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \
+ m3 = ~(m3); \
+ m7 = ~(m7); \
+ m0 ^= ((~(m2)) & (cc0)); \
+ m4 ^= ((~(m6)) & (cc1)); \
+ temp0 = (cc0) ^ ((m0) & (m1));\
+ temp1 = (cc1) ^ ((m4) & (m5));\
+ m0 ^= ((m2) & (m3)); \
+ m4 ^= ((m6) & (m7)); \
+ m3 ^= ((~(m1)) & (m2)); \
+ m7 ^= ((~(m5)) & (m6)); \
+ m1 ^= ((m0) & (m2)); \
+ m5 ^= ((m4) & (m6)); \
+ m2 ^= ((m0) & (~(m3))); \
+ m6 ^= ((m4) & (~(m7))); \
+ m0 ^= ((m1) | (m3)); \
+ m4 ^= ((m5) | (m7)); \
+ m3 ^= ((m1) & (m2)); \
+ m7 ^= ((m5) & (m6)); \
+ m1 ^= (temp0 & (m0)); \
+ m5 ^= (temp1 & (m4)); \
+ m2 ^= temp0; \
+ m6 ^= temp1;
+
+__device__
+void cn_jh_E8(jhHashState *state)
+{
+ uint64_t i,roundnumber,temp0,temp1;
+
+ for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7)
+ {
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+0])[i],((uint64_t *)d_E8_rc[roundnumber+0])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP1(state->x[1][i]); JH_SWAP1(state->x[3][i]); JH_SWAP1(state->x[5][i]); JH_SWAP1(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+1])[i],((uint64_t *)d_E8_rc[roundnumber+1])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP2(state->x[1][i]); JH_SWAP2(state->x[3][i]); JH_SWAP2(state->x[5][i]); JH_SWAP2(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+2])[i],((uint64_t *)d_E8_rc[roundnumber+2])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP4(state->x[1][i]); JH_SWAP4(state->x[3][i]); JH_SWAP4(state->x[5][i]); JH_SWAP4(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+3])[i],((uint64_t *)d_E8_rc[roundnumber+3])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP8(state->x[1][i]); JH_SWAP8(state->x[3][i]); JH_SWAP8(state->x[5][i]); JH_SWAP8(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+4])[i],((uint64_t *)d_E8_rc[roundnumber+4])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP16(state->x[1][i]); JH_SWAP16(state->x[3][i]); JH_SWAP16(state->x[5][i]); JH_SWAP16(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+5])[i],((uint64_t *)d_E8_rc[roundnumber+5])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ JH_SWAP32(state->x[1][i]); JH_SWAP32(state->x[3][i]); JH_SWAP32(state->x[5][i]); JH_SWAP32(state->x[7][i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],
+ ((uint64_t *)d_E8_rc[roundnumber+6])[i],((uint64_t *)d_E8_rc[roundnumber+6])[i+2] );
+ JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ }
+
+ for (i = 1; i < 8; i = i+2) {
+ temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
+ }
+ }
+}
+
+__device__
+void cn_jh_F8(jhHashState *state)
+{
+ uint64_t i;
+ for (i = 0; i < 8; i++) {
+ state->x[i >> 1][i & 1] ^= ((uint64_t *)state->buffer)[i];
+ }
+
+ cn_jh_E8(state);
+
+ for (i = 0; i < 8; i++) {
+ state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64_t *)state->buffer)[i];
+ }
+}
+
+__device__
+void cn_jh_update(jhHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
+{
+ DataLength index;
+
+ state->databitlen += databitlen;
+ index = 0;
+
+ if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) )
+ {
+ if ( (databitlen & 7) == 0 ) {
+ memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3));
+ }
+ else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1);
+ state->datasize_in_buffer += databitlen;
+ databitlen = 0;
+ }
+
+ if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) {
+ memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ;
+ index = 64-(state->datasize_in_buffer >> 3);
+ databitlen = databitlen - (512 - state->datasize_in_buffer);
+ cn_jh_F8(state);
+ state->datasize_in_buffer = 0;
+ }
+
+ for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
+ memcpy(state->buffer, data+index, 64);
+ cn_jh_F8(state);
+ }
+
+ if ( databitlen > 0) {
+ if ((databitlen & 7) == 0)
+ memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
+ else
+ memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
+ state->datasize_in_buffer = databitlen;
+ }
+}
+
+/* pad the message, process the padded block(s), truncate the hash value H to obtain the message digest */
+__device__
+void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __restrict__ hashval)
+{
+ unsigned int i;
+ //uint32_t *bufptr = (uint32_t *)state->buffer;
+
+ if ( (state->databitlen & 0x1ff) == 0 ) {
+ /* pad the message when databitlen is multiple of 512 bits, then process the padded block */
+ memset(state->buffer, 0, 64);
+ //for( i = 0; i < 16; i++ ) *(bufptr+i) = 0x00000000;
+ state->buffer[0] = 0x80;
+ state->buffer[63] = state->databitlen & 0xff;
+ state->buffer[62] = (state->databitlen >> 8) & 0xff;
+ state->buffer[61] = (state->databitlen >> 16) & 0xff;
+ state->buffer[60] = (state->databitlen >> 24) & 0xff;
+ state->buffer[59] = (state->databitlen >> 32) & 0xff;
+ state->buffer[58] = (state->databitlen >> 40) & 0xff;
+ state->buffer[57] = (state->databitlen >> 48) & 0xff;
+ state->buffer[56] = (state->databitlen >> 56) & 0xff;
+ cn_jh_F8(state);
+
+ } else {
+
+ /*set the rest of the bytes in the buffer to 0*/
+ if ( (state->datasize_in_buffer & 7) == 0) {
+ for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0;
+ } else {
+ for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0;
+ }
+
+ /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
+ state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
+
+ cn_jh_F8(state);
+ memset(state->buffer, 0, 64);
+ //for( i = 0; i < 16; i++ ) *(bufptr+i) = 0x00000000;
+ state->buffer[63] = state->databitlen & 0xff;
+ state->buffer[62] = (state->databitlen >> 8) & 0xff;
+ state->buffer[61] = (state->databitlen >> 16) & 0xff;
+ state->buffer[60] = (state->databitlen >> 24) & 0xff;
+ state->buffer[59] = (state->databitlen >> 32) & 0xff;
+ state->buffer[58] = (state->databitlen >> 40) & 0xff;
+ state->buffer[57] = (state->databitlen >> 48) & 0xff;
+ state->buffer[56] = (state->databitlen >> 56) & 0xff;
+ cn_jh_F8(state);
+ }
+
+ memcpy(hashval, (unsigned char*)state->x+64+32, 32);
+}
+
+__device__
+void cn_jh_init(jhHashState *state, int hashbitlen)
+{
+ state->databitlen = 0;
+ state->datasize_in_buffer = 0;
+ state->hashbitlen = hashbitlen;
+ memcpy(state->x, d_JH256_H0, 128);
+}
+
+__device__
+void cn_jh(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval)
+{
+ int hashbitlen = 256;
+ DataLength databitlen = len << 3;
+ jhHashState state;
+
+ cn_jh_init(&state, hashbitlen);
+ cn_jh_update(&state, data, databitlen);
+ cn_jh_final(&state, hashval);
+}
diff --git a/crypto/cn_keccak.cuh b/crypto/cn_keccak.cuh
new file mode 100644
index 0000000..3acef7a
--- /dev/null
+++ b/crypto/cn_keccak.cuh
@@ -0,0 +1,211 @@
+__constant__ uint64_t keccakf_rndc[24] = {
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+ 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+ 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+};
+
+#if __CUDA_ARCH__ >= 350
+ __forceinline__ __device__ uint64_t cuda_rotl64(const uint64_t value, const int offset)
+ {
+ uint2 result;
+ if(offset >= 32)
+ {
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
+ }
+ else
+ {
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
+ }
+ return __double_as_longlong(__hiloint2double(result.y, result.x));
+ }
+ #define rotl64_1(x, y) (cuda_rotl64((x), (y)))
+#else
+ #define rotl64_1(x, y) ((x) << (y) | ((x) >> (64 - (y))))
+#endif
+
+#define rotl64_2(x, y) rotl64_1(((x) >> 32) | ((x) << 32), (y))
+#define bitselect(a, b, c) ((a) ^ ((c) & ((b) ^ (a))))
+
+__device__ __forceinline__
+void cn_keccakf2(uint64_t *s)
+{
+ uint8_t i;
+
+ for(i = 0; i < 24; ++i)
+ {
+ uint64_t bc[5], tmpxor[5], tmp1, tmp2;
+
+ tmpxor[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+ tmpxor[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+ tmpxor[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+ tmpxor[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+ tmpxor[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+ bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1);
+ bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1);
+ bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1);
+ bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1);
+ bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1);
+
+ tmp1 = s[1] ^ bc[0];
+
+ s[0] ^= bc[4];
+ s[1] = rotl64_2(s[6] ^ bc[0], 12);
+ s[6] = rotl64_1(s[9] ^ bc[3], 20);
+ s[9] = rotl64_2(s[22] ^ bc[1], 29);
+ s[22] = rotl64_2(s[14] ^ bc[3], 7);
+ s[14] = rotl64_1(s[20] ^ bc[4], 18);
+ s[20] = rotl64_2(s[2] ^ bc[1], 30);
+ s[2] = rotl64_2(s[12] ^ bc[1], 11);
+ s[12] = rotl64_1(s[13] ^ bc[2], 25);
+ s[13] = rotl64_1(s[19] ^ bc[3], 8);
+ s[19] = rotl64_2(s[23] ^ bc[2], 24);
+ s[23] = rotl64_2(s[15] ^ bc[4], 9);
+ s[15] = rotl64_1(s[4] ^ bc[3], 27);
+ s[4] = rotl64_1(s[24] ^ bc[3], 14);
+ s[24] = rotl64_1(s[21] ^ bc[0], 2);
+ s[21] = rotl64_2(s[8] ^ bc[2], 23);
+ s[8] = rotl64_2(s[16] ^ bc[0], 13);
+ s[16] = rotl64_2(s[5] ^ bc[4], 4);
+ s[5] = rotl64_1(s[3] ^ bc[2], 28);
+ s[3] = rotl64_1(s[18] ^ bc[2], 21);
+ s[18] = rotl64_1(s[17] ^ bc[1], 15);
+ s[17] = rotl64_1(s[11] ^ bc[0], 10);
+ s[11] = rotl64_1(s[7] ^ bc[1], 6);
+ s[7] = rotl64_1(s[10] ^ bc[4], 3);
+ s[10] = rotl64_1(tmp1, 1);
+
+ tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
+ tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
+ tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
+ tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1);
+ tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1);
+ s[0] ^= keccakf_rndc[i];
+ }
+}
+
+__device__ __forceinline__
+void cn_keccakf(uint64_t *s)
+{
+ uint64_t bc[5], tmpxor[5], tmp1, tmp2;
+
+ tmpxor[0] = s[0] ^ s[5];
+ tmpxor[1] = s[1] ^ s[6] ^ 0x8000000000000000ULL;
+ tmpxor[2] = s[2] ^ s[7];
+ tmpxor[3] = s[3] ^ s[8];
+ tmpxor[4] = s[4] ^ s[9];
+
+ bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1);
+ bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1);
+ bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1);
+ bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1);
+ bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1);
+
+ tmp1 = s[1] ^ bc[0];
+
+ s[0] ^= bc[4];
+ s[1] = rotl64_2(s[6] ^ bc[0], 12);
+ s[6] = rotl64_1(s[9] ^ bc[3], 20);
+ s[9] = rotl64_2(bc[1], 29);
+ s[22] = rotl64_2(bc[3], 7);
+ s[14] = rotl64_1(bc[4], 18);
+ s[20] = rotl64_2(s[2] ^ bc[1], 30);
+ s[2] = rotl64_2(bc[1], 11);
+ s[12] = rotl64_1(bc[2], 25);
+ s[13] = rotl64_1(bc[3], 8);
+ s[19] = rotl64_2(bc[2], 24);
+ s[23] = rotl64_2(bc[4], 9);
+ s[15] = rotl64_1(s[4] ^ bc[3], 27);
+ s[4] = rotl64_1(bc[3], 14);
+ s[24] = rotl64_1(bc[0], 2);
+ s[21] = rotl64_2(s[8] ^ bc[2], 23);
+ s[8] = rotl64_2(0x8000000000000000ULL ^ bc[0], 13);
+ s[16] = rotl64_2(s[5] ^ bc[4], 4);
+ s[5] = rotl64_1(s[3] ^ bc[2], 28);
+ s[3] = rotl64_1(bc[2], 21);
+ s[18] = rotl64_1(bc[1], 15);
+ s[17] = rotl64_1(bc[0], 10);
+ s[11] = rotl64_1(s[7] ^ bc[1], 6);
+ s[7] = rotl64_1(bc[4], 3);
+ s[10] = rotl64_1(tmp1, 1);
+
+ tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
+ tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
+ tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
+ tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1);
+ tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1);
+ s[0] ^= 0x0000000000000001;
+
+ for(int i = 1; i < 24; ++i)
+ {
+ tmpxor[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+ tmpxor[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+ tmpxor[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+ tmpxor[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+ tmpxor[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+ bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1);
+ bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1);
+ bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1);
+ bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1);
+ bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1);
+
+ tmp1 = s[1] ^ bc[0];
+
+ s[0] ^= bc[4];
+ s[1] = rotl64_2(s[6] ^ bc[0], 12);
+ s[6] = rotl64_1(s[9] ^ bc[3], 20);
+ s[9] = rotl64_2(s[22] ^ bc[1], 29);
+ s[22] = rotl64_2(s[14] ^ bc[3], 7);
+ s[14] = rotl64_1(s[20] ^ bc[4], 18);
+ s[20] = rotl64_2(s[2] ^ bc[1], 30);
+ s[2] = rotl64_2(s[12] ^ bc[1], 11);
+ s[12] = rotl64_1(s[13] ^ bc[2], 25);
+ s[13] = rotl64_1(s[19] ^ bc[3], 8);
+ s[19] = rotl64_2(s[23] ^ bc[2], 24);
+ s[23] = rotl64_2(s[15] ^ bc[4], 9);
+ s[15] = rotl64_1(s[4] ^ bc[3], 27);
+ s[4] = rotl64_1(s[24] ^ bc[3], 14);
+ s[24] = rotl64_1(s[21] ^ bc[0], 2);
+ s[21] = rotl64_2(s[8] ^ bc[2], 23);
+ s[8] = rotl64_2(s[16] ^ bc[0], 13);
+ s[16] = rotl64_2(s[5] ^ bc[4], 4);
+ s[5] = rotl64_1(s[3] ^ bc[2], 28);
+ s[3] = rotl64_1(s[18] ^ bc[2], 21);
+ s[18] = rotl64_1(s[17] ^ bc[1], 15);
+ s[17] = rotl64_1(s[11] ^ bc[0], 10);
+ s[11] = rotl64_1(s[7] ^ bc[1], 6);
+ s[7] = rotl64_1(s[10] ^ bc[4], 3);
+ s[10] = rotl64_1(tmp1, 1);
+
+ tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
+ tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
+ tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
+ tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1);
+ tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1);
+ s[0] ^= keccakf_rndc[i];
+ }
+}
+
+__device__ __forceinline__
+void cn_keccak(const uint8_t * __restrict__ in, uint8_t * __restrict__ md)
+{
+ uint64_t st[25];
+
+ MEMCPY4(st, in, 19);
+ MEMSET8(&st[10], 0x00, 15);
+ st[9] = (st[9] & 0x00000000FFFFFFFFULL) | 0x0000000100000000ULL;
+ st[16] = 0x8000000000000000ULL;
+
+ cn_keccakf(st);
+
+ MEMCPY8(md, st, 25);
+ return;
+}
diff --git a/crypto/cn_skein.cuh b/crypto/cn_skein.cuh
new file mode 100644
index 0000000..5ff1cb3
--- /dev/null
+++ b/crypto/cn_skein.cuh
@@ -0,0 +1,345 @@
+typedef unsigned int uint_t; /* native unsigned integer */
+
+#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */
+
+#define SKEIN_256_STATE_WORDS ( 4)
+#define SKEIN_512_STATE_WORDS ( 8)
+#define SKEIN1024_STATE_WORDS (16)
+
+#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32))
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
+
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */
+
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */
+
+#define SKEIN_T1_FLAG_FIRST (((uint64_t) 1 ) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_BIT_PAD (((uint64_t) 1 ) << SKEIN_T1_POS_BIT_PAD)
+#define SKEIN_T1_FLAG_FINAL (((uint64_t) 1 ) << SKEIN_T1_POS_FINAL)
+
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+
+#define SKEIN_T1_BLK_TYPE(T) (((uint64_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
+
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
+
+#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
+#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
+
+#define Skein_Set_T0_T1(ctxPtr,T0,T1) { \
+ Skein_Set_T0(ctxPtr,(T0)); \
+ Skein_Set_T1(ctxPtr,(T1)); }
+
+#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \
+{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+
+#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; }
+
+#define KW_TWK_BASE (0)
+#define KW_KEY_BASE (3)
+#define ks (kw + KW_KEY_BASE)
+#define ts (kw + KW_TWK_BASE)
+
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,R512ROT,rNum) \
+ X##p0 += X##p1; X##p1 = ROTL64(X##p1,R512ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = ROTL64(X##p3,R512ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = ROTL64(X##p5,R512ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = ROTL64(X##p7,R512ROT##_3); X##p7 ^= X##p6;
+
+#define I512(R) \
+ X0 += ks[((R)+1) % 9]; \
+ X1 += ks[((R)+2) % 9]; \
+ X2 += ks[((R)+3) % 9]; \
+ X3 += ks[((R)+4) % 9]; \
+ X4 += ks[((R)+5) % 9]; \
+ X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \
+ X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \
+ X7 += ks[((R)+8) % 9] + (R)+1;
+
+
+#define R512_8_rounds(R) \
+ R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \
+ R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \
+ R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \
+ R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \
+ I512(2*(R)); \
+ R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \
+ R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \
+ R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \
+ R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \
+ I512(2*(R)+1);
+
+typedef struct
+{
+ size_t hashBitLen;
+ size_t bCnt;
+ uint64_t T[SKEIN_MODIFIER_WORDS];
+} Skein_Ctxt_Hdr_t;
+
+typedef struct {
+ Skein_Ctxt_Hdr_t h;
+ uint64_t X[SKEIN_256_STATE_WORDS];
+ uint8_t b[SKEIN_256_BLOCK_BYTES];
+} Skein_256_Ctxt_t;
+
+typedef struct {
+ Skein_Ctxt_Hdr_t h;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+ uint8_t b[SKEIN_512_BLOCK_BYTES];
+} Skein_512_Ctxt_t;
+
+typedef struct {
+ Skein_Ctxt_Hdr_t h;
+ uint64_t X[SKEIN1024_STATE_WORDS];
+ uint8_t b[SKEIN1024_BLOCK_BYTES];
+} Skein1024_Ctxt_t;
+
+typedef struct {
+ uint_t statebits;
+ union {
+ Skein_Ctxt_Hdr_t h;
+ Skein_256_Ctxt_t ctx_256;
+ Skein_512_Ctxt_t ctx_512;
+ Skein1024_Ctxt_t ctx1024;
+ } u;
+} skeinHashState;
+
+__device__
+void cn_skein_init(skeinHashState *state, size_t hashBitLen)
+{
+ const uint64_t SKEIN_512_IV_256[] =
+ {
+ SKEIN_MK_64(0xCCD044A1,0x2FDB3E13),
+ SKEIN_MK_64(0xE8359030,0x1A79A9EB),
+ SKEIN_MK_64(0x55AEA061,0x4F816E6F),
+ SKEIN_MK_64(0x2A2767A4,0xAE9B94DB),
+ SKEIN_MK_64(0xEC06025E,0x74DD7683),
+ SKEIN_MK_64(0xE7A436CD,0xC4746251),
+ SKEIN_MK_64(0xC36FBAF9,0x393AD185),
+ SKEIN_MK_64(0x3EEDBA18,0x33EDFC13)
+ };
+
+ Skein_512_Ctxt_t *ctx = &state->u.ctx_512;
+
+ ctx->h.hashBitLen = hashBitLen;
+
+ memcpy(ctx->X, SKEIN_512_IV_256, sizeof(ctx->X));
+
+ Skein_Start_New_Type(ctx, MSG);
+}
+
+__device__
+void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ blkPtr, size_t blkCnt, size_t byteCntAdd)
+{
+ enum {
+ R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
+ R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
+ R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
+ R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
+ R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
+ R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
+ R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
+ R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22
+ };
+ uint64_t X0,X1,X2,X3,X4,X5,X6,X7;
+ uint64_t w[SKEIN_512_STATE_WORDS];
+ uint64_t kw[SKEIN_512_STATE_WORDS+4];
+
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+
+ do {
+
+ ts[0] += byteCntAdd;
+
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ memcpy(w, blkPtr, SKEIN_512_STATE_WORDS << 3);
+
+ X0 = w[0] + ks[0];
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES;
+
+ R512_8_rounds( 0);
+ R512_8_rounds( 1);
+ R512_8_rounds( 2);
+ R512_8_rounds( 3);
+ R512_8_rounds( 4);
+ R512_8_rounds( 5);
+ R512_8_rounds( 6);
+ R512_8_rounds( 7);
+ R512_8_rounds( 8);
+
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ } while (--blkCnt);
+
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+__device__
+void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __restrict__ hashVal)
+{
+ size_t i,n,byteCnt;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+ Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)&state->u.ctx_512;
+ //size_t tmp;
+ //uint8_t *p8;
+ //uint64_t *p64;
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) {
+
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+ //p8 = &ctx->b[ctx->h.bCnt];
+ //tmp = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+ //for( i = 0; i < tmp; i++ ) *(p8+i) = 0;
+ }
+
+ cn_skein512_processblock(ctx,ctx->b,1,ctx->h.bCnt);
+
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ //uint8_t b[SKEIN_512_BLOCK_BYTES] == 64
+ memset(ctx->b,0,sizeof(ctx->b));
+ //p64 = (uint64_t *)ctx->b;
+ //for( i = 0; i < 8; i++ ) *(p64+i) = 0;
+
+ memcpy(X,ctx->X,sizeof(X));
+
+ for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) {
+
+ ((uint64_t *)ctx->b)[0]= (uint64_t)i;
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ cn_skein512_processblock(ctx,ctx->b,1,sizeof(uint64_t));
+ n = byteCnt - i*SKEIN_512_BLOCK_BYTES;
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ memcpy(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+}
+
+__device__
+void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) {
+
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+
+ if (n) {
+
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+
+ cn_skein512_processblock(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
+
+ n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;
+ cn_skein512_processblock(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ }
+
+ if (msgByteCnt) {
+
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+}
+
+__device__
+void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
+{
+ if ((databitlen & 7) == 0) {
+
+ cn_skein512_update(&state->u.ctx_512,data,databitlen >> 3);
+ }
+ else {
+
+ size_t bCnt = (databitlen >> 3) + 1;
+ uint8_t b,mask;
+
+ mask = (uint8_t) (1u << (7 - (databitlen & 7)));
+ b = (uint8_t) ((data[bCnt-1] & (0-mask)) | mask);
+
+ cn_skein512_update(&state->u.ctx_512, data, bCnt-1);
+ cn_skein512_update(&state->u.ctx_512, &b, 1);
+
+ Skein_Set_Bit_Pad_Flag(state->u.h);
+ }
+}
+
+__device__
+void cn_skein(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval)
+{
+ int hashbitlen = 256;
+ DataLength databitlen = len << 3;
+ skeinHashState state;
+
+ state.statebits = 64*SKEIN_512_STATE_WORDS;
+
+ cn_skein_init(&state, hashbitlen);
+ cn_skein_update(&state, data, databitlen);
+ cn_skein_final(&state, hashval);
+}
diff --git a/crypto/cpu/c_keccak.c b/crypto/cpu/c_keccak.c
new file mode 100644
index 0000000..d6721bf
--- /dev/null
+++ b/crypto/cpu/c_keccak.c
@@ -0,0 +1,122 @@
+// keccak.c
+// 19-Nov-11 Markku-Juhani O. Saarinen
+// A baseline Keccak (3rd round) implementation.
+
+#include "c_keccak.h"
+
+const uint64_t keccakf_rndc[24] =
+{
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+ 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+ 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+};
+
+const int keccakf_rotc[24] =
+{
+ 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+ 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
+};
+
+const int keccakf_piln[24] =
+{
+ 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+ 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
+};
+
+// update the state with given number of rounds
+
+void keccakf(uint64_t st[25], int rounds)
+{
+ int i, j, round;
+ uint64_t t, bc[5];
+
+ for (round = 0; round < rounds; ++round) {
+
+ // Theta
+ bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+ bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+ bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+ bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+ bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+ for (i = 0; i < 5; ++i) {
+ t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+ st[i ] ^= t;
+ st[i + 5] ^= t;
+ st[i + 10] ^= t;
+ st[i + 15] ^= t;
+ st[i + 20] ^= t;
+ }
+
+ // Rho Pi
+ t = st[1];
+ for (i = 0; i < 24; ++i) {
+ bc[0] = st[keccakf_piln[i]];
+ st[keccakf_piln[i]] = ROTL64(t, keccakf_rotc[i]);
+ t = bc[0];
+ }
+
+ // Chi
+ for (j = 0; j < 25; j += 5) {
+ bc[0] = st[j ];
+ bc[1] = st[j + 1];
+ bc[2] = st[j + 2];
+ bc[3] = st[j + 3];
+ bc[4] = st[j + 4];
+ st[j ] ^= (~bc[1]) & bc[2];
+ st[j + 1] ^= (~bc[2]) & bc[3];
+ st[j + 2] ^= (~bc[3]) & bc[4];
+ st[j + 3] ^= (~bc[4]) & bc[0];
+ st[j + 4] ^= (~bc[0]) & bc[1];
+ }
+
+ // Iota
+ st[0] ^= keccakf_rndc[round];
+ }
+}
+
+// compute a keccak hash (md) of given byte length from "in"
+typedef uint64_t state_t[25];
+
+int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
+{
+ state_t st;
+ uint8_t temp[144];
+ int i, rsiz, rsizw;
+
+ rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
+ rsizw = rsiz / 8;
+
+ memset(st, 0, sizeof(st));
+
+ for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) in)[i];
+ keccakf(st, KECCAK_ROUNDS);
+ }
+
+ // last block and padding
+ memcpy(temp, in, inlen);
+ temp[inlen++] = 1;
+ memset(temp + inlen, 0, rsiz - inlen);
+ temp[rsiz - 1] |= 0x80;
+
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) temp)[i];
+
+ keccakf(st, KECCAK_ROUNDS);
+
+ memcpy(md, st, mdlen);
+
+ return 0;
+}
+
+void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
+{
+ keccak(in, inlen, md, sizeof(state_t));
+}
diff --git a/crypto/cpu/c_keccak.h b/crypto/cpu/c_keccak.h
new file mode 100644
index 0000000..566a305
--- /dev/null
+++ b/crypto/cpu/c_keccak.h
@@ -0,0 +1,34 @@
+// keccak.h
+// 19-Nov-11 Markku-Juhani O. Saarinen
+
+#ifndef KECCAK_H
+#define KECCAK_H
+
+#include
+#include
+
+#ifndef KECCAK_ROUNDS
+#define KECCAK_ROUNDS 24
+#endif
+
+#ifndef ROTL64
+#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
+#endif
+
+#ifndef HASH_SIZE
+#define HASH_SIZE 32
+#endif
+
+#ifndef HASH_DATA_AREA
+#define HASH_DATA_AREA 136
+#endif
+
+// compute a keccak hash (md) of given byte length from "in"
+int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
+
+// update the state
+void keccakf(uint64_t st[25], int norounds);
+
+void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
+
+#endif
diff --git a/crypto/cryptonight-cpu.cpp b/crypto/cryptonight-cpu.cpp
new file mode 100644
index 0000000..31c19aa
--- /dev/null
+++ b/crypto/cryptonight-cpu.cpp
@@ -0,0 +1,226 @@
+#include
+#include
+
+#include "oaes_lib.h"
+#include "cryptonight.h"
+
+extern "C" {
+#include
+#include
+#include
+#include
+#include "cpu/c_keccak.h"
+}
+
+struct cryptonight_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE];
+ uint8_t c[AES_BLOCK_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+static void do_blake_hash(const void* input, size_t len, void* output)
+{
+ uchar hash[32];
+ sph_blake256_context ctx;
+ sph_blake256_set_rounds(14);
+ sph_blake256_init(&ctx);
+ sph_blake256(&ctx, input, len);
+ sph_blake256_close(&ctx, hash);
+ memcpy(output, hash, 32);
+}
+
+static void do_groestl_hash(const void* input, size_t len, void* output)
+{
+ uchar hash[32];
+ sph_groestl256_context ctx;
+ sph_groestl256_init(&ctx);
+ sph_groestl256(&ctx, input, len);
+ sph_groestl256_close(&ctx, hash);
+ memcpy(output, hash, 32);
+}
+
+static void do_jh_hash(const void* input, size_t len, void* output)
+{
+ uchar hash[64];
+ sph_jh256_context ctx;
+ sph_jh256_init(&ctx);
+ sph_jh256(&ctx, input, len);
+ sph_jh256_close(&ctx, hash);
+ memcpy(output, hash, 32);
+}
+
+static void do_skein_hash(const void* input, size_t len, void* output)
+{
+ uchar hash[32];
+ sph_skein256_context ctx;
+ sph_skein256_init(&ctx);
+ sph_skein256(&ctx, input, len);
+ sph_skein256_close(&ctx, hash);
+ memcpy(output, hash, 32);
+}
+
+// todo: use sph if possible
+static void keccak_hash_permutation(union hash_state *state) {
+ keccakf((uint64_t*)state, 24);
+}
+
+static void keccak_hash_process(union hash_state *state, const uint8_t *buf, size_t count) {
+ keccak1600(buf, (int)count, (uint8_t*)state);
+}
+
+extern "C" int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern "C" int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern "C" int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
+extern "C" int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
+
+static void (* const extra_hashes[4])(const void*, size_t, void *) = {
+ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
+};
+
+uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
+{
+ // multiplier = ab = a * 2^32 + b
+ // multiplicand = cd = c * 2^32 + d
+ // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
+ uint64_t a = hi_dword(multiplier);
+ uint64_t b = lo_dword(multiplier);
+ uint64_t c = hi_dword(multiplicand);
+ uint64_t d = lo_dword(multiplicand);
+
+ uint64_t ac = a * c;
+ uint64_t ad = a * d;
+ uint64_t bc = b * c;
+ uint64_t bd = b * d;
+
+ uint64_t adbc = ad + bc;
+ uint64_t adbc_carry = adbc < ad ? 1 : 0;
+
+ // multiplier * multiplicand = product_hi * 2^64 + product_lo
+ uint64_t product_lo = bd + (adbc << 32);
+ uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
+ *product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
+
+ return product_lo;
+}
+
+static size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (MEMORY / AES_BLOCK_SIZE - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] += ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] += ((uint64_t*) b)[1];
+}
+
+static void sum_half_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] + ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] + ((uint64_t*) b)[1];
+}
+
+static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, uint8_t* dst) {
+ ((uint64_t*) dst)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) dst) + ((uint64_t*) c)[1];
+ ((uint64_t*) dst)[0] += ((uint64_t*) c)[0];
+}
+
+static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
+ uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
+ hi += ((uint64_t*) c)[0];
+
+ ((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
+ ((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
+ ((uint64_t*) dst)[0] = hi;
+ ((uint64_t*) dst)[1] = lo;
+}
+
+static void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+static void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx)
+{
+ size_t i, j;
+ keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
+ for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
+#undef RND
+#define RND(p) aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * p], ctx->aes_ctx->key->exp_data);
+ RND(0);
+ RND(1);
+ RND(2);
+ RND(3);
+ RND(4);
+ RND(5);
+ RND(6);
+ RND(7);
+ memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
+ xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
+
+ for (i = 0; likely(i < ITER / 4); ++i) {
+ j = e2i(ctx->a) * AES_BLOCK_SIZE;
+ aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
+
+ mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]);
+
+ j = e2i(ctx->a) * AES_BLOCK_SIZE;
+ aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
+ xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
+
+ mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
+#undef RND
+#define RND(p) xor_blocks(&ctx->text[p * AES_BLOCK_SIZE], &ctx->long_state[i + p * AES_BLOCK_SIZE]); \
+ aesb_pseudo_round_mut(&ctx->text[p * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
+ RND(0);
+ RND(1);
+ RND(2);
+ RND(3);
+ RND(4);
+ RND(5);
+ RND(6);
+ RND(7);
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ keccak_hash_permutation(&ctx->state.hs);
+
+ int extra_algo = ctx->state.hs.b[0] & 3;
+ extra_hashes[extra_algo](&ctx->state, 200, output);
+
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonight_hash(void* output, const void* input, size_t len)
+{
+ struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
+ cryptonight_hash_ctx(output, input, len, ctx);
+ free(ctx);
+}
diff --git a/crypto/cryptonight.cu b/crypto/cryptonight.cu
new file mode 100644
index 0000000..907899e
--- /dev/null
+++ b/crypto/cryptonight.cu
@@ -0,0 +1,170 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include "cryptonight.h"
+
+extern char *device_config[MAX_GPUS]; // -l 24x32
+
+uint32_t cn_blocks = 24;
+uint32_t cn_threads = 32;
+
+static uint32_t *d_long_state[MAX_GPUS];
+static uint32_t *d_ctx_state[MAX_GPUS];
+static uint32_t *d_ctx_key1[MAX_GPUS];
+static uint32_t *d_ctx_key2[MAX_GPUS];
+static uint32_t *d_ctx_text[MAX_GPUS];
+static uint32_t *d_ctx_a[MAX_GPUS];
+static uint32_t *d_ctx_b[MAX_GPUS];
+
+static bool init[MAX_GPUS] = { 0 };
+
+extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+ int res = 0;
+ uint32_t throughput = 0;
+
+ uint32_t *ptarget = work->target;
+ uint8_t *pdata = (uint8_t*) work->data;
+ uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
+ const uint32_t first_nonce = *nonceptr;
+ uint32_t nonce = first_nonce;
+
+ if(opt_benchmark) {
+ ptarget[7] = 0x00ff;
+ }
+
+ if(!init[thr_id])
+ {
+ if (device_config[thr_id]) {
+ sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
+ gpulog(LOG_INFO, thr_id, "Using %u x %u threads kernel launch config", cn_blocks, cn_threads);
+ throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
+ } else {
+ throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
+ gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
+ }
+
+ if(sizeof(size_t) == 4 && throughput > UINT32_MAX / MEMORY) {
+ gpulog(LOG_ERR, thr_id, "THE 32bit VERSION CAN'T ALLOCATE MORE THAN 4GB OF MEMORY!");
+ gpulog(LOG_ERR, thr_id, "PLEASE REDUCE THE NUMBER OF THREADS OR BLOCKS");
+ exit(1);
+ }
+
+ cudaSetDevice(device_map[thr_id]);
+ if (opt_cudaschedule == -1 && gpu_threads == 1) {
+ cudaDeviceReset();
+ // reduce cpu usage (linux)
+ cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+ //cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+ CUDA_LOG_ERROR();
+ }
+
+ const size_t alloc = MEMORY * throughput;
+ cryptonight_extra_cpu_init(thr_id, throughput);
+
+ cudaMalloc(&d_long_state[thr_id], alloc);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_state[thr_id], 50 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_key1[thr_id], 40 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_key2[thr_id], 40 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_text[thr_id], 32 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_a[thr_id], 4 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+
+ init[thr_id] = true;
+ }
+
+ throughput = cuda_default_throughput(thr_id, cn_blocks*cn_blocks);
+
+ do
+ {
+ const uint32_t Htarg = ptarget[7];
+ uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
+
+ cryptonight_extra_cpu_setData(thr_id, pdata, ptarget);
+ cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
+ cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
+ cryptonight_extra_cpu_final(thr_id, throughput, nonce, resNonces, d_ctx_state[thr_id]);
+
+ *hashes_done = nonce - first_nonce + throughput;
+
+ if(resNonces[0] != UINT32_MAX)
+ {
+ uint32_t vhash[8];
+ uint32_t tempdata[19];
+ uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39);
+ memcpy(tempdata, pdata, 76);
+ *tempnonceptr = resNonces[0];
+ gpulog(LOG_DEBUG, thr_id, "found nonce %x", resNonces[0]);
+ cryptonight_hash(vhash, tempdata, 76);
+ if(vhash[7] <= Htarg && fulltest(vhash, ptarget))
+ {
+ res = 1;
+ work->nonces[0] = resNonces[0];
+ work_set_target_ratio(work, vhash);
+ // second nonce
+ if(resNonces[1] != UINT32_MAX)
+ {
+ *tempnonceptr = resNonces[1];
+ cryptonight_hash(vhash, tempdata, 76);
+ if(vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
+ res++;
+ work->nonces[1] = resNonces[1];
+ } else if (vhash[7] > Htarg) {
+ gpulog(LOG_WARNING, thr_id, "result for second nonce %08x does not validate on CPU!", resNonces[1]);
+ }
+ }
+ goto done;
+ } else if (vhash[7] > Htarg) {
+ gpulog(LOG_WARNING, thr_id, "result for nonce %08x does not validate on CPU!", resNonces[0]);
+ }
+ }
+
+ if ((uint64_t) throughput + nonce >= max_nonce - 127) {
+ nonce = max_nonce;
+ break;
+ }
+
+ nonce += throughput;
+ gpulog(LOG_DEBUG, thr_id, "nonce %08x", nonce);
+
+ } while (!work_restart[thr_id].restart && max_nonce > (uint64_t)throughput + nonce);
+
+done:
+ gpulog(LOG_DEBUG, thr_id, "nonce %08x exit", nonce);
+
+ *nonceptr = nonce;
+ return res;
+}
+
+void free_cryptonight(int thr_id)
+{
+ if (!init[thr_id])
+ return;
+
+ cudaFree(d_long_state[thr_id]);
+ cudaFree(d_ctx_state[thr_id]);
+ cudaFree(d_ctx_key1[thr_id]);
+ cudaFree(d_ctx_key2[thr_id]);
+ cudaFree(d_ctx_text[thr_id]);
+ cudaFree(d_ctx_a[thr_id]);
+ cudaFree(d_ctx_b[thr_id]);
+
+ cryptonight_extra_cpu_free(thr_id);
+
+ cudaDeviceSynchronize();
+
+ init[thr_id] = false;
+}
diff --git a/crypto/cryptonight.h b/crypto/cryptonight.h
new file mode 100644
index 0000000..16114c5
--- /dev/null
+++ b/crypto/cryptonight.h
@@ -0,0 +1,156 @@
+#pragma once
+#include
+#include
+
+#ifdef __INTELLISENSE__
+/* avoid red underlining */
+#define __CUDA_ARCH__ 520
+struct uint3 {
+ unsigned int x, y, z;
+};
+struct uint3 threadIdx;
+struct uint3 blockIdx;
+struct uint3 blockDim;
+#define __funnelshift_r(a,b,c) 1
+#define __syncthreads()
+#define asm(x)
+#define __shfl(a,b,c) 1
+#endif
+
+
+#define MEMORY (1 << 21) // 2 MiB / 2097152 B
+#define ITER (1 << 20) // 1048576
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128 B
+
+#define AES_RKEY_LEN 4
+#define AES_COL_LEN 4
+#define AES_ROUND_BASE 7
+
+#ifndef HASH_SIZE
+#define HASH_SIZE 32
+#endif
+
+#ifndef HASH_DATA_AREA
+#define HASH_DATA_AREA 136
+#endif
+
+#define hi_dword(x) (x >> 32)
+#define lo_dword(x) (x & 0xFFFFFFFF)
+
+#define C32(x) ((uint32_t)(x ## U))
+#define T32(x) ((x) & C32(0xFFFFFFFF))
+
+#ifndef ROTL64
+ #if __CUDA_ARCH__ >= 350
+ __forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int offset) {
+ uint2 result;
+ if(offset >= 32) {
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
+ } else {
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
+ }
+ return __double_as_longlong(__hiloint2double(result.y, result.x));
+ }
+ #define ROTL64(x, n) (cuda_ROTL64(x, n))
+ #else
+ #define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+ #endif
+#endif
+
+#ifndef ROTL32
+ #if __CUDA_ARCH__ < 350
+ #define ROTL32(x, n) T32(((x) << (n)) | ((x) >> (32 - (n))))
+ #else
+ #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
+ #endif
+#endif
+
+#ifndef ROTR32
+ #if __CUDA_ARCH__ < 350
+ #define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+ #else
+ #define ROTR32(x, n) __funnelshift_r( (x), (x), (n) )
+ #endif
+#endif
+
+#define MEMSET8(dst,what,cnt) { \
+ int i_memset8; \
+ uint64_t *out_memset8 = (uint64_t *)(dst); \
+ for( i_memset8 = 0; i_memset8 < cnt; i_memset8++ ) \
+ out_memset8[i_memset8] = (what); }
+
+#define MEMSET4(dst,what,cnt) { \
+ int i_memset4; \
+ uint32_t *out_memset4 = (uint32_t *)(dst); \
+ for( i_memset4 = 0; i_memset4 < cnt; i_memset4++ ) \
+ out_memset4[i_memset4] = (what); }
+
+#define MEMCPY8(dst,src,cnt) { \
+ int i_memcpy8; \
+ uint64_t *in_memcpy8 = (uint64_t *)(src); \
+ uint64_t *out_memcpy8 = (uint64_t *)(dst); \
+ for( i_memcpy8 = 0; i_memcpy8 < cnt; i_memcpy8++ ) \
+ out_memcpy8[i_memcpy8] = in_memcpy8[i_memcpy8]; }
+
+#define MEMCPY4(dst,src,cnt) { \
+ int i_memcpy4; \
+ uint32_t *in_memcpy4 = (uint32_t *)(src); \
+ uint32_t *out_memcpy4 = (uint32_t *)(dst); \
+ for( i_memcpy4 = 0; i_memcpy4 < cnt; i_memcpy4++ ) \
+ out_memcpy4[i_memcpy4] = in_memcpy4[i_memcpy4]; }
+
+#define XOR_BLOCKS(a,b) { \
+ ((uint64_t *)a)[0] ^= ((uint64_t *)b)[0]; \
+ ((uint64_t *)a)[1] ^= ((uint64_t *)b)[1]; }
+
+#define XOR_BLOCKS_DST(x,y,z) { \
+ ((uint64_t *)z)[0] = ((uint64_t *)(x))[0] ^ ((uint64_t *)(y))[0]; \
+ ((uint64_t *)z)[1] = ((uint64_t *)(x))[1] ^ ((uint64_t *)(y))[1]; }
+
+#define MUL_SUM_XOR_DST(a,c,dst) { \
+ uint64_t hi, lo = cuda_mul128(((uint64_t *)a)[0], ((uint64_t *)dst)[0], &hi) + ((uint64_t *)c)[1]; \
+ hi += ((uint64_t *)c)[0]; \
+ ((uint64_t *)c)[0] = ((uint64_t *)dst)[0] ^ hi; \
+ ((uint64_t *)c)[1] = ((uint64_t *)dst)[1] ^ lo; \
+ ((uint64_t *)dst)[0] = hi; \
+ ((uint64_t *)dst)[1] = lo; }
+
+#define E2I(x) ((size_t)(((*((uint64_t*)(x)) >> 4) & 0x1ffff)))
+
+union hash_state {
+ uint8_t b[200];
+ uint64_t w[25];
+};
+
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+
+static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
+{
+ cudaError_t err = cudaGetLastError();
+ if(err != cudaSuccess) {
+ gpulog(LOG_ERR, thr_id, "%s %s line %d", cudaGetErrorString(err), src, line);
+ exit(1);
+ }
+}
+
+void hash_permutation(union hash_state *state);
+void hash_process(union hash_state *state, const uint8_t *buf, size_t count);
+
+void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
+
+void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn);
+void cryptonight_extra_cpu_init(int thr_id, uint32_t threads);
+void cryptonight_extra_cpu_free(int thr_id);
+void cryptonight_extra_cpu_prepare(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
+void cryptonight_extra_cpu_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *nonce, uint32_t *d_ctx_state);
diff --git a/crypto/cuda_cryptonight_core.cu b/crypto/cuda_cryptonight_core.cu
new file mode 100644
index 0000000..825c1a6
--- /dev/null
+++ b/crypto/cuda_cryptonight_core.cu
@@ -0,0 +1,262 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "cryptonight.h"
+
+#ifdef WIN32
+int cn_bfactor = 6;
+int cn_bsleep = 100;
+#else
+int cn_bfactor = 0;
+int cn_bsleep = 0;
+#endif
+
+#include "cn_aes.cuh"
+
+__device__ __forceinline__ uint64_t cuda_mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
+{
+ *product_hi = __umul64hi(multiplier, multiplicand);
+ return(multiplier * multiplicand);
+}
+
+__global__
+void cryptonight_core_gpu_phase1(int threads, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state, uint32_t * __restrict__ ctx_key1)
+{
+ __shared__ uint32_t sharedMemory[1024];
+
+ cn_aes_gpu_init(sharedMemory);
+
+ const int thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 3;
+ const int sub = (threadIdx.x & 7) << 2;
+
+ if(thread < threads)
+ {
+ uint32_t key[40], text[4];
+
+ MEMCPY8(key, ctx_key1 + thread * 40, 20);
+ MEMCPY8(text, ctx_state + thread * 50 + sub + 16, 2);
+
+ __syncthreads();
+ for(int i = 0; i < 0x80000; i += 32)
+ {
+ cn_aes_pseudo_round_mut(sharedMemory, text, key);
+ MEMCPY8(&long_state[(thread << 19) + sub + i], text, 2);
+ }
+ }
+}
+
+__global__
+void cryptonight_core_gpu_phase2(int threads, int bfactor, int partidx, uint32_t * __restrict__ d_long_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b)
+{
+ __shared__ uint32_t sharedMemory[1024];
+
+ cn_aes_gpu_init(sharedMemory);
+
+ __syncthreads();
+
+#if __CUDA_ARCH__ >= 300
+
+ const int thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 2;
+ const int sub = threadIdx.x & 3;
+
+ if(thread < threads)
+ {
+ const int batchsize = ITER >> (2 + bfactor);
+ const int start = partidx * batchsize;
+ const int end = start + batchsize;
+ uint32_t * __restrict__ long_state = &d_long_state[thread << 19];
+ uint32_t * __restrict__ ctx_a = d_ctx_a + thread * 4;
+ uint32_t * __restrict__ ctx_b = d_ctx_b + thread * 4;
+ uint32_t a, b, c, x[4];
+ uint32_t t1[4], t2[4], res;
+ uint64_t reshi, reslo;
+ int j;
+
+ a = ctx_a[sub];
+ b = ctx_b[sub];
+
+ #pragma unroll 8
+ for(int i = start; i < end; ++i)
+ {
+ //j = ((uint32_t *)a)[0] & 0x1FFFF0;
+ j = (__shfl((int)a, 0, 4) & 0x1FFFF0) >> 2;
+
+ //cn_aes_single_round(sharedMemory, &long_state[j], c, a);
+ x[0] = long_state[j + sub];
+ x[1] = __shfl((int)x[0], sub + 1, 4);
+ x[2] = __shfl((int)x[0], sub + 2, 4);
+ x[3] = __shfl((int)x[0], sub + 3, 4);
+ c = a ^
+ t_fn0(x[0] & 0xff) ^
+ t_fn1((x[1] >> 8) & 0xff) ^
+ t_fn2((x[2] >> 16) & 0xff) ^
+ t_fn3((x[3] >> 24) & 0xff);
+
+ //XOR_BLOCKS_DST(c, b, &long_state[j]);
+ long_state[j + sub] = c ^ b;
+
+ //MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & 0x1FFFF0]);
+ j = (__shfl((int)c, 0, 4) & 0x1FFFF0) >> 2;
+ #pragma unroll
+ for(int k = 0; k < 2; k++)
+ t1[k] = __shfl((int)c, k, 4);
+ #pragma unroll
+ for(int k = 0; k < 4; k++)
+ t2[k] = __shfl((int)a, k, 4);
+ asm(
+ "mad.lo.u64 %0, %2, %3, %4;\n\t"
+ "mad.hi.u64 %1, %2, %3, %5;\n\t"
+ : "=l"(reslo), "=l"(reshi)
+ : "l"(((uint64_t *)t1)[0]), "l"(((uint64_t *)long_state)[j >> 1]), "l"(((uint64_t *)t2)[1]), "l"(((uint64_t *)t2)[0]));
+ res = (sub & 2 ? reslo : reshi) >> (sub & 1 ? 32 : 0);
+ a = long_state[j + sub] ^ res;
+ long_state[j + sub] = res;
+
+ //j = ((uint32_t *)a)[0] & 0x1FFFF0;
+ j = (__shfl((int)a, 0, 4) & 0x1FFFF0) >> 2;
+
+ //cn_aes_single_round(sharedMemory, &long_state[j], b, a);
+ x[0] = long_state[j + sub];
+ x[1] = __shfl((int)x[0], sub + 1, 4);
+ x[2] = __shfl((int)x[0], sub + 2, 4);
+ x[3] = __shfl((int)x[0], sub + 3, 4);
+ b = a ^
+ t_fn0(x[0] & 0xff) ^
+ t_fn1((x[1] >> 8) & 0xff) ^
+ t_fn2((x[2] >> 16) & 0xff) ^
+ t_fn3((x[3] >> 24) & 0xff);
+
+ //XOR_BLOCKS_DST(b, c, &long_state[j]);
+ long_state[j + sub] = c ^ b;
+
+ //MUL_SUM_XOR_DST(b, a, &long_state[((uint32_t *)b)[0] & 0x1FFFF0]);
+ j = (__shfl((int)b, 0, 4) & 0x1FFFF0) >> 2;
+
+ #pragma unroll
+ for(int k = 0; k < 2; k++)
+ t1[k] = __shfl((int)b, k, 4);
+
+ #pragma unroll
+ for(int k = 0; k < 4; k++)
+ t2[k] = __shfl((int)a, k, 4);
+ asm(
+ "mad.lo.u64 %0, %2, %3, %4;\n\t"
+ "mad.hi.u64 %1, %2, %3, %5;\n\t"
+ : "=l"(reslo), "=l"(reshi)
+ : "l"(((uint64_t *)t1)[0]), "l"(((uint64_t *)long_state)[j >> 1]), "l"(((uint64_t *)t2)[1]), "l"(((uint64_t *)t2)[0]));
+ res = (sub & 2 ? reslo : reshi) >> (sub & 1 ? 32 : 0);
+ a = long_state[j + sub] ^ res;
+ long_state[j + sub] = res;
+ }
+
+ if(bfactor > 0)
+ {
+ ctx_a[sub] = a;
+ ctx_b[sub] = b;
+ }
+ }
+
+#else // __CUDA_ARCH__ < 300
+
+ const int thread = blockDim.x * blockIdx.x + threadIdx.x;
+
+ if(thread < threads)
+ {
+ const int batchsize = ITER >> (2 + bfactor);
+ const int start = partidx * batchsize;
+ const int end = start + batchsize;
+ uint32_t * __restrict__ long_state = &d_long_state[thread << 19];
+ uint32_t * __restrict__ ctx_a = d_ctx_a + thread * 4;
+ uint32_t * __restrict__ ctx_b = d_ctx_b + thread * 4;
+ uint32_t a[4], b[4], c[4];
+ int j;
+
+ MEMCPY8(a, ctx_a, 2);
+ MEMCPY8(b, ctx_b, 2);
+
+ for(int i = start; i < end; ++i)
+ {
+ j = (a[0] & 0x1FFFF0) >> 2;
+ cn_aes_single_round(sharedMemory, &long_state[j], c, a);
+ XOR_BLOCKS_DST(c, b, &long_state[j]);
+ MUL_SUM_XOR_DST(c, a, (uint8_t *)&long_state[(c[0] & 0x1FFFF0) >> 2]);
+ j = (a[0] & 0x1FFFF0) >> 2;
+ cn_aes_single_round(sharedMemory, &long_state[j], b, a);
+ XOR_BLOCKS_DST(b, c, &long_state[j]);
+ MUL_SUM_XOR_DST(b, a, &long_state[(b[0] & 0x1FFFF0) >> 2]);
+ }
+
+ if(bfactor > 0)
+ {
+ MEMCPY8(ctx_a, a, 2);
+ MEMCPY8(ctx_b, b, 2);
+ }
+ }
+
+#endif // __CUDA_ARCH__ >= 300
+}
+
+__global__
+void cryptonight_core_gpu_phase3(int threads, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2)
+{
+ __shared__ uint32_t sharedMemory[1024];
+
+ cn_aes_gpu_init(sharedMemory);
+
+ int thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 3;
+ int sub = (threadIdx.x & 7) << 2;
+
+ if(thread < threads)
+ {
+ uint32_t key[40], text[4];
+ MEMCPY8(key, d_ctx_key2 + thread * 40, 20);
+ MEMCPY8(text, d_ctx_state + thread * 50 + sub + 16, 2);
+
+ __syncthreads();
+ for(int i = 0; i < 0x80000; i += 32)
+ {
+ #pragma unroll
+ for(int j = 0; j < 4; ++j)
+ text[j] ^= long_state[(thread << 19) + sub + i + j];
+
+ cn_aes_pseudo_round_mut(sharedMemory, text, key);
+ }
+
+ MEMCPY8(d_ctx_state + thread * 50 + sub + 16, text, 2);
+ }
+}
+
+__host__
+void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
+{
+ dim3 grid(blocks);
+ dim3 block(threads);
+ dim3 block4(threads << 2);
+ dim3 block8(threads << 3);
+
+ const int bfactor = cn_bfactor; // device_bfactor[thr_id];
+ const int bsleep = cn_bsleep; //device_bsleep[thr_id];
+
+ int i, partcount = 1 << bfactor;
+ int dev_id = device_map[thr_id];
+
+ cryptonight_core_gpu_phase1 <<>>(blocks*threads, d_long_state, d_ctx_state, d_ctx_key1);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ if(partcount > 1) usleep(bsleep);
+
+ for(i = 0; i < partcount; i++)
+ {
+ cryptonight_core_gpu_phase2 <<= 300 ? block4 : block)>>>(blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ if(partcount > 1) usleep(bsleep);
+ }
+
+ cryptonight_core_gpu_phase3 <<>>(blocks*threads, d_long_state, d_ctx_state, d_ctx_key2);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+}
diff --git a/crypto/cuda_cryptonight_extra.cu b/crypto/cuda_cryptonight_extra.cu
new file mode 100644
index 0000000..0e37af7
--- /dev/null
+++ b/crypto/cuda_cryptonight_extra.cu
@@ -0,0 +1,234 @@
+#include
+#include
+#include
+#ifndef _WIN32
+#include
+#endif
+
+#include
+#include
+//#include
+//#include
+
+#include "cryptonight.h"
+
+typedef unsigned char BitSequence;
+typedef unsigned long long DataLength;
+
+static uint32_t *d_input[MAX_GPUS] = { 0 };
+static uint32_t *d_target[MAX_GPUS];
+static uint32_t *d_result[MAX_GPUS];
+
+#include "cn_keccak.cuh"
+#include "cn_blake.cuh"
+#include "cn_groestl.cuh"
+#include "cn_jh.cuh"
+#include "cn_skein.cuh"
+
+__constant__ uint8_t d_sub_byte[16][16] = {
+ {0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
+ {0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
+ {0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
+ {0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75},
+ {0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84},
+ {0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf},
+ {0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8},
+ {0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2},
+ {0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73},
+ {0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb},
+ {0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79},
+ {0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08},
+ {0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a},
+ {0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e},
+ {0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf},
+ {0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}
+};
+
+__device__ __forceinline__
+void cryptonight_aes_set_key(uint32_t * __restrict__ key, const uint32_t * __restrict__ data)
+{
+ const uint32_t aes_gf[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
+ };
+
+ MEMSET4(key, 0, 40);
+ MEMCPY4(key, data, 8);
+
+ #pragma unroll
+ for(int i = 8; i < 40; i++)
+ {
+ uint8_t temp[4];
+ *(uint32_t *)temp = key[i - 1];
+
+ if(i % 8 == 0) {
+ *(uint32_t *)temp = ROTR32(*(uint32_t *)temp, 8);
+ for(int j = 0; j < 4; j++)
+ temp[j] = d_sub_byte[(temp[j] >> 4) & 0x0f][temp[j] & 0x0f];
+ *(uint32_t *)temp ^= aes_gf[i / 8 - 1];
+ }
+ else if(i % 8 == 4) {
+ #pragma unroll
+ for(int j = 0; j < 4; j++)
+ temp[j] = d_sub_byte[(temp[j] >> 4) & 0x0f][temp[j] & 0x0f];
+ }
+
+ key[i] = key[(i - 8)] ^ *(uint32_t *)temp;
+ }
+}
+
+__global__
+void cryptonight_extra_gpu_prepare(const uint32_t threads, uint32_t * __restrict__ d_input, uint32_t startNonce,
+ uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b,
+ uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2)
+{
+ const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
+
+ if(thread < threads)
+ {
+ uint32_t ctx_state[50];
+ uint32_t ctx_a[4];
+ uint32_t ctx_b[4];
+ uint32_t ctx_key1[40];
+ uint32_t ctx_key2[40];
+ uint32_t input[19];
+
+ MEMCPY4(input, d_input, 19);
+ *((uint32_t *)(((char *)input) + 39)) = startNonce + thread;
+
+ cn_keccak((uint8_t *)input, (uint8_t *)ctx_state);
+ cryptonight_aes_set_key(ctx_key1, ctx_state);
+ cryptonight_aes_set_key(ctx_key2, ctx_state + 8);
+ XOR_BLOCKS_DST(ctx_state, ctx_state + 8, ctx_a);
+ XOR_BLOCKS_DST(ctx_state + 4, ctx_state + 12, ctx_b);
+
+ memcpy(d_ctx_state + thread * 50, ctx_state, 50 * 4);
+ memcpy(d_ctx_a + thread * 4, ctx_a, 4 * 4);
+ memcpy(d_ctx_b + thread * 4, ctx_b, 4 * 4);
+ memcpy(d_ctx_key1 + thread * 40, ctx_key1, 40 * 4);
+ memcpy(d_ctx_key2 + thread * 40, ctx_key2, 40 * 4);
+ }
+}
+
+__global__
+void cryptonight_extra_gpu_keccakf2(uint32_t threads, uint32_t * __restrict__ d_ctx_state)
+{
+ const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
+ if(thread < threads)
+ {
+ uint64_t*ctx_state = (uint64_t*) (&d_ctx_state[thread * 50]);
+ uint64_t state[25];
+ #pragma unroll
+ for(int i = 0; i < 25; i++)
+ state[i] = ctx_state[i];
+
+ cn_keccakf2(state);
+
+ #pragma unroll
+ for(int i = 0; i < 25; i++)
+ ctx_state[i] = state[i];
+ }
+}
+
+__global__
+void cryptonight_extra_gpu_nonces(uint32_t threads, const uint32_t startNonce, const uint32_t * __restrict__ d_ctx_state,
+ const uint32_t* d_target, uint32_t * resNonces)
+{
+ const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
+ if(thread < threads)
+ {
+ uint64_t* ctx_state = (uint64_t*) (&d_ctx_state[thread * 50]);
+ uint64_t state[25];
+ #pragma unroll
+ for(int i = 0; i < 25; i++)
+ state[i] = ctx_state[i];
+
+ uint32_t hash[8];
+ switch(((uint8_t *)state)[0] & 0x03)
+ {
+ case 0: {
+ cn_blake((uint8_t*)state, 200, (uint8_t*)hash);
+ break;
+ }
+ case 1: {
+ cn_groestl((BitSequence*)state, 200, (BitSequence*)hash);
+ break;
+ }
+ case 2: {
+ // to double check..
+ cn_jh((BitSequence*)state, 200, (BitSequence*)hash);
+ break;
+ }
+ case 3: {
+ cn_skein((BitSequence*)state, 200, (BitSequence*)hash);
+ break;
+ }
+ }
+
+ if(hash[7] <= d_target[1] && hash[6] <= d_target[0])
+ {
+ const uint32_t nonce = startNonce + thread;
+ uint32_t tmp = atomicExch(resNonces, nonce);
+ if(tmp != UINT32_MAX)
+ resNonces[1] = tmp;
+ }
+ }
+}
+
+__host__
+void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *ptarget)
+{
+ uint32_t *pTargetIn = (uint32_t*) ptarget;
+ cudaMemcpy(d_input[thr_id], data, 19 * sizeof(uint32_t), cudaMemcpyHostToDevice);
+ cudaMemcpy(d_target[thr_id], &pTargetIn[6], 2*sizeof(uint32_t), cudaMemcpyHostToDevice);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+}
+
+__host__
+void cryptonight_extra_cpu_init(int thr_id, uint32_t threads)
+{
+ cudaMalloc(&d_input[thr_id], 19 * sizeof(uint32_t));
+ cudaMalloc(&d_target[thr_id], 2*sizeof(uint32_t));
+ cudaMalloc(&d_result[thr_id], 2*sizeof(uint32_t));
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+}
+
+__host__
+void cryptonight_extra_cpu_prepare(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
+{
+ int threadsperblock = 128;
+
+ dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+ dim3 block(threadsperblock);
+
+ cryptonight_extra_gpu_prepare <<>> (threads, d_input[thr_id], startNonce, d_ctx_state, d_ctx_a, d_ctx_b, d_ctx_key1, d_ctx_key2);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+}
+
+__host__
+void cryptonight_extra_cpu_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *resnonce, uint32_t *d_ctx_state)
+{
+ int threadsperblock = 128;
+
+ dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+ dim3 block(threadsperblock);
+
+ cudaMemset(d_result[thr_id], 0xFF, 2*sizeof(uint32_t));
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cryptonight_extra_gpu_keccakf2 <<>> (threads, d_ctx_state);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cryptonight_extra_gpu_nonces <<>> (threads, startNonce, d_ctx_state, d_target[thr_id], d_result[thr_id]);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+ cudaMemcpy(resnonce, d_result[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
+ exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+}
+
+__host__
+void cryptonight_extra_cpu_free(int thr_id)
+{
+ if (d_input[thr_id]) {
+ cudaFree(d_input[thr_id]);
+ cudaFree(d_target[thr_id]);
+ cudaFree(d_result[thr_id]);
+ d_input[thr_id] = NULL;
+ }
+}
\ No newline at end of file
diff --git a/crypto/oaes_config.h b/crypto/oaes_config.h
new file mode 100644
index 0000000..78b8edb
--- /dev/null
+++ b/crypto/oaes_config.h
@@ -0,0 +1,51 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+
+#ifndef _OAES_CONFIG_H
+#define _OAES_CONFIG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#ifndef OAES_HAVE_ISAAC
+//#define OAES_HAVE_ISAAC 1
+//#endif // OAES_HAVE_ISAAC
+
+//#ifndef OAES_DEBUG
+//#define OAES_DEBUG 0
+//#endif // OAES_DEBUG
+//#define OAES_DEBUG 1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OAES_CONFIG_H
diff --git a/crypto/oaes_lib.cpp b/crypto/oaes_lib.cpp
new file mode 100644
index 0000000..4711cc1
--- /dev/null
+++ b/crypto/oaes_lib.cpp
@@ -0,0 +1,1446 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+static const char _NR[] = {
+ 0x4e,0x61,0x62,0x69,0x6c,0x20,0x53,0x2e,0x20,
+ 0x41,0x6c,0x20,0x52,0x61,0x6d,0x6c,0x69,0x00 };
+
+#include "miner.h"
+
+#include
+#include
+#include
+#if !((defined(__FreeBSD__) && __FreeBSD__ >= 10) || defined(__APPLE__))
+#include
+#endif
+#include
+#include
+#include
+
+#ifdef WIN32
+#include
+#else
+#include
+#include
+#endif
+
+#include "oaes_config.h"
+#include "oaes_lib.h"
+
+#ifdef OAES_HAVE_ISAAC
+#include "rand.h"
+#endif // OAES_HAVE_ISAAC
+
+#define OAES_RKEY_LEN 4
+#define OAES_COL_LEN 4
+#define OAES_ROUND_BASE 7
+
+// the block is padded
+#define OAES_FLAG_PAD 0x01
+
+#ifndef min
+# define min(a,b) (((a)<(b)) ? (a) : (b))
+#endif /* min */
+
+// "OAES<8-bit header version><8-bit type><16-bit options><8-bit flags><56-bit reserved>"
+static uint8_t oaes_header[OAES_BLOCK_SIZE] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ 0x4f, 0x41, 0x45, 0x53, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+static uint8_t oaes_gf_8[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+static uint8_t oaes_sub_byte_value[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 },
+ /*1*/ { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 },
+ /*2*/ { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 },
+ /*3*/ { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 },
+ /*4*/ { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 },
+ /*5*/ { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf },
+ /*6*/ { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 },
+ /*7*/ { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 },
+ /*8*/ { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 },
+ /*9*/ { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb },
+ /*a*/ { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 },
+ /*b*/ { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 },
+ /*c*/ { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a },
+ /*d*/ { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e },
+ /*e*/ { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf },
+ /*f*/ { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 },
+};
+
+static uint8_t oaes_inv_sub_byte_value[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb },
+ /*1*/ { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb },
+ /*2*/ { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e },
+ /*3*/ { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 },
+ /*4*/ { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 },
+ /*5*/ { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 },
+ /*6*/ { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 },
+ /*7*/ { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b },
+ /*8*/ { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 },
+ /*9*/ { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e },
+ /*a*/ { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b },
+ /*b*/ { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 },
+ /*c*/ { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f },
+ /*d*/ { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef },
+ /*e*/ { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 },
+ /*f*/ { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d },
+};
+
+static uint8_t oaes_gf_mul_2[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ /*1*/ { 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e },
+ /*2*/ { 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e },
+ /*3*/ { 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e },
+ /*4*/ { 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e },
+ /*5*/ { 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe },
+ /*6*/ { 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde },
+ /*7*/ { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe },
+ /*8*/ { 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05 },
+ /*9*/ { 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25 },
+ /*a*/ { 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45 },
+ /*b*/ { 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65 },
+ /*c*/ { 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85 },
+ /*d*/ { 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5 },
+ /*e*/ { 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5 },
+ /*f*/ { 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 },
+};
+
+static uint8_t oaes_gf_mul_3[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ /*1*/ { 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21 },
+ /*2*/ { 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71 },
+ /*3*/ { 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41 },
+ /*4*/ { 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1 },
+ /*5*/ { 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1 },
+ /*6*/ { 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1 },
+ /*7*/ { 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81 },
+ /*8*/ { 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a },
+ /*9*/ { 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba },
+ /*a*/ { 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea },
+ /*b*/ { 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda },
+ /*c*/ { 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a },
+ /*d*/ { 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a },
+ /*e*/ { 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a },
+ /*f*/ { 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a },
+};
+
+static uint8_t oaes_gf_mul_9[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ /*1*/ { 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7 },
+ /*2*/ { 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c },
+ /*3*/ { 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc },
+ /*4*/ { 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01 },
+ /*5*/ { 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91 },
+ /*6*/ { 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a },
+ /*7*/ { 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa },
+ /*8*/ { 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b },
+ /*9*/ { 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b },
+ /*a*/ { 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0 },
+ /*b*/ { 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30 },
+ /*c*/ { 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed },
+ /*d*/ { 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d },
+ /*e*/ { 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6 },
+ /*f*/ { 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 },
+};
+
+static uint8_t oaes_gf_mul_b[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ /*1*/ { 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9 },
+ /*2*/ { 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12 },
+ /*3*/ { 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2 },
+ /*4*/ { 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f },
+ /*5*/ { 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f },
+ /*6*/ { 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4 },
+ /*7*/ { 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54 },
+ /*8*/ { 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e },
+ /*9*/ { 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e },
+ /*a*/ { 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5 },
+ /*b*/ { 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55 },
+ /*c*/ { 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68 },
+ /*d*/ { 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8 },
+ /*e*/ { 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13 },
+ /*f*/ { 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 },
+};
+
+static uint8_t oaes_gf_mul_d[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ /*1*/ { 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b },
+ /*2*/ { 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0 },
+ /*3*/ { 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20 },
+ /*4*/ { 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26 },
+ /*5*/ { 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6 },
+ /*6*/ { 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d },
+ /*7*/ { 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d },
+ /*8*/ { 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91 },
+ /*9*/ { 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41 },
+ /*a*/ { 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a },
+ /*b*/ { 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa },
+ /*c*/ { 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc },
+ /*d*/ { 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c },
+ /*e*/ { 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47 },
+ /*f*/ { 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 },
+};
+
+static uint8_t oaes_gf_mul_e[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ /*1*/ { 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba },
+ /*2*/ { 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81 },
+ /*3*/ { 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61 },
+ /*4*/ { 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7 },
+ /*5*/ { 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17 },
+ /*6*/ { 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c },
+ /*7*/ { 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc },
+ /*8*/ { 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b },
+ /*9*/ { 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb },
+ /*a*/ { 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0 },
+ /*b*/ { 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20 },
+ /*c*/ { 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6 },
+ /*d*/ { 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56 },
+ /*e*/ { 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d },
+ /*f*/ { 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d },
+};
+
+static OAES_RET oaes_sub_byte( uint8_t * byte )
+{
+ size_t _x, _y;
+
+ if( unlikely(NULL == byte) )
+ return OAES_RET_ARG1;
+
+ _y = ((_x = *byte) >> 4) & 0x0f;
+ _x &= 0x0f;
+ *byte = oaes_sub_byte_value[_y][_x];
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_sub_byte( uint8_t * byte )
+{
+ size_t _x, _y;
+
+ if( NULL == byte )
+ return OAES_RET_ARG1;
+
+ _x = _y = *byte;
+ _x &= 0x0f;
+ _y &= 0xf0;
+ _y >>= 4;
+ *byte = oaes_inv_sub_byte_value[_y][_x];
+
+ return OAES_RET_SUCCESS;
+}
+/*
+static OAES_RET oaes_word_rot_right( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ memcpy( _temp + 1, word, OAES_COL_LEN - 1 );
+ _temp[0] = word[OAES_COL_LEN - 1];
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+*/
+static OAES_RET oaes_word_rot_left( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ memcpy( _temp, word + 1, OAES_COL_LEN - 1 );
+ _temp[OAES_COL_LEN - 1] = word[0];
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_shift_rows( uint8_t block[OAES_BLOCK_SIZE] )
+{
+ uint8_t _temp[] = { block[0x03], block[0x02], block[0x01], block[0x06], block[0x0b] };
+
+ if( unlikely(NULL == block) )
+ return OAES_RET_ARG1;
+
+ block[0x0b] = block[0x07];
+ block[0x01] = block[0x05];
+ block[0x02] = block[0x0a];
+ block[0x03] = block[0x0f];
+ block[0x05] = block[0x09];
+ block[0x06] = block[0x0e];
+ block[0x07] = _temp[0];
+ block[0x09] = block[0x0d];
+ block[0x0a] = _temp[1];
+ block[0x0d] = _temp[2];
+ block[0x0e] = _temp[3];
+ block[0x0f] = _temp[4];
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_shift_rows( uint8_t block[OAES_BLOCK_SIZE] )
+{
+ uint8_t _temp[OAES_BLOCK_SIZE];
+
+ if( NULL == block )
+ return OAES_RET_ARG1;
+
+ _temp[0x00] = block[0x00];
+ _temp[0x01] = block[0x0d];
+ _temp[0x02] = block[0x0a];
+ _temp[0x03] = block[0x07];
+ _temp[0x04] = block[0x04];
+ _temp[0x05] = block[0x01];
+ _temp[0x06] = block[0x0e];
+ _temp[0x07] = block[0x0b];
+ _temp[0x08] = block[0x08];
+ _temp[0x09] = block[0x05];
+ _temp[0x0a] = block[0x02];
+ _temp[0x0b] = block[0x0f];
+ _temp[0x0c] = block[0x0c];
+ _temp[0x0d] = block[0x09];
+ _temp[0x0e] = block[0x06];
+ _temp[0x0f] = block[0x03];
+ memcpy( block, _temp, OAES_BLOCK_SIZE );
+
+ return OAES_RET_SUCCESS;
+}
+
+static uint8_t oaes_gf_mul(uint8_t left, uint8_t right)
+{
+ size_t _x, _y;
+
+ _y = ((_x = left) >> 4) & 0x0f;
+ _x &= 0x0f;
+
+ switch( right )
+ {
+ case 0x02:
+ return oaes_gf_mul_2[_y][_x];
+ break;
+ case 0x03:
+ return oaes_gf_mul_3[_y][_x];
+ break;
+ case 0x09:
+ return oaes_gf_mul_9[_y][_x];
+ break;
+ case 0x0b:
+ return oaes_gf_mul_b[_y][_x];
+ break;
+ case 0x0d:
+ return oaes_gf_mul_d[_y][_x];
+ break;
+ case 0x0e:
+ return oaes_gf_mul_e[_y][_x];
+ break;
+ default:
+ return left;
+ break;
+ }
+}
+
+static OAES_RET oaes_mix_cols( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( unlikely(NULL == word) )
+ return OAES_RET_ARG1;
+
+ _temp[0] = oaes_gf_mul(word[0], 0x02) ^ oaes_gf_mul( word[1], 0x03 ) ^
+ word[2] ^ word[3];
+ _temp[1] = word[0] ^ oaes_gf_mul( word[1], 0x02 ) ^
+ oaes_gf_mul( word[2], 0x03 ) ^ word[3];
+ _temp[2] = word[0] ^ word[1] ^
+ oaes_gf_mul( word[2], 0x02 ) ^ oaes_gf_mul( word[3], 0x03 );
+ _temp[3] = oaes_gf_mul( word[0], 0x03 ) ^ word[1] ^
+ word[2] ^ oaes_gf_mul( word[3], 0x02 );
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_mix_cols( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ _temp[0] = oaes_gf_mul( word[0], 0x0e ) ^ oaes_gf_mul( word[1], 0x0b ) ^
+ oaes_gf_mul( word[2], 0x0d ) ^ oaes_gf_mul( word[3], 0x09 );
+ _temp[1] = oaes_gf_mul( word[0], 0x09 ) ^ oaes_gf_mul( word[1], 0x0e ) ^
+ oaes_gf_mul( word[2], 0x0b ) ^ oaes_gf_mul( word[3], 0x0d );
+ _temp[2] = oaes_gf_mul( word[0], 0x0d ) ^ oaes_gf_mul( word[1], 0x09 ) ^
+ oaes_gf_mul( word[2], 0x0e ) ^ oaes_gf_mul( word[3], 0x0b );
+ _temp[3] = oaes_gf_mul( word[0], 0x0b ) ^ oaes_gf_mul( word[1], 0x0d ) ^
+ oaes_gf_mul( word[2], 0x09 ) ^ oaes_gf_mul( word[3], 0x0e );
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_sprintf(
+ char * buf, size_t * buf_len, const uint8_t * data, size_t data_len )
+{
+ size_t _i, _buf_len_in;
+ char _temp[4];
+
+ if( NULL == buf_len )
+ return OAES_RET_ARG2;
+
+ _buf_len_in = *buf_len;
+ *buf_len = data_len * 3 + data_len / OAES_BLOCK_SIZE + 1;
+
+ if( NULL == buf )
+ return OAES_RET_SUCCESS;
+
+ if( *buf_len > _buf_len_in )
+ return OAES_RET_BUF;
+
+ if( NULL == data )
+ return OAES_RET_ARG3;
+
+ strcpy( buf, "" );
+
+ for( _i = 0; _i < data_len; _i++ )
+ {
+ sprintf( _temp, "%02x ", data[_i] );
+ strcat( buf, _temp );
+ if( _i && 0 == ( _i + 1 ) % OAES_BLOCK_SIZE )
+ strcat( buf, "\n" );
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+#ifdef OAES_HAVE_ISAAC
+static void oaes_get_seed( char buf[RANDSIZ + 1] )
+{
+ struct timeb timer;
+ struct tm *gmTimer;
+ char * _test = NULL;
+
+ ftime (&timer);
+ gmTimer = gmtime( &timer.time );
+ _test = (char *) calloc( sizeof( char ), timer.millitm );
+ sprintf( buf, "%04d%02d%02d%02d%02d%02d%03d%p%d",
+ gmTimer->tm_year + 1900, gmTimer->tm_mon + 1, gmTimer->tm_mday,
+ gmTimer->tm_hour, gmTimer->tm_min, gmTimer->tm_sec, timer.millitm,
+ _test + timer.millitm, getpid() );
+
+ if( _test )
+ free( _test );
+}
+#else
+static uint32_t oaes_get_seed(void)
+{
+ struct timeb timer;
+ struct tm *gmTimer;
+ char * _test = NULL;
+ uint32_t _ret = 0;
+
+ ftime (&timer);
+ gmTimer = gmtime( &timer.time );
+ _test = (char *) calloc( sizeof( char ), timer.millitm );
+ _ret = (uint32_t)(gmTimer->tm_year + 1900 + gmTimer->tm_mon + 1 + gmTimer->tm_mday +
+ gmTimer->tm_hour + gmTimer->tm_min + gmTimer->tm_sec + timer.millitm +
+ (uintptr_t) ( _test + timer.millitm ) + getpid());
+
+ if( _test )
+ free( _test );
+
+ return _ret;
+}
+#endif // OAES_HAVE_ISAAC
+
+static OAES_RET oaes_key_destroy( oaes_key ** key )
+{
+ if( NULL == *key )
+ return OAES_RET_SUCCESS;
+
+ if( (*key)->data )
+ {
+ free( (*key)->data );
+ (*key)->data = NULL;
+ }
+
+ if( (*key)->exp_data )
+ {
+ free( (*key)->exp_data );
+ (*key)->exp_data = NULL;
+ }
+
+ (*key)->data_len = 0;
+ (*key)->exp_data_len = 0;
+ (*key)->num_keys = 0;
+ (*key)->key_base = 0;
+ free( *key );
+ *key = NULL;
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_key_expand( OAES_CTX * ctx )
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ _ctx->key->key_base = _ctx->key->data_len / OAES_RKEY_LEN;
+ _ctx->key->num_keys = _ctx->key->key_base + OAES_ROUND_BASE;
+
+ _ctx->key->exp_data_len = _ctx->key->num_keys * OAES_RKEY_LEN * OAES_COL_LEN;
+ _ctx->key->exp_data = (uint8_t *)
+ calloc( _ctx->key->exp_data_len, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->exp_data )
+ return OAES_RET_MEM;
+
+ // the first _ctx->key->data_len are a direct copy
+ memcpy( _ctx->key->exp_data, _ctx->key->data, _ctx->key->data_len );
+
+ // apply ExpandKey algorithm for remainder
+ for( _i = _ctx->key->key_base; _i < _ctx->key->num_keys * OAES_RKEY_LEN; _i++ )
+ {
+
+ memcpy( _temp,
+ _ctx->key->exp_data + ( _i - 1 ) * OAES_RKEY_LEN, OAES_COL_LEN );
+
+ // transform key column
+ if( 0 == _i % _ctx->key->key_base )
+ {
+ oaes_word_rot_left( _temp );
+
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ oaes_sub_byte( _temp + _j );
+
+ _temp[0] = _temp[0] ^ oaes_gf_8[ _i / _ctx->key->key_base - 1 ];
+ }
+ else if( _ctx->key->key_base > 6 && 4 == _i % _ctx->key->key_base )
+ {
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ oaes_sub_byte( _temp + _j );
+ }
+
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ {
+ _ctx->key->exp_data[ _i * OAES_RKEY_LEN + _j ] =
+ _ctx->key->exp_data[ ( _i - _ctx->key->key_base ) *
+ OAES_RKEY_LEN + _j ] ^ _temp[_j];
+ }
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_key_gen( OAES_CTX * ctx, size_t key_size )
+{
+ size_t _i;
+ oaes_key * _key = NULL;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ _key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _key )
+ return OAES_RET_MEM;
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _key->data_len = key_size;
+ _key->data = (uint8_t *) calloc( key_size, sizeof( uint8_t ));
+
+ if( NULL == _key->data )
+ return OAES_RET_MEM;
+
+ for( _i = 0; _i < key_size; _i++ )
+#ifdef OAES_HAVE_ISAAC
+ _key->data[_i] = (uint8_t) rand( _ctx->rctx );
+#else
+ _key->data[_i] = (uint8_t) rand();
+#endif // OAES_HAVE_ISAAC
+
+ _ctx->key = _key;
+ _rc = _rc ? _rc : oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_gen_128( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 16 );
+}
+
+OAES_RET oaes_key_gen_192( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 24 );
+}
+
+OAES_RET oaes_key_gen_256( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 32 );
+}
+
+OAES_RET oaes_key_export( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len )
+{
+ size_t _data_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ if( NULL == data_len )
+ return OAES_RET_ARG3;
+
+ _data_len_in = *data_len;
+ // data + header
+ *data_len = _ctx->key->data_len + OAES_BLOCK_SIZE;
+
+ if( NULL == data )
+ return OAES_RET_SUCCESS;
+
+ if( _data_len_in < *data_len )
+ return OAES_RET_BUF;
+
+ // header
+ memcpy( data, oaes_header, OAES_BLOCK_SIZE );
+ data[5] = 0x01;
+ data[7] = (uint8_t)(_ctx->key->data_len);
+ memcpy( data + OAES_BLOCK_SIZE, _ctx->key->data, _ctx->key->data_len );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_export_data( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len )
+{
+ size_t _data_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ if( NULL == data_len )
+ return OAES_RET_ARG3;
+
+ _data_len_in = *data_len;
+ *data_len = _ctx->key->data_len;
+
+ if( NULL == data )
+ return OAES_RET_SUCCESS;
+
+ if( _data_len_in < *data_len )
+ return OAES_RET_BUF;
+
+ memcpy( data, _ctx->key->data, *data_len );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_import( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len )
+{
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ int _key_length;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == data )
+ return OAES_RET_ARG2;
+
+ switch( data_len )
+ {
+ case 16 + OAES_BLOCK_SIZE:
+ case 24 + OAES_BLOCK_SIZE:
+ case 32 + OAES_BLOCK_SIZE:
+ break;
+ default:
+ return OAES_RET_ARG3;
+ }
+
+ // header
+ if( 0 != memcmp( data, oaes_header, 4 ) )
+ return OAES_RET_HEADER;
+
+ // header version
+ switch( data[4] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // header type
+ switch( data[5] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // options
+ _key_length = data[7];
+ switch( _key_length )
+ {
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ if( (int)data_len != _key_length + OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _ctx->key )
+ return OAES_RET_MEM;
+
+ _ctx->key->data_len = _key_length;
+ _ctx->key->data = (uint8_t *)
+ calloc( _key_length, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->data )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return OAES_RET_MEM;
+ }
+
+ memcpy( _ctx->key->data, data + OAES_BLOCK_SIZE, _key_length );
+ _rc = _rc ? _rc : oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_import_data( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len )
+{
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == data )
+ return OAES_RET_ARG2;
+
+ switch( data_len )
+ {
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return OAES_RET_ARG3;
+ }
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _ctx->key )
+ return OAES_RET_MEM;
+
+ _ctx->key->data_len = data_len;
+ _ctx->key->data = (uint8_t *)
+ calloc( data_len, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->data )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return OAES_RET_MEM;
+ }
+
+ memcpy( _ctx->key->data, data, data_len );
+ _rc = _rc ? _rc : oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_CTX * oaes_alloc(void)
+{
+ oaes_ctx * _ctx = (oaes_ctx *) calloc(sizeof(oaes_ctx), 1);
+ if(!_ctx)
+ return NULL;
+
+#ifdef OAES_HAVE_ISAAC
+ {
+ ub4 _i = 0;
+ char _seed[RANDSIZ + 1];
+
+ _ctx->rctx = (randctx *) calloc(sizeof(randctx), 1);
+ if(!_ctx->rctx) {
+ free(_ctx);
+ return NULL;
+ }
+
+ oaes_get_seed( _seed );
+ memset( _ctx->rctx->randrsl, 0, RANDSIZ );
+ memcpy( _ctx->rctx->randrsl, _seed, RANDSIZ );
+ randinit( _ctx->rctx, TRUE);
+ }
+#else
+ srand( oaes_get_seed() );
+#endif // OAES_HAVE_ISAAC
+
+ _ctx->key = NULL;
+ oaes_set_option( _ctx, OAES_OPTION_CBC, NULL );
+
+#ifdef OAES_DEBUG
+ _ctx->step_cb = NULL;
+ oaes_set_option( _ctx, OAES_OPTION_STEP_OFF, NULL );
+#endif // OAES_DEBUG
+
+ return (OAES_CTX *) _ctx;
+}
+
+OAES_RET oaes_free(OAES_CTX ** ctx)
+{
+ oaes_ctx ** _ctx = (oaes_ctx **) ctx;
+
+ if(!_ctx)
+ return OAES_RET_ARG1;
+
+ if(!*_ctx)
+ return OAES_RET_SUCCESS;
+
+ if( (*_ctx)->key )
+ oaes_key_destroy( &((*_ctx)->key) );
+
+#ifdef OAES_HAVE_ISAAC
+ if( (*_ctx)->rctx )
+ {
+ free( (*_ctx)->rctx );
+ (*_ctx)->rctx = NULL;
+ }
+#endif // OAES_HAVE_ISAAC
+
+ free( *_ctx );
+ *_ctx = NULL;
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_set_option(OAES_CTX * ctx, OAES_OPTION option, const void * value)
+{
+ size_t _i;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ switch( option )
+ {
+ case OAES_OPTION_ECB:
+ _ctx->options &= ~OAES_OPTION_CBC;
+ memset( _ctx->iv, 0, OAES_BLOCK_SIZE );
+ break;
+
+ case OAES_OPTION_CBC:
+ _ctx->options &= ~OAES_OPTION_ECB;
+ if( value )
+ memcpy( _ctx->iv, value, OAES_BLOCK_SIZE );
+ else
+ {
+ for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ )
+#ifdef OAES_HAVE_ISAAC
+ _ctx->iv[_i] = (uint8_t) rand( _ctx->rctx );
+#else
+ _ctx->iv[_i] = (uint8_t) rand();
+#endif // OAES_HAVE_ISAAC
+ }
+ break;
+
+#ifdef OAES_DEBUG
+
+ case OAES_OPTION_STEP_ON:
+ if( value )
+ {
+ _ctx->options &= ~OAES_OPTION_STEP_OFF;
+ _ctx->step_cb = value;
+ }
+ else
+ {
+ _ctx->options &= ~OAES_OPTION_STEP_ON;
+ _ctx->options |= OAES_OPTION_STEP_OFF;
+ _ctx->step_cb = NULL;
+ return OAES_RET_ARG3;
+ }
+ break;
+
+ case OAES_OPTION_STEP_OFF:
+ _ctx->options &= ~OAES_OPTION_STEP_ON;
+ _ctx->step_cb = NULL;
+ break;
+
+#endif // OAES_DEBUG
+
+ default:
+ return OAES_RET_ARG2;
+ }
+
+ _ctx->options |= option;
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_encrypt_block(OAES_CTX * ctx, uint8_t * c, size_t c_len)
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len != OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "input", 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(State, K0)
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[_i];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data, "k_sch", 1, NULL );
+ _ctx->step_cb( c, "k_add", 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ // for round = 1 step 1 to Nr–1
+ for( _i = 1; _i < _ctx->key->num_keys - 1; _i++ )
+ {
+ // SubBytes(state)
+ for( _j = 0; _j < c_len; _j++ )
+ oaes_sub_byte( c + _j );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_box", _i, NULL );
+#endif // OAES_DEBUG
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_row", _i, NULL );
+#endif // OAES_DEBUG
+
+ // MixColumns(state)
+ oaes_mix_cols( c );
+ oaes_mix_cols( c + 4 );
+ oaes_mix_cols( c + 8 );
+ oaes_mix_cols( c + 12 );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "m_col", _i, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1])
+ for( _j = 0; _j < c_len; _j++ )
+ c[_j] = c[_j] ^
+ _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN,
+ "k_sch", _i, NULL );
+ _ctx->step_cb( c, "k_add", _i, NULL );
+ }
+#endif // OAES_DEBUG
+
+ }
+
+ // SubBytes(state)
+ for( _i = 0; _i < c_len; _i++ )
+ oaes_sub_byte( c + _i );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_box", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_row", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1])
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data +
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN,
+ "k_sch", _ctx->key->num_keys - 1, NULL );
+ _ctx->step_cb( c, "output", _ctx->key->num_keys - 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_decrypt_block(OAES_CTX * ctx, uint8_t * c, size_t c_len)
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len != OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "iinput", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1])
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data +
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN,
+ "ik_sch", _ctx->key->num_keys - 1, NULL );
+ _ctx->step_cb( c, "ik_add", _ctx->key->num_keys - 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ for( _i = _ctx->key->num_keys - 2; _i > 0; _i-- )
+ {
+ // InvShiftRows(state)
+ oaes_inv_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_row", _i, NULL );
+#endif // OAES_DEBUG
+
+ // InvSubBytes(state)
+ for( _j = 0; _j < c_len; _j++ )
+ oaes_inv_sub_byte( c + _j );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_box", _i, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1])
+ for( _j = 0; _j < c_len; _j++ )
+ c[_j] = c[_j] ^
+ _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN,
+ "ik_sch", _i, NULL );
+ _ctx->step_cb( c, "ik_add", _i, NULL );
+ }
+#endif // OAES_DEBUG
+
+ // InvMixColums(state)
+ oaes_inv_mix_cols( c );
+ oaes_inv_mix_cols( c + 4 );
+ oaes_inv_mix_cols( c + 8 );
+ oaes_inv_mix_cols( c + 12 );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "im_col", _i, NULL );
+#endif // OAES_DEBUG
+
+ }
+
+ // InvShiftRows(state)
+ oaes_inv_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_row", 1, NULL );
+#endif // OAES_DEBUG
+
+ // InvSubBytes(state)
+ for( _i = 0; _i < c_len; _i++ )
+ oaes_inv_sub_byte( c + _i );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_box", 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[0, Nb-1])
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[_i];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data, "ik_sch", 1, NULL );
+ _ctx->step_cb( c, "ioutput", 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_encrypt(OAES_CTX * ctx, const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len)
+{
+ size_t _i, _j, _c_len_in, _c_data_len;
+ size_t _pad_len = m_len % OAES_BLOCK_SIZE == 0 ?
+ 0 : OAES_BLOCK_SIZE - m_len % OAES_BLOCK_SIZE;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ uint8_t _flags = _pad_len ? OAES_FLAG_PAD : 0;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == m )
+ return OAES_RET_ARG2;
+
+ if( NULL == c_len )
+ return OAES_RET_ARG5;
+
+ _c_len_in = *c_len;
+ // data + pad
+ _c_data_len = m_len + _pad_len;
+ // header + iv + data + pad
+ *c_len = 2 * OAES_BLOCK_SIZE + m_len + _pad_len;
+
+ if( NULL == c )
+ return OAES_RET_SUCCESS;
+
+ if( _c_len_in < *c_len )
+ return OAES_RET_BUF;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ // header
+ memcpy(c, oaes_header, OAES_BLOCK_SIZE );
+ memcpy(c + 6, &_ctx->options, sizeof(_ctx->options));
+ memcpy(c + 8, &_flags, sizeof(_flags));
+ // iv
+ memcpy(c + OAES_BLOCK_SIZE, _ctx->iv, OAES_BLOCK_SIZE );
+ // data
+ memcpy(c + 2 * OAES_BLOCK_SIZE, m, m_len );
+
+ for( _i = 0; _i < _c_data_len; _i += OAES_BLOCK_SIZE )
+ {
+ uint8_t _block[OAES_BLOCK_SIZE];
+ size_t _block_size = min( m_len - _i, OAES_BLOCK_SIZE );
+
+ memcpy( _block, c + 2 * OAES_BLOCK_SIZE + _i, _block_size );
+
+ // insert pad
+ for( _j = 0; _j < OAES_BLOCK_SIZE - _block_size; _j++ )
+ _block[ _block_size + _j ] = (uint8_t)_j + 1;
+
+ // CBC
+ if( _ctx->options & OAES_OPTION_CBC )
+ {
+ for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ )
+ _block[_j] = _block[_j] ^ _ctx->iv[_j];
+ }
+
+ _rc = _rc ? _rc :
+ oaes_encrypt_block( ctx, _block, OAES_BLOCK_SIZE );
+ memcpy( c + 2 * OAES_BLOCK_SIZE + _i, _block, OAES_BLOCK_SIZE );
+
+ if( _ctx->options & OAES_OPTION_CBC )
+ memcpy( _ctx->iv, _block, OAES_BLOCK_SIZE );
+ }
+
+ return _rc;
+}
+
+OAES_RET oaes_decrypt(OAES_CTX * ctx, const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len)
+{
+ size_t _i, _j, _m_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ uint8_t _iv[OAES_BLOCK_SIZE];
+ uint8_t _flags;
+ OAES_OPTION _options;
+
+ if( NULL == ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len % OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == m_len )
+ return OAES_RET_ARG5;
+
+ _m_len_in = *m_len;
+ *m_len = c_len - 2 * OAES_BLOCK_SIZE;
+
+ if( NULL == m )
+ return OAES_RET_SUCCESS;
+
+ if( _m_len_in < *m_len )
+ return OAES_RET_BUF;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ // header
+ if( 0 != memcmp( c, oaes_header, 4 ) )
+ return OAES_RET_HEADER;
+
+ // header version
+ switch( c[4] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // header type
+ switch( c[5] )
+ {
+ case 0x02:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // options
+ memcpy(&_options, c + 6, sizeof(_options));
+ // validate that all options are valid
+ if( _options & ~(
+ OAES_OPTION_ECB
+ | OAES_OPTION_CBC
+#ifdef OAES_DEBUG
+ | OAES_OPTION_STEP_ON
+ | OAES_OPTION_STEP_OFF
+#endif // OAES_DEBUG
+ ) )
+ return OAES_RET_HEADER;
+ if( ( _options & OAES_OPTION_ECB ) &&
+ ( _options & OAES_OPTION_CBC ) )
+ return OAES_RET_HEADER;
+ if( _options == OAES_OPTION_NONE )
+ return OAES_RET_HEADER;
+
+ // flags
+ memcpy(&_flags, c + 8, sizeof(_flags));
+ // validate that all flags are valid
+ if( _flags & ~(
+ OAES_FLAG_PAD
+ ) )
+ return OAES_RET_HEADER;
+
+ // iv
+ memcpy( _iv, c + OAES_BLOCK_SIZE, OAES_BLOCK_SIZE);
+ // data + pad
+ memcpy( m, c + 2 * OAES_BLOCK_SIZE, *m_len );
+
+ for( _i = 0; _i < *m_len; _i += OAES_BLOCK_SIZE )
+ {
+ if( ( _options & OAES_OPTION_CBC ) && _i > 0 )
+ memcpy( _iv, c + OAES_BLOCK_SIZE + _i, OAES_BLOCK_SIZE );
+
+ _rc = _rc ? _rc :
+ oaes_decrypt_block( ctx, m + _i, min( *m_len - _i, OAES_BLOCK_SIZE ) );
+
+ // CBC
+ if( _options & OAES_OPTION_CBC )
+ {
+ for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ )
+ m[ _i + _j ] = m[ _i + _j ] ^ _iv[_j];
+ }
+ }
+
+ // remove pad
+ if( _flags & OAES_FLAG_PAD )
+ {
+ int _is_pad = 1;
+ size_t _temp = (size_t) m[*m_len - 1];
+
+ if( _temp <= 0x00 || _temp > 0x0f )
+ return OAES_RET_HEADER;
+ for( _i = 0; _i < _temp; _i++ )
+ if( m[*m_len - 1 - _i] != _temp - _i )
+ _is_pad = 0;
+ if( _is_pad )
+ {
+ memset( m + *m_len - _temp, 0, _temp );
+ *m_len -= _temp;
+ }
+ else
+ return OAES_RET_HEADER;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_API OAES_RET oaes_encryption_round(const uint8_t * key, uint8_t * c)
+{
+ size_t _i;
+
+ if(!key)
+ return OAES_RET_ARG1;
+
+ if(!c)
+ return OAES_RET_ARG2;
+
+ // SubBytes(state)
+ for(_i = 0; _i < OAES_BLOCK_SIZE; _i++)
+ oaes_sub_byte( c + _i );
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+ // MixColumns(state)
+ oaes_mix_cols( c );
+ oaes_mix_cols( c + 4 );
+ oaes_mix_cols( c + 8 );
+ oaes_mix_cols( c + 12 );
+
+ // AddRoundKey(State, key)
+ for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ )
+ c[_i] ^= key[_i];
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_API OAES_RET oaes_pseudo_encrypt_ecb(OAES_CTX * ctx, uint8_t * c)
+{
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ size_t _i;
+
+ if(!_ctx)
+ return OAES_RET_ARG1;
+
+ if(!c)
+ return OAES_RET_ARG2;
+
+ if(!_ctx->key)
+ return OAES_RET_NOKEY;
+
+ for (_i = 0; _i < 10; _i++)
+ oaes_encryption_round(&_ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN], c);
+
+ return OAES_RET_SUCCESS;
+}
diff --git a/crypto/oaes_lib.h b/crypto/oaes_lib.h
new file mode 100644
index 0000000..c48bad4
--- /dev/null
+++ b/crypto/oaes_lib.h
@@ -0,0 +1,214 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+
+#ifndef _OAES_LIB_H
+#define _OAES_LIB_H
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _WIN32
+# ifdef OAES_SHARED
+# ifdef oaes_lib_EXPORTS
+# define OAES_API __declspec(dllexport)
+# else
+# define OAES_API __declspec(dllimport)
+# endif
+# else
+# define OAES_API
+# endif
+#else
+# define OAES_API
+#endif // WIN32
+
+#define OAES_VERSION "0.8.1"
+#define OAES_BLOCK_SIZE 16
+
+typedef void OAES_CTX;
+
+typedef enum
+{
+ OAES_RET_FIRST = 0,
+ OAES_RET_SUCCESS = 0,
+ OAES_RET_UNKNOWN,
+ OAES_RET_ARG1,
+ OAES_RET_ARG2,
+ OAES_RET_ARG3,
+ OAES_RET_ARG4,
+ OAES_RET_ARG5,
+ OAES_RET_NOKEY,
+ OAES_RET_MEM,
+ OAES_RET_BUF,
+ OAES_RET_HEADER,
+ OAES_RET_COUNT
+} OAES_RET;
+
+/*
+ * oaes_set_option() takes one of these values for its [option] parameter
+ * some options accept either an optional or a required [value] parameter
+ */
+// no option
+#define OAES_OPTION_NONE 0
+// enable ECB mode, disable CBC mode
+#define OAES_OPTION_ECB 1
+// enable CBC mode, disable ECB mode
+// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
+// the value of the initialization vector, iv
+#define OAES_OPTION_CBC 2
+
+#ifdef OAES_DEBUG
+typedef int ( * oaes_step_cb ) (
+ const uint8_t state[OAES_BLOCK_SIZE],
+ const char * step_name,
+ int step_count,
+ void * user_data );
+// enable state stepping mode
+// value is required, must pass oaes_step_cb to receive the state at each step
+#define OAES_OPTION_STEP_ON 4
+// disable state stepping mode
+#define OAES_OPTION_STEP_OFF 8
+#endif // OAES_DEBUG
+
+typedef uint16_t OAES_OPTION;
+
+typedef struct _oaes_key
+{
+ size_t data_len;
+ uint8_t *data;
+ size_t exp_data_len;
+ uint8_t *exp_data;
+ size_t num_keys;
+ size_t key_base;
+} oaes_key;
+
+typedef struct _oaes_ctx
+{
+#ifdef OAES_HAVE_ISAAC
+ randctx * rctx;
+#endif // OAES_HAVE_ISAAC
+
+#ifdef OAES_DEBUG
+ oaes_step_cb step_cb;
+#endif // OAES_DEBUG
+
+ oaes_key * key;
+ OAES_OPTION options;
+ uint8_t iv[OAES_BLOCK_SIZE];
+} oaes_ctx;
+/*
+ * // usage:
+ *
+ * OAES_CTX * ctx = oaes_alloc();
+ * .
+ * .
+ * .
+ * {
+ * oaes_gen_key_xxx( ctx );
+ * {
+ * oaes_key_export( ctx, _buf, &_buf_len );
+ * // or
+ * oaes_key_export_data( ctx, _buf, &_buf_len );\
+ * }
+ * }
+ * // or
+ * {
+ * oaes_key_import( ctx, _buf, _buf_len );
+ * // or
+ * oaes_key_import_data( ctx, _buf, _buf_len );
+ * }
+ * .
+ * .
+ * .
+ * oaes_encrypt( ctx, m, m_len, c, &c_len );
+ * .
+ * .
+ * .
+ * oaes_decrypt( ctx, c, c_len, m, &m_len );
+ * .
+ * .
+ * .
+ * oaes_free( &ctx );
+ */
+
+OAES_API OAES_CTX * oaes_alloc(void);
+
+OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
+
+OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
+ OAES_OPTION option, const void * value );
+
+OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
+
+OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
+
+OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
+
+// export key with header information
+// set data == NULL to get the required data_len
+OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len );
+
+// directly export the data from key
+// set data == NULL to get the required data_len
+OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len );
+
+// import key with header information
+OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len );
+
+// directly import data into key
+OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len );
+
+// set c == NULL to get the required c_len
+OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
+ const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
+
+// set m == NULL to get the required m_len
+OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
+ const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
+
+// set buf == NULL to get the required buf_len
+OAES_API OAES_RET oaes_sprintf(
+ char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
+
+OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
+
+OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OAES_LIB_H
diff --git a/crypto/xmr-rpc.cpp b/crypto/xmr-rpc.cpp
index bae2001..b6e29ef 100644
--- a/crypto/xmr-rpc.cpp
+++ b/crypto/xmr-rpc.cpp
@@ -27,6 +27,7 @@
#define PRIu64 "I64u"
#endif
+#include
#include "xmr-rpc.h"
#include "wildkeccak.h"
@@ -376,7 +377,7 @@ bool rpc2_job_decode(const json_t *job, struct work *work)
goto err_out;
}
- if(!addendums_decode(job)) {
+ if(opt_algo == ALGO_WILDKECCAK && !addendums_decode(job)) {
applog(LOG_ERR, "JSON failed to process addendums");
goto err_out;
}
@@ -396,6 +397,7 @@ bool rpc2_job_decode(const json_t *job, struct work *work)
applog(LOG_ERR, "JSON invalid blob length");
goto err_out;
}
+
if (blobLen != 0)
{
pthread_mutex_lock(&rpc2_job_lock);
@@ -420,7 +422,6 @@ bool rpc2_job_decode(const json_t *job, struct work *work)
if(rpc2_target != target) {
double difficulty = (((double) UINT32_MAX) / target);
stratum.job.diff = difficulty;
- //applog(LOG_WARNING, "Stratum difficulty set to %.1f M", difficulty/1e6);
rpc2_target = target;
}
@@ -430,6 +431,7 @@ bool rpc2_job_decode(const json_t *job, struct work *work)
rpc2_job_id = strdup(job_id);
pthread_mutex_unlock(&rpc2_job_lock);
}
+
if(work)
{
if (!rpc2_blob) {
@@ -438,17 +440,14 @@ bool rpc2_job_decode(const json_t *job, struct work *work)
}
memcpy(work->data, rpc2_blob, rpc2_bloblen);
memset(work->target, 0xff, sizeof(work->target));
-
- // hmmpff ? seems wrong
- //*((uint64_t*)&work->target[6]) = rpc2_target;
work->target[7] = rpc2_target;
-
work->targetdiff = target_to_diff(work->target);
snprintf(work->job_id, sizeof(work->job_id), "%s", rpc2_job_id);
}
- wildkeccak_scratchpad_need_update(pscratchpad_buff);
+ if (opt_algo == ALGO_WILDKECCAK)
+ wildkeccak_scratchpad_need_update(pscratchpad_buff);
return true;
err_out:
@@ -465,8 +464,9 @@ bool rpc2_stratum_job(struct stratum_ctx *sctx, json_t *id, json_t *params)
pthread_mutex_lock(&rpc2_work_lock);
ret = rpc2_job_decode(params, &rpc2_work);
// update miner threads work
- rpc2_stratum_gen_work(sctx, &g_work);
+ if (ret) rpc2_stratum_gen_work(sctx, &g_work);
//memcpy(&g_work, &rpc2_work, sizeof(struct work));
+ restart_threads();
pthread_mutex_unlock(&rpc2_work_lock);
return ret;
}
@@ -480,7 +480,10 @@ bool rpc2_stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
stratum_diff = sctx->job.diff;
if (opt_showdiff && work->targetdiff != stratum_diff)
snprintf(sdiff, 32, " (%.5f)", work->targetdiff);
- applog(LOG_WARNING, "Stratum difficulty set to %.1f M%s", stratum_diff/1e6, sdiff);
+ if (stratum_diff >= 1e6)
+ applog(LOG_WARNING, "Stratum difficulty set to %.1f M%s", stratum_diff/1e6, sdiff);
+ else
+ applog(LOG_WARNING, "Stratum difficulty set to %.0f%s", stratum_diff, sdiff);
}
if (work->target[7] != rpc2_target) {
work->target[7] = rpc2_target;
@@ -501,19 +504,30 @@ bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work)
char *noncestr, *hashhex;
memcpy(&data[0], work->data, 88);
- memcpy(&data[1], work->nonces, 8);
- // pass if the previous hash is not the current previous hash
- if(!submit_old && memcmp(&work->data[3], &g_work.data[3], 28)) {
- if (opt_debug) applog(LOG_DEBUG, "stale work detected", work->sharediff, work->targetdiff);
- pool->stales_count++;
- return true;
+ if (opt_algo == ALGO_WILDKECCAK) {
+ memcpy(&data[1], work->nonces, 8);
+ // pass if the previous hash is not the current previous hash
+ if(!submit_old && memcmp(&work->data[3], &g_work.data[3], 28)) {
+ if (opt_debug) applog(LOG_DEBUG, "stale work detected", work->sharediff, work->targetdiff);
+ pool->stales_count++;
+ return true;
+ }
+ noncestr = bin2hex((unsigned char*) &data[1], 8);
+ memcpy(&last_found_nonce, work->nonces, 8); // "nonce":"5794ec8000000000" => 0x0000000080ec9457
+ wildkeccak_hash(hash, data, NULL, 0);
+ work_set_target_ratio(work, (uint32_t*) hash);
}
- noncestr = bin2hex((unsigned char*) &data[1], 8);
- memcpy(&last_found_nonce, work->nonces, 8); // "nonce":"5794ec8000000000" => 0x0000000080ec9457
+ else if (opt_algo == ALGO_CRYPTONIGHT) {
+ uint32_t nonce;
+ memcpy(&nonce, &data[39], 4);
+ noncestr = bin2hex((unsigned char*) &nonce, 4);
+ last_found_nonce = nonce;
+ cryptonight_hash(hash, data, 76);
+ work_set_target_ratio(work, (uint32_t*) hash);
+ }
- wildkeccak_hash(hash, data, NULL, 0);
//applog(LOG_DEBUG, "submit diff %g > %g", work->sharediff, work->targetdiff);
//applog_hex(data, 81);
//applog_hex(hash, 32);
@@ -592,7 +606,8 @@ bool store_scratchpad_to_file(bool do_fsync)
FILE *fp;
int ret;
- if(!scratchpad_size || !pscratchpad_buff) return true; // opt_algo != ALGO_WILDKECCAK ||
+ if(opt_algo != ALGO_WILDKECCAK) return true;
+ if(!scratchpad_size || !pscratchpad_buff) return true;
snprintf(file_name_buff, sizeof(file_name_buff), "%s.tmp", pscratchpad_local_cache);
unlink(file_name_buff);
@@ -646,6 +661,8 @@ bool load_scratchpad_from_file(const char *fname)
FILE *fp;
long flen;
+ if(opt_algo != ALGO_WILDKECCAK) return true;
+
fp = fopen(fname, "rb");
if (!fp) {
if (errno != ENOENT) {
@@ -692,6 +709,8 @@ bool load_scratchpad_from_file(const char *fname)
bool dump_scratchpad_to_file_debug()
{
char file_name_buff[1024] = { 0 };
+ if(opt_algo != ALGO_WILDKECCAK) return true;
+
snprintf(file_name_buff, sizeof(file_name_buff), "scratchpad_%" PRIu64 "_%llx.scr",
current_scratchpad_hi.height, (long long) last_found_nonce);
@@ -1027,6 +1046,7 @@ static bool rpc2_stratum_getscratchpad(struct stratum_ctx *sctx)
json_t *val = NULL;
json_error_t err;
char *s, *sret;
+ if(opt_algo != ALGO_WILDKECCAK) return true;
s = (char*) calloc(1, 1024);
if (!s)
@@ -1067,14 +1087,21 @@ bool rpc2_stratum_authorize(struct stratum_ctx *sctx, const char *user, const ch
bool ret = false;
json_t *val = NULL, *res_val, *err_val, *job_val = NULL;
json_error_t err;
- char *s, *sret;
- char *prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32);
- s = (char*) calloc(1, 320 + strlen(user) + strlen(pass));
- sprintf(s, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\","
+ char *sret;
+ char *s = (char*) calloc(1, 320 + strlen(user) + strlen(pass));
+
+ if (opt_algo == ALGO_WILDKECCAK) {
+ char *prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32);
+ sprintf(s, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\","
"\"hi\":{\"height\":%" PRIu64 ",\"block_id\":\"%s\"},"
"\"agent\":\"" USER_AGENT "\"},\"id\":2}",
- user, pass, current_scratchpad_hi.height, prevhash);
- free(prevhash);
+ user, pass, current_scratchpad_hi.height, prevhash);
+ free(prevhash);
+ } else {
+ sprintf(s, "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\","
+ "\"agent\":\"" USER_AGENT "\"},\"id\":2}",
+ user, pass);
+ }
if (!stratum_send_line(sctx, s))
goto out;
@@ -1146,12 +1173,16 @@ bool rpc2_stratum_request_job(struct stratum_ctx *sctx)
return ret;
}
- char* prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32);
- sprintf(s, "{\"method\":\"getjob\",\"params\": {"
- "\"id\":\"%s\", \"hi\": {\"height\": %" PRIu64 ",\"block_id\":\"%s\" }, \"agent\": \"" USER_AGENT "\"},"
- "\"id\":1}",
- rpc2_id, current_scratchpad_hi.height, prevhash);
- free(prevhash);
+ if (opt_algo == ALGO_WILDKECCAK) {
+ char* prevhash = bin2hex((const unsigned char*)current_scratchpad_hi.prevhash, 32);
+ sprintf(s, "{\"method\":\"getjob\",\"params\": {"
+ "\"id\":\"%s\", \"hi\": {\"height\": %" PRIu64 ",\"block_id\":\"%s\" }, \"agent\": \"" USER_AGENT "\"},"
+ "\"id\":1}",
+ rpc2_id, current_scratchpad_hi.height, prevhash);
+ free(prevhash);
+ } else {
+ sprintf(s, "{\"method\":\"getjob\",\"params\":{\"id\":\"%s\"},\"id\":1}", rpc2_id);
+ }
if(!stratum_send_line(sctx, s)) {
applog(LOG_ERR, "Stratum failed to send getjob line");
@@ -1210,7 +1241,7 @@ int rpc2_stratum_thread_stuff(struct pool_infos* pool)
}
}
- if(!scratchpad_size) {
+ if(!scratchpad_size && opt_algo == ALGO_WILDKECCAK) {
if(!rpc2_stratum_getscratchpad(&stratum)) {
stratum_disconnect(&stratum);
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
@@ -1226,6 +1257,15 @@ int rpc2_stratum_thread_stuff(struct pool_infos* pool)
}
}
+ // if getjob supported
+ if(0 && opt_algo == ALGO_CRYPTONIGHT) {
+ if(!rpc2_stratum_request_job(&stratum)) {
+ stratum_disconnect(&stratum);
+ applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
+ sleep(opt_fail_pause);
+ }
+ }
+
/* save every 12 hours */
if ((time(NULL) - prev_save) > 12*3600) {
store_scratchpad_to_file(false);
diff --git a/miner.h b/miner.h
index cb2e174..8348d98 100644
--- a/miner.h
+++ b/miner.h
@@ -267,6 +267,7 @@ extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce,
extern int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
+extern int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@@ -315,6 +316,7 @@ extern void free_blake256(int thr_id);
extern void free_blake2s(int thr_id);
extern void free_bmw(int thr_id);
extern void free_c11(int thr_id);
+extern void free_cryptonight(int thr_id);
extern void free_decred(int thr_id);
extern void free_deep(int thr_id);
extern void free_keccak256(int thr_id);
@@ -803,6 +805,7 @@ void blake256hash(void *output, const void *input, int8_t rounds);
void blake2s_hash(void *output, const void *input);
void bmw_hash(void *state, const void *input);
void c11hash(void *output, const void *input);
+void cryptonight_hash(void* output, const void* input, size_t len);
void decred_hash(void *state, const void *input);
void deephash(void *state, const void *input);
void luffa_hash(void *state, const void *input);
diff --git a/pools.cpp b/pools.cpp
index c58c25d..9da2bab 100644
--- a/pools.cpp
+++ b/pools.cpp
@@ -254,7 +254,7 @@ bool pool_switch(int thr_id, int pooln)
// temporary... until stratum code cleanup
stratum = p->stratum;
stratum.pooln = cur_pooln;
- stratum.rpc2 = (p->algo == ALGO_WILDKECCAK);
+ stratum.rpc2 = (p->algo == ALGO_WILDKECCAK || p->algo == ALGO_CRYPTONIGHT);
// unlock the stratum thread
tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
@@ -277,7 +277,7 @@ bool pool_switch(int thr_id, int pooln)
}
- stratum.rpc2 = (p->algo == ALGO_WILDKECCAK);
+ stratum.rpc2 = (p->algo == ALGO_WILDKECCAK || p->algo == ALGO_CRYPTONIGHT);
return true;
}
diff --git a/util.cpp b/util.cpp
index 223b871..9b535e8 100644
--- a/util.cpp
+++ b/util.cpp
@@ -1856,7 +1856,7 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
ret = stratum_show_message(sctx, id, params);
goto out;
}
- if (sctx->rpc2 && !strcasecmp(method, "job")) { // cryptonote
+ if (sctx->rpc2 && !strcasecmp(method, "job")) { // xmr/bbr
ret = rpc2_stratum_job(sctx, id, params);
goto out;
}
@@ -2141,6 +2141,9 @@ void print_hash_tests(void)
c11hash(&hash[0], &buf[0]);
printpfx("c11", hash);
+ cryptonight_hash(&hash[0], &buf[0], 76);
+ printpfx("cryptonight", hash);
+
memset(buf, 0, 180);
decred_hash(&hash[0], &buf[0]);
printpfx("decred", hash);