You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
227 lines
6.9 KiB
227 lines
6.9 KiB
#include <miner.h> |
|
#include <memory.h> |
|
|
|
#include "oaes_lib.h" |
|
#include "cryptonight.h" |
|
|
|
extern "C" { |
|
#include <sph/sph_blake.h> |
|
#include <sph/sph_groestl.h> |
|
#include <sph/sph_jh.h> |
|
#include <sph/sph_skein.h> |
|
#include "cpu/c_keccak.h" |
|
} |
|
|
|
struct cryptonight_ctx { |
|
uint8_t long_state[MEMORY]; |
|
union cn_slow_hash_state state; |
|
uint8_t text[INIT_SIZE_BYTE]; |
|
uint8_t a[AES_BLOCK_SIZE]; |
|
uint8_t b[AES_BLOCK_SIZE]; |
|
uint8_t c[AES_BLOCK_SIZE]; |
|
oaes_ctx* aes_ctx; |
|
}; |
|
|
|
static void do_blake_hash(const void* input, size_t len, void* output) |
|
{ |
|
uchar hash[32]; |
|
sph_blake256_context ctx; |
|
sph_blake256_set_rounds(14); |
|
sph_blake256_init(&ctx); |
|
sph_blake256(&ctx, input, len); |
|
sph_blake256_close(&ctx, hash); |
|
memcpy(output, hash, 32); |
|
} |
|
|
|
static void do_groestl_hash(const void* input, size_t len, void* output) |
|
{ |
|
uchar hash[32]; |
|
sph_groestl256_context ctx; |
|
sph_groestl256_init(&ctx); |
|
sph_groestl256(&ctx, input, len); |
|
sph_groestl256_close(&ctx, hash); |
|
memcpy(output, hash, 32); |
|
} |
|
|
|
static void do_jh_hash(const void* input, size_t len, void* output) |
|
{ |
|
uchar hash[64]; |
|
sph_jh256_context ctx; |
|
sph_jh256_init(&ctx); |
|
sph_jh256(&ctx, input, len); |
|
sph_jh256_close(&ctx, hash); |
|
memcpy(output, hash, 32); |
|
} |
|
|
|
static void do_skein_hash(const void* input, size_t len, void* output) |
|
{ |
|
uchar hash[32]; |
|
sph_skein256_context ctx; |
|
sph_skein256_init(&ctx); |
|
sph_skein256(&ctx, input, len); |
|
sph_skein256_close(&ctx, hash); |
|
memcpy(output, hash, 32); |
|
} |
|
|
|
// todo: use sph if possible |
|
/*
 * Apply keccakf() in place to `state`, viewed as an array of 64-bit words.
 * 24 is passed as keccakf's second argument (presumably the round count).
 */
static void keccak_hash_permutation(union hash_state *state)
{
	uint64_t *lanes = (uint64_t*)state;

	keccakf(lanes, 24);
}
|
|
|
/*
 * Run keccak1600() over `count` bytes at `buf`, storing the result into `state`.
 * `count` is narrowed to int because that is what keccak1600() is called with.
 */
static void keccak_hash_process(union hash_state *state, const uint8_t *buf, size_t count)
{
	uint8_t *digest = (uint8_t*)state;
	const int nbytes = (int)count;

	keccak1600(buf, nbytes, digest);
}
|
|
|
/* AES round primitives defined in another translation unit with C linkage. */
extern "C" {
int fast_aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
int aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
}
|
|
|
/* Signature shared by the four finalizer hashes: (input, length, 32-byte output). */
typedef void (*extra_hash_fn)(const void*, size_t, void*);

/* Finalizer dispatch table, indexed by the low two bits of the first state byte. */
static const extra_hash_fn extra_hashes[4] = {
	do_blake_hash,   /* 0 */
	do_groestl_hash, /* 1 */
	do_jh_hash,      /* 2 */
	do_skein_hash    /* 3 */
};
|
|
|
/*
 * 64 x 64 -> 128-bit unsigned multiplication using only 64-bit arithmetic.
 *
 * Each operand is split into 32-bit halves (x = x_hi * 2^32 + x_lo), so
 *   x * y = hh * 2^64 + (hl + lh) * 2^32 + ll
 * where hh, hl, lh and ll are the four partial products.
 *
 * Returns the low 64 bits of the product and stores the high 64 bits
 * in *product_hi.
 */
uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
{
	const uint64_t x_hi = multiplier >> 32;
	const uint64_t x_lo = multiplier & 0xFFFFFFFFu;
	const uint64_t y_hi = multiplicand >> 32;
	const uint64_t y_lo = multiplicand & 0xFFFFFFFFu;

	const uint64_t hh = x_hi * y_hi;
	const uint64_t hl = x_hi * y_lo;
	const uint64_t lh = x_lo * y_hi;
	const uint64_t ll = x_lo * y_lo;

	/* The sum of the two middle terms can wrap; remember the lost bit. */
	const uint64_t cross = hl + lh;
	const uint64_t cross_wrapped = (cross < hl) ? 1 : 0;

	/* Low word: ll plus the low half of the middle term, with carry-out. */
	const uint64_t low = ll + (cross << 32);
	const uint64_t low_wrapped = (low < ll) ? 1 : 0;

	/* A wrap of the middle term is worth 2^64 * 2^32, i.e. 2^32 in the high word. */
	*product_hi = hh + (cross >> 32) + (cross_wrapped << 32) + low_wrapped;

	return low;
}
|
|
|
static size_t e2i(const uint8_t* a) { |
|
return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (MEMORY / AES_BLOCK_SIZE - 1); |
|
} |
|
|
|
/*
 * Multiply the first 64-bit words of `a` and `b` and store the 128-bit
 * product in `res`: high 64 bits in word 0, low 64 bits in word 1.
 *
 * All buffer accesses go through memcpy so they are well defined for any
 * alignment and do not violate strict aliasing.
 */
static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
	uint64_t x, y;
	uint64_t product[2];

	memcpy(&x, a, sizeof(x));
	memcpy(&y, b, sizeof(y));

	product[1] = mul128(x, y, &product[0]);

	memcpy(res, product, sizeof(product));
}
|
|
|
/*
 * Add block `b` to block `a` in place, as two independent 64-bit words
 * (no carry between the words; each addition wraps modulo 2^64).
 *
 * Words are moved with memcpy so the access is well defined for any
 * alignment and does not violate strict aliasing.
 */
static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
	uint64_t lhs[2], rhs[2];

	memcpy(lhs, a, sizeof(lhs));
	memcpy(rhs, b, sizeof(rhs));

	lhs[0] += rhs[0];
	lhs[1] += rhs[1];

	memcpy(a, lhs, sizeof(lhs));
}
|
|
|
/*
 * Store a + b into `dst`, where each block is treated as two independent
 * 64-bit words (no carry between the words; each addition wraps modulo 2^64).
 *
 * Words are moved with memcpy so the access is well defined for any
 * alignment and does not violate strict aliasing.
 */
static void sum_half_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
	uint64_t lhs[2], rhs[2], sum[2];

	memcpy(lhs, a, sizeof(lhs));
	memcpy(rhs, b, sizeof(rhs));

	sum[0] = lhs[0] + rhs[0];
	sum[1] = lhs[1] + rhs[1];

	memcpy(dst, sum, sizeof(sum));
}
|
|
|
/*
 * dst = (a[0] * b[0]) + c, computed per 64-bit word:
 *   dst word 0 = high 64 bits of the product + c word 0
 *   dst word 1 = low  64 bits of the product + c word 1
 * Each addition wraps modulo 2^64; no carry moves between the words.
 *
 * All buffer accesses go through memcpy so they are well defined for any
 * alignment and do not violate strict aliasing.
 */
static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, uint8_t* dst) {
	uint64_t x, y;
	uint64_t addend[2], result[2];
	uint64_t hi, lo;

	memcpy(&x, a, sizeof(x));
	memcpy(&y, b, sizeof(y));
	memcpy(addend, c, sizeof(addend));

	lo = mul128(x, y, &hi);
	result[0] = hi + addend[0];
	result[1] = lo + addend[1];

	memcpy(dst, result, sizeof(result));
}
|
|
|
static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) { |
|
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1]; |
|
hi += ((uint64_t*) c)[0]; |
|
|
|
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi; |
|
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo; |
|
((uint64_t*) dst)[0] = hi; |
|
((uint64_t*) dst)[1] = lo; |
|
} |
|
|
|
/*
 * Copy one 16-byte block (two 64-bit words) from `src` to `dst`.
 *
 * The block is staged through a local buffer with memcpy, so the copy is
 * well defined for any alignment, does not violate strict aliasing, and
 * tolerates dst == src.
 */
static void copy_block(uint8_t* dst, const uint8_t* src) {
	uint64_t block[2];

	memcpy(block, src, sizeof(block));
	memcpy(dst, block, sizeof(block));
}
|
|
|
/*
 * XOR the 16-byte block `b` into the 16-byte block `a` in place.
 *
 * Words are moved with memcpy so the access is well defined for any
 * alignment and does not violate strict aliasing.
 */
static void xor_blocks(uint8_t* a, const uint8_t* b) {
	uint64_t lhs[2], rhs[2];

	memcpy(lhs, a, sizeof(lhs));
	memcpy(rhs, b, sizeof(rhs));

	lhs[0] ^= rhs[0];
	lhs[1] ^= rhs[1];

	memcpy(a, lhs, sizeof(lhs));
}
|
|
|
/*
 * Store the XOR of the 16-byte blocks `a` and `b` into `dst`.
 * `dst` may be the same buffer as `a` or `b`: both inputs are read before
 * the result is written.
 *
 * Words are moved with memcpy so the access is well defined for any
 * alignment and does not violate strict aliasing.
 */
static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
	uint64_t lhs[2], rhs[2], mixed[2];

	memcpy(lhs, a, sizeof(lhs));
	memcpy(rhs, b, sizeof(rhs));

	mixed[0] = lhs[0] ^ rhs[0];
	mixed[1] = lhs[1] ^ rhs[1];

	memcpy(dst, mixed, sizeof(mixed));
}
|
|
|
static void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) |
|
{ |
|
size_t i, j; |
|
keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len); |
|
ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); |
|
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); |
|
|
|
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE); |
|
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { |
|
#undef RND |
|
#define RND(p) aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * p], ctx->aes_ctx->key->exp_data); |
|
RND(0); |
|
RND(1); |
|
RND(2); |
|
RND(3); |
|
RND(4); |
|
RND(5); |
|
RND(6); |
|
RND(7); |
|
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE); |
|
} |
|
|
|
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a); |
|
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b); |
|
|
|
for (i = 0; likely(i < ITER / 4); ++i) { |
|
j = e2i(ctx->a) * AES_BLOCK_SIZE; |
|
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a); |
|
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]); |
|
|
|
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]); |
|
|
|
j = e2i(ctx->a) * AES_BLOCK_SIZE; |
|
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a); |
|
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]); |
|
|
|
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]); |
|
} |
|
|
|
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); |
|
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); |
|
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { |
|
#undef RND |
|
#define RND(p) xor_blocks(&ctx->text[p * AES_BLOCK_SIZE], &ctx->long_state[i + p * AES_BLOCK_SIZE]); \ |
|
aesb_pseudo_round_mut(&ctx->text[p * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); |
|
RND(0); |
|
RND(1); |
|
RND(2); |
|
RND(3); |
|
RND(4); |
|
RND(5); |
|
RND(6); |
|
RND(7); |
|
} |
|
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); |
|
keccak_hash_permutation(&ctx->state.hs); |
|
|
|
int extra_algo = ctx->state.hs.b[0] & 3; |
|
extra_hashes[extra_algo](&ctx->state, 200, output); |
|
if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo); |
|
|
|
oaes_free((OAES_CTX **) &ctx->aes_ctx); |
|
} |
|
|
|
void cryptonight_hash(void* output, const void* input, size_t len) |
|
{ |
|
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); |
|
cryptonight_hash_ctx(output, input, len, ctx); |
|
free(ctx); |
|
}
|
|
|