#include #include #include "oaes_lib.h" #include "cryptolight.h" extern "C" { #include #include #include #include #include "cpu/c_keccak.h" } struct cryptonight_ctx { uint8_t long_state[MEMORY]; union cn_slow_hash_state state; uint8_t text[INIT_SIZE_BYTE]; uint8_t a[AES_BLOCK_SIZE]; uint8_t b[AES_BLOCK_SIZE]; uint8_t c[AES_BLOCK_SIZE]; oaes_ctx* aes_ctx; }; static void do_blake_hash(const void* input, int len, void* output) { uchar hash[32]; sph_blake256_context ctx; sph_blake256_set_rounds(14); sph_blake256_init(&ctx); sph_blake256(&ctx, input, len); sph_blake256_close(&ctx, hash); memcpy(output, hash, 32); } static void do_groestl_hash(const void* input, int len, void* output) { uchar hash[32]; sph_groestl256_context ctx; sph_groestl256_init(&ctx); sph_groestl256(&ctx, input, len); sph_groestl256_close(&ctx, hash); memcpy(output, hash, 32); } static void do_jh_hash(const void* input, int len, void* output) { uchar hash[64]; sph_jh256_context ctx; sph_jh256_init(&ctx); sph_jh256(&ctx, input, len); sph_jh256_close(&ctx, hash); memcpy(output, hash, 32); } static void do_skein_hash(const void* input, int len, void* output) { uchar hash[32]; sph_skein256_context ctx; sph_skein256_init(&ctx); sph_skein256(&ctx, input, len); sph_skein256_close(&ctx, hash); memcpy(output, hash, 32); } // todo: use sph if possible static void keccak_hash_permutation(union hash_state *state) { keccakf((uint64_t*)state, 24); } static void keccak_hash_process(union hash_state *state, const uint8_t *buf, int count) { keccak1600(buf, (int)count, (uint8_t*)state); } extern "C" int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); extern "C" int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); extern "C" int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey); extern "C" int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey); static void (* const extra_hashes[4])(const void*, int, void *) = { do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash }; static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) { // multiplier = ab = a * 2^32 + b // multiplicand = cd = c * 2^32 + d // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d uint64_t a = hi_dword(multiplier); uint64_t b = lo_dword(multiplier); uint64_t c = hi_dword(multiplicand); uint64_t d = lo_dword(multiplicand); uint64_t ac = a * c; uint64_t ad = a * d; uint64_t bc = b * c; uint64_t bd = b * d; uint64_t adbc = ad + bc; uint64_t adbc_carry = adbc < ad ? 1 : 0; // multiplier * multiplicand = product_hi * 2^64 + product_lo uint64_t product_lo = bd + (adbc << 32); uint64_t product_lo_carry = product_lo < bd ? 1 : 0; *product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; return product_lo; } static size_t e2i(const uint8_t* a) { //const uint32_t mask = (MEMORY / AES_BLOCK_SIZE - 1); //return (*((uint64_t*) a) / AES_BLOCK_SIZE) & mask; return *((uint64_t*) a) & 0xFFFF0; /* mask * AES_BLOCK_SIZE */ } static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); } static void sum_half_blocks(uint8_t* a, const uint8_t* b) { ((uint64_t*) a)[0] += ((uint64_t*) b)[0]; ((uint64_t*) a)[1] += ((uint64_t*) b)[1]; } static void sum_half_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] + ((uint64_t*) b)[0]; ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] + ((uint64_t*) b)[1]; } static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, uint8_t* dst) { ((uint64_t*) dst)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) dst) + ((uint64_t*) c)[1]; ((uint64_t*) dst)[0] += ((uint64_t*) c)[0]; } static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst, const int variant, const uint64_t tweak) { uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1]; hi += ((uint64_t*) c)[0]; ((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi; ((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo; ((uint64_t*) dst)[0] = hi; ((uint64_t*) dst)[1] = variant ? lo ^ tweak : lo; } static void copy_block(uint8_t* dst, const uint8_t* src) { ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; } static void xor_blocks(uint8_t* a, const uint8_t* b) { ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; } static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; } static void cryptolight_store_variant(void* state, int variant) { if (variant == 1) { // use variant 1 like monero since june 2018 const uint8_t tmp = ((const uint8_t*)(state))[11]; const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; ((uint8_t*)(state))[11] = tmp ^ ((0x75310 >> index) & 0x30); } } static void cryptolight_hash_ctx(void* output, const void* input, const int len, struct cryptonight_ctx* ctx, const int variant) { size_t i, j; keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len); ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); const uint64_t tweak = variant ? *((uint64_t*) (((uint8_t*)input) + 35)) ^ ctx->state.hs.w[24] : 0; oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE); for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { #undef RND #define RND(p) aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * p], ctx->aes_ctx->key->exp_data); RND(0); RND(1); RND(2); RND(3); RND(4); RND(5); RND(6); RND(7); memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE); } xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a); xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b); for (i = 0; likely(i < ITER / 4); ++i) { j = e2i(ctx->a); aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a); xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]); cryptolight_store_variant(&ctx->long_state[j], variant); mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)], variant, tweak); j = e2i(ctx->a); aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a); xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]); cryptolight_store_variant(&ctx->long_state[j], variant); mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)], variant, tweak); } memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) { #undef RND #define RND(p) xor_blocks(&ctx->text[p * AES_BLOCK_SIZE], &ctx->long_state[i + p * AES_BLOCK_SIZE]); \ aesb_pseudo_round_mut(&ctx->text[p * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); RND(0); RND(1); RND(2); RND(3); RND(4); RND(5); RND(6); RND(7); } memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); keccak_hash_permutation(&ctx->state.hs); int extra_algo = ctx->state.hs.b[0] & 3; extra_hashes[extra_algo](&ctx->state, 200, output); if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo); oaes_free((OAES_CTX **) &ctx->aes_ctx); } void cryptolight_hash_variant(void* output, const void* input, int len, int variant) { struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); cryptolight_hash_ctx(output, input, len, ctx, variant); free(ctx); } void cryptolight_hash(void* output, const void* input) { cryptolight_hash_variant(output, input, 76, 1); }