GOSTCoin CUDA miner project, compatible with most nvidia cards, containing only gostd algo
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

228 lines
6.9 KiB

#include <miner.h>
#include <memory.h>
#include "oaes_lib.h"
#include "cryptonight.h"
extern "C" {
#include <sph/sph_blake.h>
#include <sph/sph_groestl.h>
#include <sph/sph_jh.h>
#include <sph/sph_skein.h>
#include "cpu/c_keccak.h"
}
struct cryptonight_ctx {
uint8_t long_state[MEMORY];
union cn_slow_hash_state state;
uint8_t text[INIT_SIZE_BYTE];
uint8_t a[AES_BLOCK_SIZE];
uint8_t b[AES_BLOCK_SIZE];
uint8_t c[AES_BLOCK_SIZE];
oaes_ctx* aes_ctx;
};
static void do_blake_hash(const void* input, size_t len, void* output)
{
uchar hash[32];
sph_blake256_context ctx;
sph_blake256_set_rounds(14);
sph_blake256_init(&ctx);
sph_blake256(&ctx, input, len);
sph_blake256_close(&ctx, hash);
memcpy(output, hash, 32);
}
static void do_groestl_hash(const void* input, size_t len, void* output)
{
uchar hash[32];
sph_groestl256_context ctx;
sph_groestl256_init(&ctx);
sph_groestl256(&ctx, input, len);
sph_groestl256_close(&ctx, hash);
memcpy(output, hash, 32);
}
static void do_jh_hash(const void* input, size_t len, void* output)
{
uchar hash[64];
sph_jh256_context ctx;
sph_jh256_init(&ctx);
sph_jh256(&ctx, input, len);
sph_jh256_close(&ctx, hash);
memcpy(output, hash, 32);
}
static void do_skein_hash(const void* input, size_t len, void* output)
{
uchar hash[32];
sph_skein256_context ctx;
sph_skein256_init(&ctx);
sph_skein256(&ctx, input, len);
sph_skein256_close(&ctx, hash);
memcpy(output, hash, 32);
}
// todo: use sph if possible
static void keccak_hash_permutation(union hash_state *state) {
keccakf((uint64_t*)state, 24);
}
static void keccak_hash_process(union hash_state *state, const uint8_t *buf, size_t count) {
keccak1600(buf, (int)count, (uint8_t*)state);
}
extern "C" int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern "C" int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern "C" int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
extern "C" int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
static void (* const extra_hashes[4])(const void*, size_t, void *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
};
static uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
{
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = hi_dword(multiplier);
uint64_t b = lo_dword(multiplier);
uint64_t c = hi_dword(multiplicand);
uint64_t d = lo_dword(multiplicand);
uint64_t ac = a * c;
uint64_t ad = a * d;
uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + bc;
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
static size_t e2i(const uint8_t* a) {
return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (MEMORY / AES_BLOCK_SIZE - 1);
}
static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
}
static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
((uint64_t*) a)[0] += ((uint64_t*) b)[0];
((uint64_t*) a)[1] += ((uint64_t*) b)[1];
}
static void sum_half_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] + ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] + ((uint64_t*) b)[1];
}
static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, uint8_t* dst) {
((uint64_t*) dst)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) dst) + ((uint64_t*) c)[1];
((uint64_t*) dst)[0] += ((uint64_t*) c)[0];
}
static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = lo;
}
static void copy_block(uint8_t* dst, const uint8_t* src) {
((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
}
static void xor_blocks(uint8_t* a, const uint8_t* b) {
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
}
static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
}
static void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx)
{
size_t i, j;
keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len);
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
#undef RND
#define RND(p) aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * p], ctx->aes_ctx->key->exp_data);
RND(0);
RND(1);
RND(2);
RND(3);
RND(4);
RND(5);
RND(6);
RND(7);
memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);
for (i = 0; likely(i < ITER / 4); ++i) {
j = e2i(ctx->a) * AES_BLOCK_SIZE;
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]);
j = e2i(ctx->a) * AES_BLOCK_SIZE;
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
#undef RND
#define RND(p) xor_blocks(&ctx->text[p * AES_BLOCK_SIZE], &ctx->long_state[i + p * AES_BLOCK_SIZE]); \
aesb_pseudo_round_mut(&ctx->text[p * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
RND(0);
RND(1);
RND(2);
RND(3);
RND(4);
RND(5);
RND(6);
RND(7);
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccak_hash_permutation(&ctx->state.hs);
int extra_algo = ctx->state.hs.b[0] & 3;
extra_hashes[extra_algo](&ctx->state, 200, output);
if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo);
oaes_free((OAES_CTX **) &ctx->aes_ctx);
}
void cryptonight_hash(void* output, const void* input, size_t len)
{
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_hash_ctx(output, input, len, ctx);
free(ctx);
}