mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-09 14:28:15 +00:00
9dfa757dc7
The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case
714 lines
22 KiB
C++
714 lines
22 KiB
C++
/*
|
|
* scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane
|
|
*
|
|
* Public Domain or MIT License, whichever is easier
|
|
*
|
|
* Adapted to ccminer by tpruvot@github (2015)
|
|
*/
|
|
|
|
#include "miner.h"
|
|
|
|
#include "scrypt/scrypt-jane.h"
|
|
#include "scrypt/code/scrypt-jane-portable.h"
|
|
#include "scrypt/code/scrypt-jane-chacha.h"
|
|
#include "scrypt/keccak.h"
|
|
|
|
#include "scrypt/salsa_kernel.h"
|
|
|
|
#define scrypt_maxN 30 /* (1 << (30 + 1)) = ~2 billion */
|
|
#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */
|
|
#define scrypt_maxr scrypt_r_32kb /* 32kb */
|
|
#define scrypt_maxp 25 /* (1 << 25) = ~33 million */
|
|
|
|
// ---------------------------- BEGIN keccak functions ------------------------------------
|
|
|
|
#define SCRYPT_HASH "Keccak-512"
|
|
#define SCRYPT_HASH_DIGEST_SIZE 64
|
|
#define SCRYPT_KECCAK_F 1600
|
|
#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 1024 */
|
|
#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 576 */
|
|
#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8)
|
|
|
|
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
|
|
|
|
typedef struct scrypt_hash_state_t {
|
|
uint64_t state[SCRYPT_KECCAK_F / 64];
|
|
uint32_t leftover;
|
|
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
|
|
} scrypt_hash_state;
|
|
|
|
static const uint64_t keccak_round_constants[24] = {
|
|
0x0000000000000001ull, 0x0000000000008082ull,
|
|
0x800000000000808aull, 0x8000000080008000ull,
|
|
0x000000000000808bull, 0x0000000080000001ull,
|
|
0x8000000080008081ull, 0x8000000000008009ull,
|
|
0x000000000000008aull, 0x0000000000000088ull,
|
|
0x0000000080008009ull, 0x000000008000000aull,
|
|
0x000000008000808bull, 0x800000000000008bull,
|
|
0x8000000000008089ull, 0x8000000000008003ull,
|
|
0x8000000000008002ull, 0x8000000000000080ull,
|
|
0x000000000000800aull, 0x800000008000000aull,
|
|
0x8000000080008081ull, 0x8000000000008080ull,
|
|
0x0000000080000001ull, 0x8000000080008008ull
|
|
};
|
|
|
|
static void keccak_block(scrypt_hash_state *S, const uint8_t *in)
|
|
{
|
|
size_t i;
|
|
uint64_t *s = S->state, t[5], u[5], v, w;
|
|
|
|
/* absorb input */
|
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8)
|
|
s[i] ^= U8TO64_LE(in);
|
|
|
|
for (i = 0; i < 24; i++) {
|
|
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
|
|
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
|
|
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
|
|
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
|
|
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
|
|
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
|
|
|
|
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
|
|
u[0] = t[4] ^ ROTL64(t[1], 1);
|
|
u[1] = t[0] ^ ROTL64(t[2], 1);
|
|
u[2] = t[1] ^ ROTL64(t[3], 1);
|
|
u[3] = t[2] ^ ROTL64(t[4], 1);
|
|
u[4] = t[3] ^ ROTL64(t[0], 1);
|
|
|
|
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
|
|
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
|
|
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
|
|
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
|
|
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
|
|
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
|
|
|
|
/* rho pi: b[..] = rotl(a[..], ..) */
|
|
v = s[ 1];
|
|
s[ 1] = ROTL64(s[ 6], 44);
|
|
s[ 6] = ROTL64(s[ 9], 20);
|
|
s[ 9] = ROTL64(s[22], 61);
|
|
s[22] = ROTL64(s[14], 39);
|
|
s[14] = ROTL64(s[20], 18);
|
|
s[20] = ROTL64(s[ 2], 62);
|
|
s[ 2] = ROTL64(s[12], 43);
|
|
s[12] = ROTL64(s[13], 25);
|
|
s[13] = ROTL64(s[19], 8);
|
|
s[19] = ROTL64(s[23], 56);
|
|
s[23] = ROTL64(s[15], 41);
|
|
s[15] = ROTL64(s[ 4], 27);
|
|
s[ 4] = ROTL64(s[24], 14);
|
|
s[24] = ROTL64(s[21], 2);
|
|
s[21] = ROTL64(s[ 8], 55);
|
|
s[ 8] = ROTL64(s[16], 45);
|
|
s[16] = ROTL64(s[ 5], 36);
|
|
s[ 5] = ROTL64(s[ 3], 28);
|
|
s[ 3] = ROTL64(s[18], 21);
|
|
s[18] = ROTL64(s[17], 15);
|
|
s[17] = ROTL64(s[11], 10);
|
|
s[11] = ROTL64(s[ 7], 6);
|
|
s[ 7] = ROTL64(s[10], 3);
|
|
s[10] = ROTL64( v, 1);
|
|
|
|
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
|
|
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
|
|
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
|
|
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
|
|
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
|
|
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
|
|
|
|
/* iota: a[0,0] ^= round constant */
|
|
s[0] ^= keccak_round_constants[i];
|
|
}
|
|
}
|
|
|
|
static void scrypt_hash_init(scrypt_hash_state *S) {
|
|
memset(S, 0, sizeof(*S));
|
|
}
|
|
|
|
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen)
|
|
{
|
|
size_t want;
|
|
|
|
/* handle the previous data */
|
|
if (S->leftover) {
|
|
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
|
|
want = (want < inlen) ? want : inlen;
|
|
memcpy(S->buffer + S->leftover, in, want);
|
|
S->leftover += (uint32_t)want;
|
|
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
|
|
return;
|
|
in += want;
|
|
inlen -= want;
|
|
keccak_block(S, S->buffer);
|
|
}
|
|
|
|
/* handle the current data */
|
|
while (inlen >= SCRYPT_HASH_BLOCK_SIZE) {
|
|
keccak_block(S, in);
|
|
in += SCRYPT_HASH_BLOCK_SIZE;
|
|
inlen -= SCRYPT_HASH_BLOCK_SIZE;
|
|
}
|
|
|
|
/* handle leftover data */
|
|
S->leftover = (uint32_t)inlen;
|
|
if (S->leftover)
|
|
memcpy(S->buffer, in, S->leftover);
|
|
}
|
|
|
|
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash)
|
|
{
|
|
size_t i;
|
|
|
|
S->buffer[S->leftover] = 0x01;
|
|
memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1));
|
|
S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80;
|
|
keccak_block(S, S->buffer);
|
|
|
|
for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) {
|
|
U64TO8_LE(&hash[i], S->state[i / 8]);
|
|
}
|
|
}
|
|
|
|
// ---------------------------- END keccak functions ------------------------------------
|
|
|
|
// ---------------------------- BEGIN PBKDF2 functions ------------------------------------
|
|
|
|
typedef struct scrypt_hmac_state_t {
|
|
scrypt_hash_state inner, outer;
|
|
} scrypt_hmac_state;
|
|
|
|
|
|
static void scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen)
|
|
{
|
|
scrypt_hash_state st;
|
|
|
|
scrypt_hash_init(&st);
|
|
scrypt_hash_update(&st, m, mlen);
|
|
scrypt_hash_finish(&st, hash);
|
|
}
|
|
|
|
/* hmac */
|
|
static void scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen)
|
|
{
|
|
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
|
|
size_t i;
|
|
|
|
scrypt_hash_init(&st->inner);
|
|
scrypt_hash_init(&st->outer);
|
|
|
|
if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
|
|
/* use the key directly if it's <= blocksize bytes */
|
|
memcpy(pad, key, keylen);
|
|
} else {
|
|
/* if it's > blocksize bytes, hash it */
|
|
scrypt_hash(pad, key, keylen);
|
|
}
|
|
|
|
/* inner = (key ^ 0x36) */
|
|
/* h(inner || ...) */
|
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
|
pad[i] ^= 0x36;
|
|
scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);
|
|
|
|
/* outer = (key ^ 0x5c) */
|
|
/* h(outer || ...) */
|
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
|
|
pad[i] ^= (0x5c ^ 0x36);
|
|
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
|
|
}
|
|
|
|
static void scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen)
|
|
{
|
|
/* h(inner || m...) */
|
|
scrypt_hash_update(&st->inner, m, mlen);
|
|
}
|
|
|
|
static void scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac)
|
|
{
|
|
/* h(inner || m) */
|
|
scrypt_hash_digest innerhash;
|
|
scrypt_hash_finish(&st->inner, innerhash);
|
|
|
|
/* h(outer || h(inner || m)) */
|
|
scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
|
|
scrypt_hash_finish(&st->outer, mac);
|
|
}
|
|
|
|
/*
|
|
* Special version where N = 1
|
|
* - mikaelh
|
|
*/
|
|
static void scrypt_pbkdf2_1(const uint8_t *password, size_t password_len,
|
|
const uint8_t *salt, size_t salt_len, uint8_t *out, uint64_t bytes)
|
|
{
|
|
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
|
|
scrypt_hash_digest ti, u;
|
|
uint8_t be[4];
|
|
uint32_t i, blocks;
|
|
|
|
/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
|
|
|
|
/* hmac(password, ...) */
|
|
scrypt_hmac_init(&hmac_pw, password, password_len);
|
|
|
|
/* hmac(password, salt...) */
|
|
hmac_pw_salt = hmac_pw;
|
|
scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);
|
|
|
|
blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
|
|
for (i = 1; i <= blocks; i++) {
|
|
/* U1 = hmac(password, salt || be(i)) */
|
|
U32TO8_BE(be, i);
|
|
work = hmac_pw_salt;
|
|
scrypt_hmac_update(&work, be, 4);
|
|
scrypt_hmac_finish(&work, ti);
|
|
memcpy(u, ti, sizeof(u));
|
|
|
|
memcpy(out, ti, (size_t) (bytes > SCRYPT_HASH_DIGEST_SIZE ? SCRYPT_HASH_DIGEST_SIZE : bytes));
|
|
out += SCRYPT_HASH_DIGEST_SIZE;
|
|
bytes -= SCRYPT_HASH_DIGEST_SIZE;
|
|
}
|
|
}
|
|
|
|
// ---------------------------- END PBKDF2 functions ------------------------------------
|
|
|
|
static void scrypt_fatal_error_default(const char *msg) {
|
|
fprintf(stderr, "%s\n", msg);
|
|
exit(1);
|
|
}
|
|
|
|
static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;
|
|
|
|
void scrypt_set_fatal_error_default(scrypt_fatal_errorfn fn) {
|
|
scrypt_fatal_error = fn;
|
|
}
|
|
|
|
typedef struct scrypt_aligned_alloc_t {
|
|
uint8_t *mem, *ptr;
|
|
} scrypt_aligned_alloc;
|
|
|
|
#if defined(SCRYPT_TEST_SPEED)
|
|
static uint8_t *mem_base = (uint8_t *)0;
|
|
static size_t mem_bump = 0;
|
|
|
|
/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
|
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
|
|
{
|
|
scrypt_aligned_alloc aa;
|
|
if (!mem_base) {
|
|
mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
|
|
if (!mem_base)
|
|
scrypt_fatal_error("scrypt: out of memory");
|
|
mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
|
|
}
|
|
aa.mem = mem_base + mem_bump;
|
|
aa.ptr = aa.mem;
|
|
mem_bump += (size_t)size;
|
|
return aa;
|
|
}
|
|
|
|
static void scrypt_free(scrypt_aligned_alloc *aa)
|
|
{
|
|
mem_bump = 0;
|
|
}
|
|
#else
|
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
|
|
{
|
|
static const size_t max_alloc = (size_t)-1;
|
|
scrypt_aligned_alloc aa;
|
|
size += (SCRYPT_BLOCK_BYTES - 1);
|
|
if (size > max_alloc)
|
|
scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory");
|
|
aa.mem = (uint8_t *)malloc((size_t)size);
|
|
aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
|
|
if (!aa.mem)
|
|
scrypt_fatal_error("scrypt: out of memory");
|
|
return aa;
|
|
}
|
|
|
|
static void scrypt_free(scrypt_aligned_alloc *aa)
|
|
{
|
|
free(aa->mem);
|
|
}
|
|
#endif
|
|
|
|
|
|
// yacoin: increasing Nfactor gradually
|
|
unsigned char GetNfactor(unsigned int nTimestamp)
|
|
{
|
|
int l = 0;
|
|
|
|
unsigned int Nfactor = 0;
|
|
|
|
// Yacoin defaults
|
|
unsigned int Ntimestamp = 1367991200;
|
|
unsigned int minN = 4;
|
|
unsigned int maxN = 30;
|
|
|
|
if (strlen(jane_params) > 0) {
|
|
if (!strcmp(jane_params, "YAC") || !strcasecmp(jane_params, "Yacoin")) {} // No-Op
|
|
//
|
|
// NO WARRANTY FOR CORRECTNESS. Look for the int64 nChainStartTime constant
|
|
// in the src/main.cpp file of the official wallet clients as well as the
|
|
// const unsigned char minNfactor and const unsigned char maxNfactor
|
|
//
|
|
else if (!strcmp(jane_params, "YBC") || !strcasecmp(jane_params, "YBCoin")) {
|
|
// YBCoin: 1372386273, minN: 4, maxN: 30
|
|
Ntimestamp = 1372386273; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "ZZC") || !strcasecmp(jane_params, "ZZCoin")) {
|
|
// ZcCoin: 1375817223, minN: 12, maxN: 30
|
|
Ntimestamp = 1375817223; minN= 12; maxN= 30;
|
|
} else if (!strcmp(jane_params, "FEC") || !strcasecmp(jane_params, "FreeCoin")) {
|
|
// FreeCoin: 1375801200, minN: 6, maxN: 32
|
|
Ntimestamp = 1375801200; minN= 6; maxN= 32;
|
|
} else if (!strcmp(jane_params, "ONC") || !strcasecmp(jane_params, "OneCoin")) {
|
|
// OneCoin: 1371119462, minN: 6, maxN: 30
|
|
Ntimestamp = 1371119462; minN= 6; maxN= 30;
|
|
} else if (!strcmp(jane_params, "QQC") || !strcasecmp(jane_params, "QQCoin")) {
|
|
// QQCoin: 1387769316, minN: 4, maxN: 30
|
|
Ntimestamp = 1387769316; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "GPL") || !strcasecmp(jane_params, "GoldPressedLatinum")) {
|
|
// GoldPressedLatinum:1377557832, minN: 4, maxN: 30
|
|
Ntimestamp = 1377557832; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "MRC") || !strcasecmp(jane_params, "MicroCoin")) {
|
|
// MicroCoin:1389028879, minN: 4, maxN: 30
|
|
Ntimestamp = 1389028879; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "APC") || !strcasecmp(jane_params, "AppleCoin")) {
|
|
// AppleCoin:1384720832, minN: 4, maxN: 30
|
|
Ntimestamp = 1384720832; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "CPR") || !strcasecmp(jane_params, "Copperbars")) {
|
|
// Copperbars:1376184687, minN: 4, maxN: 30
|
|
Ntimestamp = 1376184687; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "CACH") || !strcasecmp(jane_params, "CacheCoin")) {
|
|
// CacheCoin:1388949883, minN: 4, maxN: 30
|
|
Ntimestamp = 1388949883; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "UTC") || !strcasecmp(jane_params, "UltraCoin")) {
|
|
// MicroCoin:1388361600, minN: 4, maxN: 30
|
|
Ntimestamp = 1388361600; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "VEL") || !strcasecmp(jane_params, "VelocityCoin")) {
|
|
// VelocityCoin:1387769316, minN: 4, maxN: 30
|
|
Ntimestamp = 1387769316; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "ITC") || !strcasecmp(jane_params, "InternetCoin")) {
|
|
// InternetCoin:1388385602, minN: 4, maxN: 30
|
|
Ntimestamp = 1388385602; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "RAD") || !strcasecmp(jane_params, "RadioactiveCoin")) {
|
|
// InternetCoin:1389196388, minN: 4, maxN: 30
|
|
Ntimestamp = 1389196388; minN= 4; maxN= 30;
|
|
} else if (!strcmp(jane_params, "LEO") || !strcasecmp(jane_params, "LEOCoin")) {
|
|
// LEOCoin:1402845776, minN: 4, maxN: 30
|
|
Ntimestamp = 1402845776; minN= 4; maxN= 30;
|
|
} else {
|
|
if (sscanf(jane_params, "%u,%u,%u", &Ntimestamp, &minN, &maxN) != 3)
|
|
if (sscanf(jane_params, "%u", &Nfactor) == 1) return Nfactor; // skip bounding against minN, maxN
|
|
else applog(LOG_INFO, "Unable to parse scrypt-jane parameters: '%s'. Defaulting to Yacoin.", jane_params);
|
|
}
|
|
}
|
|
// determination based on the constants determined above
|
|
if (nTimestamp <= Ntimestamp)
|
|
return minN;
|
|
|
|
unsigned long int s = nTimestamp - Ntimestamp;
|
|
while ((s >> 1) > 3) {
|
|
l += 1;
|
|
s >>= 1;
|
|
}
|
|
|
|
s &= 3;
|
|
|
|
int n = (l * 170 + s * 25 - 2320) / 100;
|
|
|
|
if (n < 0) n = 0;
|
|
|
|
if (n > 255)
|
|
printf("GetNfactor(%d) - something wrong(n == %d)\n", nTimestamp, n);
|
|
|
|
Nfactor = n;
|
|
if (Nfactor<minN) return minN;
|
|
if (Nfactor>maxN) return maxN;
|
|
return Nfactor;
|
|
}
|
|
|
|
static bool init[MAX_GPUS] = { 0 };
|
|
|
|
// cleanup
|
|
void free_scrypt_jane(int thr_id)
|
|
{
|
|
int dev_id = device_map[thr_id];
|
|
|
|
if (!init[thr_id])
|
|
return;
|
|
|
|
cudaSetDevice(dev_id);
|
|
cudaDeviceSynchronize();
|
|
cudaDeviceReset(); // well, simple way to free ;)
|
|
|
|
init[thr_id] = false;
|
|
}
|
|
|
|
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
|
|
| (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
|
|
static int s_Nfactor = 0;
|
|
|
|
int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done,
|
|
unsigned char *scratchbuf, struct timeval *tv_start, struct timeval *tv_end)
|
|
{
|
|
uint32_t *pdata = work->data;
|
|
uint32_t *ptarget = work->target;
|
|
const uint32_t Htarg = ptarget[7];
|
|
uint32_t N;
|
|
|
|
if (s_Nfactor == 0 && strlen(jane_params) > 0)
|
|
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
|
|
|
|
int Nfactor = GetNfactor(bswap_32x4(pdata[17]));
|
|
if (Nfactor > scrypt_maxN) {
|
|
scrypt_fatal_error("scrypt: N out of range");
|
|
}
|
|
N = (1 << (Nfactor + 1));
|
|
|
|
if (Nfactor != s_Nfactor)
|
|
{
|
|
opt_nfactor = Nfactor;
|
|
applog(LOG_INFO, "N-factor is %d (%d)!", Nfactor, N);
|
|
if (s_Nfactor != 0) {
|
|
// handle N-factor increase at runtime
|
|
// by adjusting the lookup_gap by factor 2
|
|
if (s_Nfactor == Nfactor-1)
|
|
for (int i=0; i < 8; ++i)
|
|
device_lookup_gap[i] *= 2;
|
|
}
|
|
s_Nfactor = Nfactor;
|
|
}
|
|
|
|
static __thread int throughput = 0;
|
|
if(!init[thr_id]) {
|
|
int dev_id = device_map[thr_id];
|
|
|
|
cudaSetDevice(dev_id);
|
|
cudaDeviceSynchronize();
|
|
cudaDeviceReset();
|
|
cudaSetDevice(dev_id);
|
|
throughput = cuda_throughput(thr_id);
|
|
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput);
|
|
|
|
init[thr_id] = true;
|
|
}
|
|
|
|
if(throughput == 0)
|
|
return -1;
|
|
|
|
gettimeofday(tv_start, NULL);
|
|
|
|
uint32_t *data[2] = { new uint32_t[20*throughput], new uint32_t[20*throughput] };
|
|
uint32_t* hash[2] = { cuda_hashbuffer(thr_id,0), cuda_hashbuffer(thr_id,1) };
|
|
|
|
uint32_t n = pdata[19];
|
|
|
|
/* byte swap pdata into data[0]/[1] arrays */
|
|
for (int k=0; k<2; ++k) {
|
|
for(int z=0;z<20;z++) data[k][z] = bswap_32x4(pdata[z]);
|
|
for(int i=1;i<throughput;++i) memcpy(&data[k][20*i], &data[k][0], 20*sizeof(uint32_t));
|
|
}
|
|
if (parallel == 2) prepare_keccak512(thr_id, pdata);
|
|
|
|
scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) };
|
|
scrypt_aligned_alloc Vbuf = scrypt_alloc(N * 128);
|
|
scrypt_aligned_alloc Ybuf = scrypt_alloc(128);
|
|
|
|
uint32_t nonce[2];
|
|
uint32_t* cuda_X[2] = { cuda_transferbuffer(thr_id,0), cuda_transferbuffer(thr_id,1) };
|
|
|
|
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
|
scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
|
|
#endif
|
|
|
|
int cur = 0, nxt = 1;
|
|
int iteration = 0;
|
|
|
|
do {
|
|
nonce[nxt] = n;
|
|
|
|
if (parallel < 2)
|
|
{
|
|
// half of cpu
|
|
|
|
for(int i=0;i<throughput;++i) {
|
|
uint32_t tmp_nonce = n++;
|
|
data[nxt][20*i + 19] = bswap_32x4(tmp_nonce);
|
|
}
|
|
|
|
for(int i=0;i<throughput;++i)
|
|
scrypt_pbkdf2_1((unsigned char *)&data[nxt][20*i], 80, (unsigned char *)&data[nxt][20*i], 80, Xbuf[nxt].ptr + 128 * i, 128);
|
|
|
|
memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput);
|
|
cuda_scrypt_serialize(thr_id, nxt);
|
|
cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt);
|
|
cuda_scrypt_core(thr_id, nxt, N);
|
|
cuda_scrypt_done(thr_id, nxt);
|
|
|
|
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false);
|
|
|
|
//cuda_scrypt_flush(thr_id, nxt);
|
|
if(!cuda_scrypt_sync(thr_id, nxt)) {
|
|
break;
|
|
}
|
|
|
|
memcpy(Xbuf[cur].ptr, cuda_X[cur], 128 * throughput);
|
|
for(int i=0;i<throughput;++i)
|
|
scrypt_pbkdf2_1((unsigned char *)&data[cur][20*i], 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)(&hash[cur][8*i]), 32);
|
|
|
|
#define VERIFY_ALL 0
|
|
#if VERIFY_ALL
|
|
{
|
|
/* 2: X = ROMix(X) */
|
|
for(int i=0;i<throughput;++i)
|
|
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)Ybuf.ptr, (scrypt_mix_word_t *)Vbuf.ptr, N);
|
|
|
|
unsigned int err = 0;
|
|
for(int i=0;i<throughput;++i) {
|
|
unsigned char *ref = (Xbuf[cur].ptr + 128 * i);
|
|
unsigned char *dat = (unsigned char*)(cuda_X[cur] + 32 * i);
|
|
if (memcmp(ref, dat, 128) != 0)
|
|
{
|
|
err++;
|
|
#if 0
|
|
uint32_t *ref32 = (uint32_t*) ref;
|
|
uint32_t *dat32 = (uint32_t*) dat;
|
|
for (int j=0; j<32; ++j) {
|
|
if (ref32[j] != dat32[j])
|
|
fprintf(stderr, "ref32[i=%d][j=%d] = $%08x / $%08x\n", i, j, ref32[j], dat32[j]);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
if (err > 0) fprintf(stderr, "%d out of %d hashes differ.\n", err, throughput);
|
|
}
|
|
#endif
|
|
} else {
|
|
|
|
// all on gpu
|
|
|
|
n += throughput;
|
|
if (opt_debug && (iteration % 64 == 0))
|
|
applog(LOG_DEBUG, "GPU #%d: n=%x", device_map[thr_id], n);
|
|
|
|
cuda_scrypt_serialize(thr_id, nxt);
|
|
pre_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
|
cuda_scrypt_core(thr_id, nxt, N);
|
|
//cuda_scrypt_flush(thr_id, nxt);
|
|
if (!cuda_scrypt_sync(thr_id, nxt)) {
|
|
break;
|
|
}
|
|
|
|
post_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
|
cuda_scrypt_done(thr_id, nxt);
|
|
|
|
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
|
|
//cuda_scrypt_flush(thr_id, nxt); // made by cuda_scrypt_sync
|
|
if (!cuda_scrypt_sync(thr_id, nxt)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (int i=0; iteration > 0 && i<throughput; i++)
|
|
{
|
|
if (hash[cur][8*i+7] <= Htarg && fulltest(&hash[cur][8*i], ptarget))
|
|
{
|
|
uint32_t _ALIGN(64) thash[8], tdata[20];
|
|
uint32_t tmp_nonce = nonce[cur] + i;
|
|
|
|
for(int z=0;z<19;z++)
|
|
tdata[z] = bswap_32x4(pdata[z]);
|
|
tdata[19] = bswap_32x4(tmp_nonce);
|
|
|
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
|
|
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N);
|
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
|
|
|
|
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
|
|
{
|
|
work_set_target_ratio(work, thash);
|
|
*hashes_done = n - pdata[19];
|
|
pdata[19] = tmp_nonce;
|
|
scrypt_free(&Vbuf);
|
|
scrypt_free(&Ybuf);
|
|
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]);
|
|
delete[] data[0]; delete[] data[1];
|
|
gettimeofday(tv_end, NULL);
|
|
return 1;
|
|
} else {
|
|
applog(LOG_WARNING, "GPU #%d: %s result does not validate on CPU! (i=%d, s=%d)",
|
|
device_map[thr_id], device_name[thr_id], i, cur);
|
|
}
|
|
}
|
|
}
|
|
|
|
cur = (cur+1)&1;
|
|
nxt = (nxt+1)&1;
|
|
++iteration;
|
|
} while (n <= max_nonce && !work_restart[thr_id].restart);
|
|
|
|
scrypt_free(&Vbuf);
|
|
scrypt_free(&Ybuf);
|
|
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]);
|
|
delete[] data[0]; delete[] data[1];
|
|
|
|
*hashes_done = n - pdata[19];
|
|
pdata[19] = n;
|
|
gettimeofday(tv_end, NULL);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void scrypt_jane_hash_1_1(const uchar *password, size_t password_len, const uchar*salt, size_t salt_len, uint32_t N,
|
|
uchar *out, uint32_t bytes, uint8_t *X, uint8_t *Y, uint8_t *V)
|
|
{
|
|
uint32_t chunk_bytes, i;
|
|
const uint32_t p = SCRYPT_P;
|
|
|
|
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
|
scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
|
|
#endif
|
|
|
|
chunk_bytes = SCRYPT_BLOCK_BYTES * SCRYPT_R * 2;
|
|
|
|
/* 1: X = PBKDF2(password, salt) */
|
|
scrypt_pbkdf2_1(password, password_len, salt, salt_len, X, chunk_bytes * p);
|
|
|
|
/* 2: X = ROMix(X) */
|
|
for (i = 0; i < p; i++)
|
|
scrypt_ROMix_1((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V, N);
|
|
|
|
/* 3: Out = PBKDF2(password, X) */
|
|
scrypt_pbkdf2_1(password, password_len, X, chunk_bytes * p, out, (size_t) bytes);
|
|
|
|
#ifdef SCRYPT_PREVENT_STATE_LEAK
|
|
/* This is an unnecessary security feature - mikaelh */
|
|
scrypt_ensure_zero(Y, (p + 1) * chunk_bytes);
|
|
#endif
|
|
}
|
|
|
|
/* for cpu hash test */
|
|
void scryptjane_hash(void* output, const void* input)
|
|
{
|
|
uint32_t Nsize = 1UL << (opt_nfactor + 1);
|
|
uint64_t chunk_bytes;
|
|
uint8_t *X, *Y;
|
|
scrypt_aligned_alloc YX, V;
|
|
|
|
chunk_bytes = 2ULL * SCRYPT_BLOCK_BYTES * SCRYPT_R;
|
|
V = scrypt_alloc(Nsize * chunk_bytes);
|
|
YX = scrypt_alloc((SCRYPT_P + 1) * chunk_bytes);
|
|
|
|
memset(V.ptr, 0, (size_t) (Nsize * chunk_bytes));
|
|
|
|
Y = YX.ptr;
|
|
X = Y + chunk_bytes;
|
|
|
|
scrypt_jane_hash_1_1((uchar*)input, 80, (uchar*)input, 80, (uint32_t) Nsize, (uchar*)output, 32, X, Y, V.ptr);
|
|
|
|
scrypt_free(&V);
|
|
scrypt_free(&YX);
|
|
}
|