You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
681 lines
21 KiB
681 lines
21 KiB
/* |
|
* scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane |
|
* |
|
* Public Domain or MIT License, whichever is easier |
|
* |
|
* Adapted to ccminer by tpruvot@github (2015) |
|
*/ |
|
|
|
#include "miner.h" |
|
|
|
#include "scrypt/scrypt-jane.h" |
|
#include "scrypt/code/scrypt-jane-portable.h" |
|
#include "scrypt/code/scrypt-jane-chacha.h" |
|
#include "scrypt/keccak.h" |
|
|
|
#include "scrypt/salsa_kernel.h" |
|
|
|
#define scrypt_maxN 30 /* (1 << (30 + 1)) = ~2 billion */ |
|
#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */ |
|
#define scrypt_maxr scrypt_r_32kb /* 32kb */ |
|
#define scrypt_maxp 25 /* (1 << 25) = ~33 million */ |
|
|
|
// ---------------------------- BEGIN keccak functions ------------------------------------ |
|
|
|
#define SCRYPT_HASH "Keccak-512" |
|
#define SCRYPT_HASH_DIGEST_SIZE 64 |
|
#define SCRYPT_KECCAK_F 1600 |
|
#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 1024 */ |
|
#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 576 */ |
|
#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8) |
|
|
|
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; |
|
|
|
typedef struct scrypt_hash_state_t { |
|
uint64_t state[SCRYPT_KECCAK_F / 64]; |
|
uint32_t leftover; |
|
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; |
|
} scrypt_hash_state; |
|
|
|
static const uint64_t keccak_round_constants[24] = { |
|
0x0000000000000001ull, 0x0000000000008082ull, |
|
0x800000000000808aull, 0x8000000080008000ull, |
|
0x000000000000808bull, 0x0000000080000001ull, |
|
0x8000000080008081ull, 0x8000000000008009ull, |
|
0x000000000000008aull, 0x0000000000000088ull, |
|
0x0000000080008009ull, 0x000000008000000aull, |
|
0x000000008000808bull, 0x800000000000008bull, |
|
0x8000000000008089ull, 0x8000000000008003ull, |
|
0x8000000000008002ull, 0x8000000000000080ull, |
|
0x000000000000800aull, 0x800000008000000aull, |
|
0x8000000080008081ull, 0x8000000000008080ull, |
|
0x0000000080000001ull, 0x8000000080008008ull |
|
}; |
|
|
|
static void keccak_block(scrypt_hash_state *S, const uint8_t *in) |
|
{ |
|
size_t i; |
|
uint64_t *s = S->state, t[5], u[5], v, w; |
|
|
|
/* absorb input */ |
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8) |
|
s[i] ^= U8TO64_LE(in); |
|
|
|
for (i = 0; i < 24; i++) { |
|
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ |
|
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; |
|
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; |
|
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; |
|
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; |
|
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; |
|
|
|
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ |
|
u[0] = t[4] ^ ROTL64(t[1], 1); |
|
u[1] = t[0] ^ ROTL64(t[2], 1); |
|
u[2] = t[1] ^ ROTL64(t[3], 1); |
|
u[3] = t[2] ^ ROTL64(t[4], 1); |
|
u[4] = t[3] ^ ROTL64(t[0], 1); |
|
|
|
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ |
|
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; |
|
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; |
|
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; |
|
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; |
|
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; |
|
|
|
/* rho pi: b[..] = rotl(a[..], ..) */ |
|
v = s[ 1]; |
|
s[ 1] = ROTL64(s[ 6], 44); |
|
s[ 6] = ROTL64(s[ 9], 20); |
|
s[ 9] = ROTL64(s[22], 61); |
|
s[22] = ROTL64(s[14], 39); |
|
s[14] = ROTL64(s[20], 18); |
|
s[20] = ROTL64(s[ 2], 62); |
|
s[ 2] = ROTL64(s[12], 43); |
|
s[12] = ROTL64(s[13], 25); |
|
s[13] = ROTL64(s[19], 8); |
|
s[19] = ROTL64(s[23], 56); |
|
s[23] = ROTL64(s[15], 41); |
|
s[15] = ROTL64(s[ 4], 27); |
|
s[ 4] = ROTL64(s[24], 14); |
|
s[24] = ROTL64(s[21], 2); |
|
s[21] = ROTL64(s[ 8], 55); |
|
s[ 8] = ROTL64(s[16], 45); |
|
s[16] = ROTL64(s[ 5], 36); |
|
s[ 5] = ROTL64(s[ 3], 28); |
|
s[ 3] = ROTL64(s[18], 21); |
|
s[18] = ROTL64(s[17], 15); |
|
s[17] = ROTL64(s[11], 10); |
|
s[11] = ROTL64(s[ 7], 6); |
|
s[ 7] = ROTL64(s[10], 3); |
|
s[10] = ROTL64( v, 1); |
|
|
|
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ |
|
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w; |
|
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w; |
|
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; |
|
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; |
|
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; |
|
|
|
/* iota: a[0,0] ^= round constant */ |
|
s[0] ^= keccak_round_constants[i]; |
|
} |
|
} |
|
|
|
static void scrypt_hash_init(scrypt_hash_state *S) { |
|
memset(S, 0, sizeof(*S)); |
|
} |
|
|
|
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) |
|
{ |
|
size_t want; |
|
|
|
/* handle the previous data */ |
|
if (S->leftover) { |
|
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); |
|
want = (want < inlen) ? want : inlen; |
|
memcpy(S->buffer + S->leftover, in, want); |
|
S->leftover += (uint32_t)want; |
|
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) |
|
return; |
|
in += want; |
|
inlen -= want; |
|
keccak_block(S, S->buffer); |
|
} |
|
|
|
/* handle the current data */ |
|
while (inlen >= SCRYPT_HASH_BLOCK_SIZE) { |
|
keccak_block(S, in); |
|
in += SCRYPT_HASH_BLOCK_SIZE; |
|
inlen -= SCRYPT_HASH_BLOCK_SIZE; |
|
} |
|
|
|
/* handle leftover data */ |
|
S->leftover = (uint32_t)inlen; |
|
if (S->leftover) |
|
memcpy(S->buffer, in, S->leftover); |
|
} |
|
|
|
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) |
|
{ |
|
size_t i; |
|
|
|
S->buffer[S->leftover] = 0x01; |
|
memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1)); |
|
S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80; |
|
keccak_block(S, S->buffer); |
|
|
|
for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) { |
|
U64TO8_LE(&hash[i], S->state[i / 8]); |
|
} |
|
} |
|
|
|
// ---------------------------- END keccak functions ------------------------------------ |
|
|
|
// ---------------------------- BEGIN PBKDF2 functions ------------------------------------ |
|
|
|
typedef struct scrypt_hmac_state_t { |
|
scrypt_hash_state inner, outer; |
|
} scrypt_hmac_state; |
|
|
|
|
|
static void scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) |
|
{ |
|
scrypt_hash_state st; |
|
|
|
scrypt_hash_init(&st); |
|
scrypt_hash_update(&st, m, mlen); |
|
scrypt_hash_finish(&st, hash); |
|
} |
|
|
|
/* hmac */ |
|
static void scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) |
|
{ |
|
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; |
|
size_t i; |
|
|
|
scrypt_hash_init(&st->inner); |
|
scrypt_hash_init(&st->outer); |
|
|
|
if (keylen <= SCRYPT_HASH_BLOCK_SIZE) { |
|
/* use the key directly if it's <= blocksize bytes */ |
|
memcpy(pad, key, keylen); |
|
} else { |
|
/* if it's > blocksize bytes, hash it */ |
|
scrypt_hash(pad, key, keylen); |
|
} |
|
|
|
/* inner = (key ^ 0x36) */ |
|
/* h(inner || ...) */ |
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) |
|
pad[i] ^= 0x36; |
|
scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); |
|
|
|
/* outer = (key ^ 0x5c) */ |
|
/* h(outer || ...) */ |
|
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) |
|
pad[i] ^= (0x5c ^ 0x36); |
|
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); |
|
} |
|
|
|
static void scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) |
|
{ |
|
/* h(inner || m...) */ |
|
scrypt_hash_update(&st->inner, m, mlen); |
|
} |
|
|
|
static void scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) |
|
{ |
|
/* h(inner || m) */ |
|
scrypt_hash_digest innerhash; |
|
scrypt_hash_finish(&st->inner, innerhash); |
|
|
|
/* h(outer || h(inner || m)) */ |
|
scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash)); |
|
scrypt_hash_finish(&st->outer, mac); |
|
} |
|
|
|
/* |
|
* Special version where N = 1 |
|
* - mikaelh |
|
*/ |
|
static void scrypt_pbkdf2_1(const uint8_t *password, size_t password_len, |
|
const uint8_t *salt, size_t salt_len, uint8_t *out, uint64_t bytes) |
|
{ |
|
scrypt_hmac_state hmac_pw, hmac_pw_salt, work; |
|
scrypt_hash_digest ti, u; |
|
uint8_t be[4]; |
|
uint32_t i, blocks; |
|
|
|
/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ |
|
|
|
/* hmac(password, ...) */ |
|
scrypt_hmac_init(&hmac_pw, password, password_len); |
|
|
|
/* hmac(password, salt...) */ |
|
hmac_pw_salt = hmac_pw; |
|
scrypt_hmac_update(&hmac_pw_salt, salt, salt_len); |
|
|
|
blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; |
|
for (i = 1; i <= blocks; i++) { |
|
/* U1 = hmac(password, salt || be(i)) */ |
|
U32TO8_BE(be, i); |
|
work = hmac_pw_salt; |
|
scrypt_hmac_update(&work, be, 4); |
|
scrypt_hmac_finish(&work, ti); |
|
memcpy(u, ti, sizeof(u)); |
|
|
|
memcpy(out, ti, (size_t) (bytes > SCRYPT_HASH_DIGEST_SIZE ? SCRYPT_HASH_DIGEST_SIZE : bytes)); |
|
out += SCRYPT_HASH_DIGEST_SIZE; |
|
bytes -= SCRYPT_HASH_DIGEST_SIZE; |
|
} |
|
} |
|
|
|
// ---------------------------- END PBKDF2 functions ------------------------------------ |
|
|
|
static void scrypt_fatal_error_default(const char *msg) { |
|
fprintf(stderr, "%s\n", msg); |
|
exit(1); |
|
} |
|
|
|
static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default; |
|
|
|
void scrypt_set_fatal_error_default(scrypt_fatal_errorfn fn) { |
|
scrypt_fatal_error = fn; |
|
} |
|
|
|
typedef struct scrypt_aligned_alloc_t { |
|
uint8_t *mem, *ptr; |
|
} scrypt_aligned_alloc; |
|
|
|
#if defined(SCRYPT_TEST_SPEED) |
|
static uint8_t *mem_base = (uint8_t *)0; |
|
static size_t mem_bump = 0; |
|
|
|
/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */ |
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size) |
|
{ |
|
scrypt_aligned_alloc aa; |
|
if (!mem_base) { |
|
mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1)); |
|
if (!mem_base) |
|
scrypt_fatal_error("scrypt: out of memory"); |
|
mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); |
|
} |
|
aa.mem = mem_base + mem_bump; |
|
aa.ptr = aa.mem; |
|
mem_bump += (size_t)size; |
|
return aa; |
|
} |
|
|
|
static void scrypt_free(scrypt_aligned_alloc *aa) |
|
{ |
|
mem_bump = 0; |
|
} |
|
#else |
|
static scrypt_aligned_alloc scrypt_alloc(uint64_t size) |
|
{ |
|
static const size_t max_alloc = (size_t)-1; |
|
scrypt_aligned_alloc aa; |
|
size += (SCRYPT_BLOCK_BYTES - 1); |
|
if (size > max_alloc) |
|
scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory"); |
|
aa.mem = (uint8_t *)malloc((size_t)size); |
|
aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1)); |
|
if (!aa.mem) |
|
scrypt_fatal_error("scrypt: out of memory"); |
|
return aa; |
|
} |
|
|
|
static void scrypt_free(scrypt_aligned_alloc *aa) |
|
{ |
|
free(aa->mem); |
|
} |
|
#endif |
|
|
|
|
|
// yacoin: increasing Nfactor gradually |
|
unsigned char GetNfactor(unsigned int nTimestamp) |
|
{ |
|
int l = 0; |
|
|
|
unsigned int Nfactor = 0; |
|
|
|
// Yacoin defaults |
|
unsigned int Ntimestamp = 1367991200; |
|
unsigned int minN = 4; |
|
unsigned int maxN = 30; |
|
|
|
if (strlen(jane_params) > 0) { |
|
if (!strcmp(jane_params, "YAC") || !strcasecmp(jane_params, "Yacoin")) {} // No-Op |
|
// |
|
// NO WARRANTY FOR CORRECTNESS. Look for the int64 nChainStartTime constant |
|
// in the src/main.cpp file of the official wallet clients as well as the |
|
// const unsigned char minNfactor and const unsigned char maxNfactor |
|
// |
|
else if (!strcmp(jane_params, "YBC") || !strcasecmp(jane_params, "YBCoin")) { |
|
// YBCoin: 1372386273, minN: 4, maxN: 30 |
|
Ntimestamp = 1372386273; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "ZZC") || !strcasecmp(jane_params, "ZZCoin")) { |
|
// ZcCoin: 1375817223, minN: 12, maxN: 30 |
|
Ntimestamp = 1375817223; minN= 12; maxN= 30; |
|
} else if (!strcmp(jane_params, "FEC") || !strcasecmp(jane_params, "FreeCoin")) { |
|
// FreeCoin: 1375801200, minN: 6, maxN: 32 |
|
Ntimestamp = 1375801200; minN= 6; maxN= 32; |
|
} else if (!strcmp(jane_params, "ONC") || !strcasecmp(jane_params, "OneCoin")) { |
|
// OneCoin: 1371119462, minN: 6, maxN: 30 |
|
Ntimestamp = 1371119462; minN= 6; maxN= 30; |
|
} else if (!strcmp(jane_params, "QQC") || !strcasecmp(jane_params, "QQCoin")) { |
|
// QQCoin: 1387769316, minN: 4, maxN: 30 |
|
Ntimestamp = 1387769316; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "GPL") || !strcasecmp(jane_params, "GoldPressedLatinum")) { |
|
// GoldPressedLatinum:1377557832, minN: 4, maxN: 30 |
|
Ntimestamp = 1377557832; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "MRC") || !strcasecmp(jane_params, "MicroCoin")) { |
|
// MicroCoin:1389028879, minN: 4, maxN: 30 |
|
Ntimestamp = 1389028879; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "APC") || !strcasecmp(jane_params, "AppleCoin")) { |
|
// AppleCoin:1384720832, minN: 4, maxN: 30 |
|
Ntimestamp = 1384720832; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "CPR") || !strcasecmp(jane_params, "Copperbars")) { |
|
// Copperbars:1376184687, minN: 4, maxN: 30 |
|
Ntimestamp = 1376184687; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "CACH") || !strcasecmp(jane_params, "CacheCoin")) { |
|
// CacheCoin:1388949883, minN: 4, maxN: 30 |
|
Ntimestamp = 1388949883; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "UTC") || !strcasecmp(jane_params, "UltraCoin")) { |
|
// MicroCoin:1388361600, minN: 4, maxN: 30 |
|
Ntimestamp = 1388361600; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "VEL") || !strcasecmp(jane_params, "VelocityCoin")) { |
|
// VelocityCoin:1387769316, minN: 4, maxN: 30 |
|
Ntimestamp = 1387769316; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "ITC") || !strcasecmp(jane_params, "InternetCoin")) { |
|
// InternetCoin:1388385602, minN: 4, maxN: 30 |
|
Ntimestamp = 1388385602; minN= 4; maxN= 30; |
|
} else if (!strcmp(jane_params, "RAD") || !strcasecmp(jane_params, "RadioactiveCoin")) { |
|
// InternetCoin:1389196388, minN: 4, maxN: 30 |
|
Ntimestamp = 1389196388; minN= 4; maxN= 30; |
|
} else { |
|
if (sscanf(jane_params, "%u,%u,%u", &Ntimestamp, &minN, &maxN) != 3) |
|
if (sscanf(jane_params, "%u", &Nfactor) == 1) return Nfactor; // skip bounding against minN, maxN |
|
else applog(LOG_INFO, "Unable to parse scrypt-jane parameters: '%s'. Defaulting to Yacoin.", jane_params); |
|
} |
|
} |
|
// determination based on the constants determined above |
|
if (nTimestamp <= Ntimestamp) |
|
return minN; |
|
|
|
unsigned long int s = nTimestamp - Ntimestamp; |
|
while ((s >> 1) > 3) { |
|
l += 1; |
|
s >>= 1; |
|
} |
|
|
|
s &= 3; |
|
|
|
int n = (l * 170 + s * 25 - 2320) / 100; |
|
|
|
if (n < 0) n = 0; |
|
|
|
if (n > 255) |
|
printf("GetNfactor(%d) - something wrong(n == %d)\n", nTimestamp, n); |
|
|
|
Nfactor = n; |
|
if (Nfactor<minN) return minN; |
|
if (Nfactor>maxN) return maxN; |
|
return Nfactor; |
|
} |
|
|
|
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \ |
|
| (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) |
|
static int s_Nfactor = 0; |
|
|
|
int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done, |
|
unsigned char *scratchbuf, struct timeval *tv_start, struct timeval *tv_end) |
|
{ |
|
uint32_t *pdata = work->data; |
|
uint32_t *ptarget = work->target; |
|
const uint32_t Htarg = ptarget[7]; |
|
uint32_t N; |
|
|
|
if (s_Nfactor == 0 && strlen(jane_params) > 0) |
|
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params); |
|
|
|
int Nfactor = GetNfactor(bswap_32x4(pdata[17])); |
|
if (Nfactor > scrypt_maxN) { |
|
scrypt_fatal_error("scrypt: N out of range"); |
|
} |
|
N = (1 << (Nfactor + 1)); |
|
|
|
if (Nfactor != s_Nfactor) |
|
{ |
|
opt_nfactor = Nfactor; |
|
applog(LOG_INFO, "N-factor is %d (%d)!", Nfactor, N); |
|
if (s_Nfactor != 0) { |
|
// handle N-factor increase at runtime |
|
// by adjusting the lookup_gap by factor 2 |
|
if (s_Nfactor == Nfactor-1) |
|
for (int i=0; i < 8; ++i) |
|
device_lookup_gap[i] *= 2; |
|
} |
|
s_Nfactor = Nfactor; |
|
} |
|
|
|
int throughput = cuda_throughput(thr_id); |
|
|
|
if(throughput == 0) |
|
return -1; |
|
|
|
gettimeofday(tv_start, NULL); |
|
|
|
uint32_t *data[2] = { new uint32_t[20*throughput], new uint32_t[20*throughput] }; |
|
uint32_t* hash[2] = { cuda_hashbuffer(thr_id,0), cuda_hashbuffer(thr_id,1) }; |
|
|
|
uint32_t n = pdata[19]; |
|
|
|
/* byte swap pdata into data[0]/[1] arrays */ |
|
for (int k=0; k<2; ++k) { |
|
for(int z=0;z<20;z++) data[k][z] = bswap_32x4(pdata[z]); |
|
for(int i=1;i<throughput;++i) memcpy(&data[k][20*i], &data[k][0], 20*sizeof(uint32_t)); |
|
} |
|
if (parallel == 2) prepare_keccak512(thr_id, pdata); |
|
|
|
scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) }; |
|
scrypt_aligned_alloc Vbuf = scrypt_alloc(N * 128); |
|
scrypt_aligned_alloc Ybuf = scrypt_alloc(128); |
|
|
|
uint32_t nonce[2]; |
|
uint32_t* cuda_X[2] = { cuda_transferbuffer(thr_id,0), cuda_transferbuffer(thr_id,1) }; |
|
|
|
#if !defined(SCRYPT_CHOOSE_COMPILETIME) |
|
scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix(); |
|
#endif |
|
|
|
int cur = 0, nxt = 1; |
|
int iteration = 0; |
|
|
|
do { |
|
nonce[nxt] = n; |
|
|
|
if (parallel < 2) |
|
{ |
|
// half of cpu |
|
|
|
for(int i=0;i<throughput;++i) { |
|
uint32_t tmp_nonce = n++; |
|
data[nxt][20*i + 19] = bswap_32x4(tmp_nonce); |
|
} |
|
|
|
for(int i=0;i<throughput;++i) |
|
scrypt_pbkdf2_1((unsigned char *)&data[nxt][20*i], 80, (unsigned char *)&data[nxt][20*i], 80, Xbuf[nxt].ptr + 128 * i, 128); |
|
|
|
memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput); |
|
cuda_scrypt_serialize(thr_id, nxt); |
|
cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt); |
|
cuda_scrypt_core(thr_id, nxt, N); |
|
cuda_scrypt_done(thr_id, nxt); |
|
|
|
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false); |
|
|
|
//cuda_scrypt_flush(thr_id, nxt); |
|
if(!cuda_scrypt_sync(thr_id, nxt)) { |
|
break; |
|
} |
|
|
|
memcpy(Xbuf[cur].ptr, cuda_X[cur], 128 * throughput); |
|
for(int i=0;i<throughput;++i) |
|
scrypt_pbkdf2_1((unsigned char *)&data[cur][20*i], 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)(&hash[cur][8*i]), 32); |
|
|
|
#define VERIFY_ALL 0 |
|
#if VERIFY_ALL |
|
{ |
|
/* 2: X = ROMix(X) */ |
|
for(int i=0;i<throughput;++i) |
|
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)Ybuf.ptr, (scrypt_mix_word_t *)Vbuf.ptr, N); |
|
|
|
unsigned int err = 0; |
|
for(int i=0;i<throughput;++i) { |
|
unsigned char *ref = (Xbuf[cur].ptr + 128 * i); |
|
unsigned char *dat = (unsigned char*)(cuda_X[cur] + 32 * i); |
|
if (memcmp(ref, dat, 128) != 0) |
|
{ |
|
err++; |
|
#if 0 |
|
uint32_t *ref32 = (uint32_t*) ref; |
|
uint32_t *dat32 = (uint32_t*) dat; |
|
for (int j=0; j<32; ++j) { |
|
if (ref32[j] != dat32[j]) |
|
fprintf(stderr, "ref32[i=%d][j=%d] = $%08x / $%08x\n", i, j, ref32[j], dat32[j]); |
|
} |
|
#endif |
|
} |
|
} |
|
if (err > 0) fprintf(stderr, "%d out of %d hashes differ.\n", err, throughput); |
|
} |
|
#endif |
|
} else { |
|
|
|
// all on gpu |
|
|
|
n += throughput; |
|
if (opt_debug && (iteration % 64 == 0)) |
|
applog(LOG_DEBUG, "GPU #%d: n=%x", device_map[thr_id], n); |
|
|
|
cuda_scrypt_serialize(thr_id, nxt); |
|
pre_keccak512(thr_id, nxt, nonce[nxt], throughput); |
|
cuda_scrypt_core(thr_id, nxt, N); |
|
//cuda_scrypt_flush(thr_id, nxt); |
|
if (!cuda_scrypt_sync(thr_id, nxt)) { |
|
break; |
|
} |
|
|
|
post_keccak512(thr_id, nxt, nonce[nxt], throughput); |
|
cuda_scrypt_done(thr_id, nxt); |
|
|
|
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true); |
|
//cuda_scrypt_flush(thr_id, nxt); // made by cuda_scrypt_sync |
|
if (!cuda_scrypt_sync(thr_id, nxt)) { |
|
break; |
|
} |
|
} |
|
|
|
for (int i=0; iteration > 0 && i<throughput; i++) |
|
{ |
|
if (hash[cur][8*i+7] <= Htarg && fulltest(&hash[cur][8*i], ptarget)) |
|
{ |
|
uint32_t _ALIGN(64) thash[8], tdata[20]; |
|
uint32_t tmp_nonce = nonce[cur] + i; |
|
|
|
for(int z=0;z<19;z++) |
|
tdata[z] = bswap_32x4(pdata[z]); |
|
tdata[19] = bswap_32x4(tmp_nonce); |
|
|
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128); |
|
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N); |
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32); |
|
|
|
if (memcmp(thash, &hash[cur][8*i], 32) == 0) |
|
{ |
|
bn_store_hash_target_ratio(thash, ptarget, work); |
|
*hashes_done = n - pdata[19]; |
|
pdata[19] = tmp_nonce; |
|
scrypt_free(&Vbuf); |
|
scrypt_free(&Ybuf); |
|
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]); |
|
delete[] data[0]; delete[] data[1]; |
|
gettimeofday(tv_end, NULL); |
|
return 1; |
|
} else { |
|
applog(LOG_WARNING, "GPU #%d: %s result does not validate on CPU! (i=%d, s=%d)", |
|
device_map[thr_id], device_name[thr_id], i, cur); |
|
} |
|
} |
|
} |
|
|
|
cur = (cur+1)&1; |
|
nxt = (nxt+1)&1; |
|
++iteration; |
|
} while (n <= max_nonce && !work_restart[thr_id].restart); |
|
|
|
scrypt_free(&Vbuf); |
|
scrypt_free(&Ybuf); |
|
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]); |
|
delete[] data[0]; delete[] data[1]; |
|
|
|
*hashes_done = n - pdata[19]; |
|
pdata[19] = n; |
|
gettimeofday(tv_end, NULL); |
|
return 0; |
|
} |
|
|
|
|
|
static void scrypt_jane_hash_1_1(const uchar *password, size_t password_len, const uchar*salt, size_t salt_len, uint32_t N, |
|
uchar *out, uint32_t bytes, uint8_t *X, uint8_t *Y, uint8_t *V) |
|
{ |
|
uint32_t chunk_bytes, i; |
|
const uint32_t p = SCRYPT_P; |
|
|
|
#if !defined(SCRYPT_CHOOSE_COMPILETIME) |
|
scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix(); |
|
#endif |
|
|
|
chunk_bytes = SCRYPT_BLOCK_BYTES * SCRYPT_R * 2; |
|
|
|
/* 1: X = PBKDF2(password, salt) */ |
|
scrypt_pbkdf2_1(password, password_len, salt, salt_len, X, chunk_bytes * p); |
|
|
|
/* 2: X = ROMix(X) */ |
|
for (i = 0; i < p; i++) |
|
scrypt_ROMix_1((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V, N); |
|
|
|
/* 3: Out = PBKDF2(password, X) */ |
|
scrypt_pbkdf2_1(password, password_len, X, chunk_bytes * p, out, (size_t) bytes); |
|
|
|
#ifdef SCRYPT_PREVENT_STATE_LEAK |
|
/* This is an unnecessary security feature - mikaelh */ |
|
scrypt_ensure_zero(Y, (p + 1) * chunk_bytes); |
|
#endif |
|
} |
|
|
|
/* for cpu hash test */ |
|
void scryptjane_hash(void* output, const void* input) |
|
{ |
|
uint32_t Nsize = 1UL << (opt_nfactor + 1); |
|
uint64_t chunk_bytes; |
|
uint8_t *X, *Y; |
|
scrypt_aligned_alloc YX, V; |
|
|
|
chunk_bytes = 2ULL * SCRYPT_BLOCK_BYTES * SCRYPT_R; |
|
V = scrypt_alloc(Nsize * chunk_bytes); |
|
YX = scrypt_alloc((SCRYPT_P + 1) * chunk_bytes); |
|
|
|
memset(V.ptr, 0, (size_t) (Nsize * chunk_bytes)); |
|
|
|
Y = YX.ptr; |
|
X = Y + chunk_bytes; |
|
|
|
scrypt_jane_hash_1_1((uchar*)input, 80, (uchar*)input, 80, (uint32_t) Nsize, (uchar*)output, 32, X, Y, V.ptr); |
|
|
|
scrypt_free(&V); |
|
scrypt_free(&YX); |
|
}
|
|
|