mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 00:44:15 +00:00
scrypt: finish scrypt-jane algo import
This commit is contained in:
parent
9208888c57
commit
a6d88abbc9
@ -652,7 +652,7 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
|
|||||||
|
|
||||||
/* discard if a newer block was received */
|
/* discard if a newer block was received */
|
||||||
stale_work = work->height && work->height < g_work.height;
|
stale_work = work->height && work->height < g_work.height;
|
||||||
if (have_stratum && !stale_work && opt_algo != ALGO_ZR5) {
|
if (have_stratum && !stale_work && opt_algo != ALGO_ZR5 && opt_algo != ALGO_SCRYPT_JANE) {
|
||||||
pthread_mutex_lock(&g_work_lock);
|
pthread_mutex_lock(&g_work_lock);
|
||||||
if (strlen(work->job_id + 8))
|
if (strlen(work->job_id + 8))
|
||||||
stale_work = strncmp(work->job_id + 8, g_work.job_id + 8, 4);
|
stale_work = strncmp(work->job_id + 8, g_work.job_id + 8, 4);
|
||||||
|
1
miner.h
1
miner.h
@ -695,6 +695,7 @@ void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const in
|
|||||||
void quarkhash(void *state, const void *input);
|
void quarkhash(void *state, const void *input);
|
||||||
void qubithash(void *state, const void *input);
|
void qubithash(void *state, const void *input);
|
||||||
void scrypthash(void* output, const void* input);
|
void scrypthash(void* output, const void* input);
|
||||||
|
void scryptjane_hash(void* output, const void* input);
|
||||||
void skeincoinhash(void *output, const void *input);
|
void skeincoinhash(void *output, const void *input);
|
||||||
void skein2hash(void *output, const void *input);
|
void skein2hash(void *output, const void *input);
|
||||||
void s3hash(void *output, const void *input);
|
void s3hash(void *output, const void *input);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include "scrypt/scrypt-jane.h"
|
#include "scrypt/scrypt-jane.h"
|
||||||
#include "scrypt/code/scrypt-jane-portable.h"
|
#include "scrypt/code/scrypt-jane-portable.h"
|
||||||
#include "scrypt/code/scrypt-jane-romix.h"
|
#include "scrypt/code/scrypt-jane-chacha.h"
|
||||||
#include "scrypt/keccak.h"
|
#include "scrypt/keccak.h"
|
||||||
|
|
||||||
#include "scrypt/salsa_kernel.h"
|
#include "scrypt/salsa_kernel.h"
|
||||||
@ -434,6 +434,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
||||||
{
|
{
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
uint64_t N;
|
||||||
|
|
||||||
if (s_Nfactor == 0 && strlen(jane_params) > 0)
|
if (s_Nfactor == 0 && strlen(jane_params) > 0)
|
||||||
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
|
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
|
||||||
@ -442,14 +443,12 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
if (Nfactor > scrypt_maxN) {
|
if (Nfactor > scrypt_maxN) {
|
||||||
scrypt_fatal_error("scrypt: N out of range");
|
scrypt_fatal_error("scrypt: N out of range");
|
||||||
}
|
}
|
||||||
|
N = (1 << (Nfactor + 1));
|
||||||
|
|
||||||
if (Nfactor != s_Nfactor)
|
if (Nfactor != s_Nfactor)
|
||||||
{
|
{
|
||||||
// all of this isn't very thread-safe...
|
opt_nfactor = Nfactor;
|
||||||
opt_nfactor = (1 << (Nfactor + 1));
|
applog(LOG_INFO, "N-factor is %d (%d)!", Nfactor, N);
|
||||||
|
|
||||||
applog(LOG_INFO, "Nfactor is %d (N=%d)!", Nfactor, opt_nfactor);
|
|
||||||
|
|
||||||
if (s_Nfactor != 0) {
|
if (s_Nfactor != 0) {
|
||||||
// handle N-factor increase at runtime
|
// handle N-factor increase at runtime
|
||||||
// by adjusting the lookup_gap by factor 2
|
// by adjusting the lookup_gap by factor 2
|
||||||
@ -480,7 +479,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
if (parallel == 2) prepare_keccak512(thr_id, pdata);
|
if (parallel == 2) prepare_keccak512(thr_id, pdata);
|
||||||
|
|
||||||
scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) };
|
scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) };
|
||||||
scrypt_aligned_alloc Vbuf = scrypt_alloc((uint64_t)opt_nfactor * 128);
|
scrypt_aligned_alloc Vbuf = scrypt_alloc(N * 128);
|
||||||
scrypt_aligned_alloc Ybuf = scrypt_alloc(128);
|
scrypt_aligned_alloc Ybuf = scrypt_alloc(128);
|
||||||
|
|
||||||
uint32_t nonce[2];
|
uint32_t nonce[2];
|
||||||
@ -498,6 +497,8 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
|
|
||||||
if (parallel < 2)
|
if (parallel < 2)
|
||||||
{
|
{
|
||||||
|
// half of cpu
|
||||||
|
|
||||||
for(int i=0;i<throughput;++i) {
|
for(int i=0;i<throughput;++i) {
|
||||||
uint32_t tmp_nonce = n++;
|
uint32_t tmp_nonce = n++;
|
||||||
data[nxt][20*i + 19] = bswap_32x4(tmp_nonce);
|
data[nxt][20*i + 19] = bswap_32x4(tmp_nonce);
|
||||||
@ -509,15 +510,13 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput);
|
memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput);
|
||||||
cuda_scrypt_serialize(thr_id, nxt);
|
cuda_scrypt_serialize(thr_id, nxt);
|
||||||
cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt);
|
cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt);
|
||||||
cuda_scrypt_core(thr_id, nxt, opt_nfactor);
|
cuda_scrypt_core(thr_id, nxt, N);
|
||||||
cuda_scrypt_done(thr_id, nxt);
|
cuda_scrypt_done(thr_id, nxt);
|
||||||
|
|
||||||
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false);
|
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false);
|
||||||
|
|
||||||
cuda_scrypt_flush(thr_id, nxt);
|
cuda_scrypt_flush(thr_id, nxt);
|
||||||
|
|
||||||
if(!cuda_scrypt_sync(thr_id, cur))
|
if(!cuda_scrypt_sync(thr_id, cur)) {
|
||||||
{
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -553,21 +552,25 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
// all on gpu
|
||||||
|
|
||||||
n += throughput;
|
n += throughput;
|
||||||
|
if (opt_debug && (iteration % 64 == 0))
|
||||||
|
applog(LOG_DEBUG, "GPU #%d: n=%x", device_map[thr_id], n);
|
||||||
|
|
||||||
cuda_scrypt_serialize(thr_id, nxt);
|
cuda_scrypt_serialize(thr_id, nxt);
|
||||||
pre_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
pre_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
||||||
cuda_scrypt_core(thr_id, nxt, opt_nfactor);
|
cuda_scrypt_core(thr_id, nxt, N);
|
||||||
|
cuda_scrypt_flush(thr_id, nxt); // required
|
||||||
cuda_scrypt_flush(thr_id, nxt);
|
|
||||||
|
|
||||||
post_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
post_keccak512(thr_id, nxt, nonce[nxt], throughput);
|
||||||
cuda_scrypt_done(thr_id, nxt);
|
cuda_scrypt_done(thr_id, nxt);
|
||||||
|
|
||||||
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
|
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
|
||||||
|
cuda_scrypt_flush(thr_id, nxt); // seems required here
|
||||||
|
|
||||||
if(!cuda_scrypt_sync(thr_id, cur))
|
if (!cuda_scrypt_sync(thr_id, cur)) {
|
||||||
{
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -587,7 +590,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
tdata[19] = bswap_32x4(tmp_nonce);
|
tdata[19] = bswap_32x4(tmp_nonce);
|
||||||
|
|
||||||
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
|
||||||
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), opt_nfactor);
|
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N);
|
||||||
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
|
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
|
||||||
|
|
||||||
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
|
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
|
||||||
@ -624,3 +627,55 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
|
|||||||
gettimeofday(tv_end, NULL);
|
gettimeofday(tv_end, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void scrypt_jane_hash_1_1(const uchar *password, size_t password_len, const uchar*salt, size_t salt_len, uint32_t N,
|
||||||
|
uchar *out, size_t bytes, uint8_t *X, uint8_t *Y, uint8_t *V)
|
||||||
|
{
|
||||||
|
uint32_t chunk_bytes, i;
|
||||||
|
const uint32_t p = SCRYPT_P;
|
||||||
|
|
||||||
|
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
|
||||||
|
scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
chunk_bytes = SCRYPT_BLOCK_BYTES * SCRYPT_R * 2;
|
||||||
|
|
||||||
|
/* 1: X = PBKDF2(password, salt) */
|
||||||
|
scrypt_pbkdf2_1(password, password_len, salt, salt_len, X, chunk_bytes * p);
|
||||||
|
|
||||||
|
/* 2: X = ROMix(X) */
|
||||||
|
for (i = 0; i < p; i++)
|
||||||
|
scrypt_ROMix_1((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V, N);
|
||||||
|
|
||||||
|
/* 3: Out = PBKDF2(password, X) */
|
||||||
|
scrypt_pbkdf2_1(password, password_len, X, chunk_bytes * p, out, bytes);
|
||||||
|
|
||||||
|
#ifdef SCRYPT_PREVENT_STATE_LEAK
|
||||||
|
/* This is an unnecessary security feature - mikaelh */
|
||||||
|
scrypt_ensure_zero(Y, (p + 1) * chunk_bytes);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* for cpu hash test */
|
||||||
|
void scryptjane_hash(void* output, const void* input)
|
||||||
|
{
|
||||||
|
uint64_t Nsize = 1ULL << (opt_nfactor + 1);
|
||||||
|
uint64_t chunk_bytes;
|
||||||
|
uint8_t *X, *Y;
|
||||||
|
scrypt_aligned_alloc YX, V;
|
||||||
|
|
||||||
|
chunk_bytes = 2ULL * SCRYPT_BLOCK_BYTES * SCRYPT_R;
|
||||||
|
V = scrypt_alloc(Nsize * chunk_bytes);
|
||||||
|
YX = scrypt_alloc((SCRYPT_P + 1) * chunk_bytes);
|
||||||
|
|
||||||
|
memset(V.ptr, 0, Nsize * chunk_bytes);
|
||||||
|
|
||||||
|
Y = YX.ptr;
|
||||||
|
X = Y + chunk_bytes;
|
||||||
|
|
||||||
|
scrypt_jane_hash_1_1((uchar*)input, 80, (uchar*)input, 80, Nsize, (uchar*)output, 32, X, Y, V.ptr);
|
||||||
|
|
||||||
|
scrypt_free(&V);
|
||||||
|
scrypt_free(&YX);
|
||||||
|
}
|
||||||
|
56
scrypt.cpp
56
scrypt.cpp
@ -682,12 +682,13 @@ static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int lastFactor = 0;
|
static int lastFactor = 0;
|
||||||
//
|
|
||||||
|
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
|
||||||
|
|
||||||
// Scrypt proof of work algorithm
|
// Scrypt proof of work algorithm
|
||||||
// using SSE2 vectorized HMAC SHA256 on CPU and
|
// using SSE2 vectorized HMAC SHA256 on CPU and
|
||||||
// a salsa core implementation on GPU with CUDA
|
// a salsa core implementation on GPU with CUDA
|
||||||
//
|
//
|
||||||
|
|
||||||
int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf,
|
int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf,
|
||||||
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
|
||||||
{
|
{
|
||||||
@ -989,9 +990,9 @@ static void xor_salsa8(uint32_t * const B, const uint32_t * const C)
|
|||||||
/**
|
/**
|
||||||
* @param X input/output
|
* @param X input/output
|
||||||
* @param V scratch buffer
|
* @param V scratch buffer
|
||||||
* @param N factor
|
* @param N factor (def. 1024)
|
||||||
*/
|
*/
|
||||||
static void scrypt_core(uint32_t *X, uint32_t *V, int N)
|
static void scrypt_core(uint32_t *X, uint32_t *V, uint32_t N)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < N; i++) {
|
for (int i = 0; i < N; i++) {
|
||||||
memcpy(&V[i * 32], X, 128);
|
memcpy(&V[i * 32], X, 128);
|
||||||
@ -1013,11 +1014,11 @@ static void scrypt_core(uint32_t *X, uint32_t *V, int N)
|
|||||||
* @param reference reference data, computed but preallocated
|
* @param reference reference data, computed but preallocated
|
||||||
* @param scratchpad scrypt scratchpad
|
* @param scratchpad scrypt scratchpad
|
||||||
**/
|
**/
|
||||||
void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad)
|
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad)
|
||||||
{
|
{
|
||||||
uint32_t X[32] = { 0 };
|
uint32_t X[32] = { 0 };
|
||||||
uint32_t *V = (uint32_t*) scratchpad;
|
uint32_t *V = (uint32_t*) scratchpad;
|
||||||
int N = (1<<(opt_nfactor+1)); // default 9 = 1024
|
uint32_t N = (1<<(opt_nfactor+1)); // default 9 = 1024
|
||||||
|
|
||||||
for (int k = 0; k < 32; k++)
|
for (int k = 0; k < 32; k++)
|
||||||
X[k] = input[k];
|
X[k] = input[k];
|
||||||
@ -1028,32 +1029,18 @@ void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad)
|
|||||||
reference[k] = X[k];
|
reference[k] = X[k];
|
||||||
}
|
}
|
||||||
|
|
||||||
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N)
|
|
||||||
{
|
|
||||||
uint32_t tstate[8], ostate[8];
|
|
||||||
uint32_t X[32] = { 0 };
|
|
||||||
uint32_t *V = (uint32_t *) scratchpad;
|
|
||||||
|
|
||||||
memcpy(tstate, midstate, 32);
|
|
||||||
HMAC_SHA256_80_init(input, tstate, ostate);
|
|
||||||
PBKDF2_SHA256_80_128(tstate, ostate, input, X);
|
|
||||||
|
|
||||||
scrypt_core(X, V, N);
|
|
||||||
|
|
||||||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* cputest */
|
/* cputest */
|
||||||
void scrypthash(void* output, const void* input)
|
void scrypthash(void* output, const void* input)
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) X[32], ref[32] = { 0 }, tstate[8], ostate[8], midstate[8];
|
uint32_t _ALIGN(64) X[32], ref[32] = { 0 }, tstate[8], ostate[8], midstate[8];
|
||||||
uint32_t _ALIGN(64) data[20];
|
uint32_t _ALIGN(64) data[20];
|
||||||
uchar *scratchbuf = (uchar *) calloc(4 * 128 + 63, 1024);
|
uchar *scratchbuf;
|
||||||
|
|
||||||
// no default set with --cputest
|
// no default set with --cputest
|
||||||
if (opt_nfactor == 0) opt_nfactor = 9;
|
if (opt_nfactor == 0) opt_nfactor = 9;
|
||||||
|
|
||||||
|
scratchbuf = (uchar*) calloc(4 * 128 + 63, 1UL << (opt_nfactor+1));
|
||||||
|
|
||||||
memcpy(data, input, 80);
|
memcpy(data, input, 80);
|
||||||
|
|
||||||
sha256_init(midstate);
|
sha256_init(midstate);
|
||||||
@ -1072,26 +1059,3 @@ void scrypthash(void* output, const void* input)
|
|||||||
|
|
||||||
free(scratchbuf);
|
free(scratchbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SCRYPT_MAX_WAYS 1
|
|
||||||
/* cputest */
|
|
||||||
void scrypthash2(void* output, const void* input)
|
|
||||||
{
|
|
||||||
uint32_t midstate[8] = { 0 };
|
|
||||||
uint32_t data[SCRYPT_MAX_WAYS * 20] = { 0 };
|
|
||||||
uint32_t hash[SCRYPT_MAX_WAYS * 8] = { 0 };
|
|
||||||
uint32_t N = 1U << ((opt_nfactor ? opt_nfactor : 9) + 1); // default 1024
|
|
||||||
|
|
||||||
uchar* scratch = (uchar*) calloc(4 * 128 + 63, N); // scrypt_buffer_alloc(N);
|
|
||||||
|
|
||||||
memcpy(data, input, 80);
|
|
||||||
|
|
||||||
sha256_init(midstate);
|
|
||||||
sha256_transform(midstate, data, 0);
|
|
||||||
|
|
||||||
scrypt_1024_1_1_256(data, hash, midstate, scratch, N);
|
|
||||||
|
|
||||||
memcpy(output, hash, 32);
|
|
||||||
|
|
||||||
free(scratch);
|
|
||||||
}
|
|
||||||
|
@ -5,6 +5,8 @@ typedef uint32_t scrypt_mix_word_t;
|
|||||||
#define SCRYPT_WORDTO8_LE U32TO8_LE
|
#define SCRYPT_WORDTO8_LE U32TO8_LE
|
||||||
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
|
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
|
||||||
|
|
||||||
|
#define SCRYPT_P 1
|
||||||
|
#define SCRYPT_R 1
|
||||||
#define SCRYPT_BLOCK_BYTES 64
|
#define SCRYPT_BLOCK_BYTES 64
|
||||||
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
|
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
|
||||||
|
|
||||||
|
@ -1 +0,0 @@
|
|||||||
#include "scrypt-jane-chacha.h"
|
|
@ -61,7 +61,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// some globals containing pointers to device memory (for chunked allocation)
|
// some globals containing pointers to device memory (for chunked allocation)
|
||||||
// [MAX_DEVICES] indexes up to MAX_DEVICES threads (0...MAX_DEVICES-1)
|
// [MAX_GPUS] indexes up to MAX_GPUS threads (0...MAX_GPUS-1)
|
||||||
int MAXWARPS[MAX_GPUS];
|
int MAXWARPS[MAX_GPUS];
|
||||||
uint32_t* h_V[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // NOTE: the *64 prevents buffer overflow for --keccak
|
uint32_t* h_V[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // NOTE: the *64 prevents buffer overflow for --keccak
|
||||||
uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really large kernel launch configurations
|
uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really large kernel launch configurations
|
||||||
@ -69,7 +69,7 @@ uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really larg
|
|||||||
KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props)
|
KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props)
|
||||||
{
|
{
|
||||||
KernelInterface *kernel = NULL;
|
KernelInterface *kernel = NULL;
|
||||||
uint32_t N = (1UL << opt_nfactor+1); // not sure
|
uint64_t N = 1UL << (opt_nfactor+1);
|
||||||
|
|
||||||
if (IS_SCRYPT() || (IS_SCRYPT_JANE() && N <= 8192))
|
if (IS_SCRYPT() || (IS_SCRYPT_JANE() && N <= 8192))
|
||||||
{
|
{
|
||||||
@ -83,7 +83,7 @@ KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// low register count kernels (high N-factor scrypt-jane)
|
// high N-factor scrypt-jane = low registers count kernels
|
||||||
if (props->major > 3 || (props->major == 3 && props->minor >= 5))
|
if (props->major > 3 || (props->major == 3 && props->minor >= 5))
|
||||||
kernel = new TitanKernel();
|
kernel = new TitanKernel();
|
||||||
else if (props->major == 3 && props->minor == 0)
|
else if (props->major == 3 && props->minor == 0)
|
||||||
@ -161,7 +161,7 @@ int cuda_throughput(int thr_id)
|
|||||||
#else
|
#else
|
||||||
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
|
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
|
||||||
checkCudaErrors(cudaSetDevice(device_map[thr_id]));
|
checkCudaErrors(cudaSetDevice(device_map[thr_id]));
|
||||||
checkCudaErrors(cudaFree(0));
|
// checkCudaErrors(cudaFree(0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
KernelInterface *kernel;
|
KernelInterface *kernel;
|
||||||
@ -599,8 +599,9 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
skip2: ;
|
skip2:
|
||||||
if (opt_debug) {
|
if (opt_debug) {
|
||||||
|
|
||||||
if (GRID_BLOCKS == MINB) {
|
if (GRID_BLOCKS == MINB) {
|
||||||
char line[512] = " ";
|
char line[512] = " ";
|
||||||
for (int i=1; i<=kernel->max_warps_per_block(); ++i) {
|
for (int i=1; i<=kernel->max_warps_per_block(); ++i) {
|
||||||
@ -811,17 +812,20 @@ void cuda_scrypt_core(int thr_id, int stream, unsigned int N)
|
|||||||
unsigned int LOOKUP_GAP = device_lookup_gap[thr_id];
|
unsigned int LOOKUP_GAP = device_lookup_gap[thr_id];
|
||||||
|
|
||||||
// setup execution parameters
|
// setup execution parameters
|
||||||
dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1);
|
dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1);
|
||||||
dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1);
|
dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1);
|
||||||
|
|
||||||
context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id, context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id], N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id]);
|
context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id,
|
||||||
|
context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id],
|
||||||
|
N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cuda_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
|
bool cuda_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
|
||||||
{
|
{
|
||||||
return context_kernel[thr_id]->prepare_keccak256(thr_id, host_pdata, ptarget);
|
return context_kernel[thr_id]->prepare_keccak256(thr_id, host_pdata, ptarget);
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
|
void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
|
||||||
{
|
{
|
||||||
unsigned int GRID_BLOCKS = context_blocks[thr_id];
|
unsigned int GRID_BLOCKS = context_blocks[thr_id];
|
||||||
@ -834,12 +838,13 @@ void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, i
|
|||||||
|
|
||||||
context_kernel[thr_id]->do_keccak256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
|
context_kernel[thr_id]->do_keccak256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
|
bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
|
||||||
{
|
{
|
||||||
return context_kernel[thr_id]->prepare_blake256(thr_id, host_pdata, ptarget);
|
return context_kernel[thr_id]->prepare_blake256(thr_id, host_pdata, ptarget);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
|
void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
|
||||||
{
|
{
|
||||||
unsigned int GRID_BLOCKS = context_blocks[thr_id];
|
unsigned int GRID_BLOCKS = context_blocks[thr_id];
|
||||||
@ -852,6 +857,7 @@ void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, in
|
|||||||
|
|
||||||
context_kernel[thr_id]->do_blake256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
|
context_kernel[thr_id]->do_blake256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA)
|
void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA)
|
||||||
{
|
{
|
||||||
@ -859,7 +865,6 @@ void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA)
|
|||||||
unsigned int WARPS_PER_BLOCK = context_wpb[thr_id];
|
unsigned int WARPS_PER_BLOCK = context_wpb[thr_id];
|
||||||
unsigned int THREADS_PER_WU = context_kernel[thr_id]->threads_per_wu();
|
unsigned int THREADS_PER_WU = context_kernel[thr_id]->threads_per_wu();
|
||||||
unsigned int mem_size = WU_PER_LAUNCH * sizeof(uint32_t) * (postSHA ? 8 : 32);
|
unsigned int mem_size = WU_PER_LAUNCH * sizeof(uint32_t) * (postSHA ? 8 : 32);
|
||||||
|
|
||||||
// copy result from device to host (asynchronously)
|
// copy result from device to host (asynchronously)
|
||||||
checkCudaErrors(cudaMemcpyAsync(X, postSHA ? context_hash[stream][thr_id] : context_odata[stream][thr_id], mem_size, cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
|
checkCudaErrors(cudaMemcpyAsync(X, postSHA ? context_hash[stream][thr_id] : context_odata[stream][thr_id], mem_size, cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
|
||||||
}
|
}
|
||||||
|
@ -40,8 +40,8 @@ static int scrypt_algo = -1;
|
|||||||
static __inline int get_scrypt_type() {
|
static __inline int get_scrypt_type() {
|
||||||
if (scrypt_algo != -1) return scrypt_algo;
|
if (scrypt_algo != -1) return scrypt_algo;
|
||||||
get_currentalgo(algo, 64);
|
get_currentalgo(algo, 64);
|
||||||
if (!strcasecmp(algo,"scrypt-jane")) scrypt_algo = A_SCRYPT_JANE;
|
if (!strncasecmp(algo,"scrypt-jane",11)) scrypt_algo = A_SCRYPT_JANE;
|
||||||
else if (!strcasecmp(algo,"scrypt")) scrypt_algo = A_SCRYPT;
|
else if (!strncasecmp(algo,"scrypt",6)) scrypt_algo = A_SCRYPT;
|
||||||
return scrypt_algo;
|
return scrypt_algo;
|
||||||
}
|
}
|
||||||
static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT); }
|
static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT); }
|
||||||
@ -66,8 +66,6 @@ extern void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t n
|
|||||||
extern bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
extern bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
||||||
extern void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h);
|
extern void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h);
|
||||||
|
|
||||||
extern void computeGold(uint32_t *idata, uint32_t *reference, uchar *scratchpad);
|
|
||||||
|
|
||||||
extern bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
extern bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
||||||
extern bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
extern bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
|
||||||
|
|
||||||
|
7
util.cpp
7
util.cpp
@ -1703,6 +1703,10 @@ void do_gpu_tests(void)
|
|||||||
//memcpy(buf, zrtest, 80);
|
//memcpy(buf, zrtest, 80);
|
||||||
//scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done);
|
//scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done);
|
||||||
|
|
||||||
|
struct timeval tv;
|
||||||
|
memset(buf, 0, sizeof buf);
|
||||||
|
scanhash_scrypt_jane(0, (uint32_t*)buf, tgt, NULL, 1, &done, &tv, &tv);
|
||||||
|
|
||||||
memset(buf, 0, sizeof buf);
|
memset(buf, 0, sizeof buf);
|
||||||
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
|
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
|
||||||
|
|
||||||
@ -1791,6 +1795,9 @@ void print_hash_tests(void)
|
|||||||
scrypthash(&hash[0], &buf[0]);
|
scrypthash(&hash[0], &buf[0]);
|
||||||
printpfx("scrypt", hash);
|
printpfx("scrypt", hash);
|
||||||
|
|
||||||
|
scryptjane_hash(&hash[0], &buf[0]);
|
||||||
|
printpfx("scrypt-jane", hash);
|
||||||
|
|
||||||
skeincoinhash(&hash[0], &buf[0]);
|
skeincoinhash(&hash[0], &buf[0]);
|
||||||
printpfx("skein", hash);
|
printpfx("skein", hash);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user