Browse Source

scrypt: finish scrypt-jane algo import

master
Tanguy Pruvot 10 years ago
parent
commit
a6d88abbc9
  1. 2
      ccminer.cpp
  2. 1
      miner.h
  3. 89
      scrypt-jane.cpp
  4. 56
      scrypt.cpp
  5. 2
      scrypt/code/scrypt-jane-chacha.h
  6. 1
      scrypt/code/scrypt-jane-romix.h
  7. 23
      scrypt/salsa_kernel.cu
  8. 6
      scrypt/salsa_kernel.h
  9. 7
      util.cpp

2
ccminer.cpp

@ -652,7 +652,7 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
/* discard if a newer bloc was received */ /* discard if a newer bloc was received */
stale_work = work->height && work->height < g_work.height; stale_work = work->height && work->height < g_work.height;
if (have_stratum && !stale_work && opt_algo != ALGO_ZR5) { if (have_stratum && !stale_work && opt_algo != ALGO_ZR5 && opt_algo != ALGO_SCRYPT_JANE) {
pthread_mutex_lock(&g_work_lock); pthread_mutex_lock(&g_work_lock);
if (strlen(work->job_id + 8)) if (strlen(work->job_id + 8))
stale_work = strncmp(work->job_id + 8, g_work.job_id + 8, 4); stale_work = strncmp(work->job_id + 8, g_work.job_id + 8, 4);

1
miner.h

@ -695,6 +695,7 @@ void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const in
void quarkhash(void *state, const void *input); void quarkhash(void *state, const void *input);
void qubithash(void *state, const void *input); void qubithash(void *state, const void *input);
void scrypthash(void* output, const void* input); void scrypthash(void* output, const void* input);
void scryptjane_hash(void* output, const void* input);
void skeincoinhash(void *output, const void *input); void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input); void skein2hash(void *output, const void *input);
void s3hash(void *output, const void *input); void s3hash(void *output, const void *input);

89
scrypt-jane.cpp

@ -8,7 +8,7 @@
#include "scrypt/scrypt-jane.h" #include "scrypt/scrypt-jane.h"
#include "scrypt/code/scrypt-jane-portable.h" #include "scrypt/code/scrypt-jane-portable.h"
#include "scrypt/code/scrypt-jane-romix.h" #include "scrypt/code/scrypt-jane-chacha.h"
#include "scrypt/keccak.h" #include "scrypt/keccak.h"
#include "scrypt/salsa_kernel.h" #include "scrypt/salsa_kernel.h"
@ -434,6 +434,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end) uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint64_t N;
if (s_Nfactor == 0 && strlen(jane_params) > 0) if (s_Nfactor == 0 && strlen(jane_params) > 0)
applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params); applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params);
@ -442,14 +443,12 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
if (Nfactor > scrypt_maxN) { if (Nfactor > scrypt_maxN) {
scrypt_fatal_error("scrypt: N out of range"); scrypt_fatal_error("scrypt: N out of range");
} }
N = (1 << (Nfactor + 1));
if (Nfactor != s_Nfactor) if (Nfactor != s_Nfactor)
{ {
// all of this isn't very thread-safe... opt_nfactor = Nfactor;
opt_nfactor = (1 << (Nfactor + 1)); applog(LOG_INFO, "N-factor is %d (%d)!", Nfactor, N);
applog(LOG_INFO, "Nfactor is %d (N=%d)!", Nfactor, opt_nfactor);
if (s_Nfactor != 0) { if (s_Nfactor != 0) {
// handle N-factor increase at runtime // handle N-factor increase at runtime
// by adjusting the lookup_gap by factor 2 // by adjusting the lookup_gap by factor 2
@ -480,7 +479,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
if (parallel == 2) prepare_keccak512(thr_id, pdata); if (parallel == 2) prepare_keccak512(thr_id, pdata);
scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) }; scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) };
scrypt_aligned_alloc Vbuf = scrypt_alloc((uint64_t)opt_nfactor * 128); scrypt_aligned_alloc Vbuf = scrypt_alloc(N * 128);
scrypt_aligned_alloc Ybuf = scrypt_alloc(128); scrypt_aligned_alloc Ybuf = scrypt_alloc(128);
uint32_t nonce[2]; uint32_t nonce[2];
@ -498,6 +497,8 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
if (parallel < 2) if (parallel < 2)
{ {
// half of cpu
for(int i=0;i<throughput;++i) { for(int i=0;i<throughput;++i) {
uint32_t tmp_nonce = n++; uint32_t tmp_nonce = n++;
data[nxt][20*i + 19] = bswap_32x4(tmp_nonce); data[nxt][20*i + 19] = bswap_32x4(tmp_nonce);
@ -509,15 +510,13 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput); memcpy(cuda_X[nxt], Xbuf[nxt].ptr, 128 * throughput);
cuda_scrypt_serialize(thr_id, nxt); cuda_scrypt_serialize(thr_id, nxt);
cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt); cuda_scrypt_HtoD(thr_id, cuda_X[nxt], nxt);
cuda_scrypt_core(thr_id, nxt, opt_nfactor); cuda_scrypt_core(thr_id, nxt, N);
cuda_scrypt_done(thr_id, nxt); cuda_scrypt_done(thr_id, nxt);
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false); cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false);
cuda_scrypt_flush(thr_id, nxt); cuda_scrypt_flush(thr_id, nxt);
if(!cuda_scrypt_sync(thr_id, cur)) if(!cuda_scrypt_sync(thr_id, cur)) {
{
return -1; return -1;
} }
@ -553,21 +552,25 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
} }
#endif #endif
} else { } else {
// all on gpu
n += throughput; n += throughput;
if (opt_debug && (iteration % 64 == 0))
applog(LOG_DEBUG, "GPU #%d: n=%x", device_map[thr_id], n);
cuda_scrypt_serialize(thr_id, nxt); cuda_scrypt_serialize(thr_id, nxt);
pre_keccak512(thr_id, nxt, nonce[nxt], throughput); pre_keccak512(thr_id, nxt, nonce[nxt], throughput);
cuda_scrypt_core(thr_id, nxt, opt_nfactor); cuda_scrypt_core(thr_id, nxt, N);
cuda_scrypt_flush(thr_id, nxt); // required
cuda_scrypt_flush(thr_id, nxt);
post_keccak512(thr_id, nxt, nonce[nxt], throughput); post_keccak512(thr_id, nxt, nonce[nxt], throughput);
cuda_scrypt_done(thr_id, nxt); cuda_scrypt_done(thr_id, nxt);
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true); cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
cuda_scrypt_flush(thr_id, nxt); // seems required here
if(!cuda_scrypt_sync(thr_id, cur)) if (!cuda_scrypt_sync(thr_id, cur)) {
{
return -1; return -1;
} }
} }
@ -587,7 +590,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
tdata[19] = bswap_32x4(tmp_nonce); tdata[19] = bswap_32x4(tmp_nonce);
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128); scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), opt_nfactor); scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N);
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32); scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
if (memcmp(thash, &hash[cur][8*i], 32) == 0) if (memcmp(thash, &hash[cur][8*i], 32) == 0)
@ -624,3 +627,55 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
gettimeofday(tv_end, NULL); gettimeofday(tv_end, NULL);
return 0; return 0;
} }
/**
 * Single-shot scrypt-jane hash on caller-provided work buffers.
 *
 * Runs the three stages named in the step comments below:
 * PBKDF2(password, salt) into X, ROMix over each of the SCRYPT_P chunks
 * of X, then PBKDF2(password, X) into out.
 *
 * @param password      key material fed to both PBKDF2 passes
 * @param password_len  length of password in bytes
 * @param salt          salt of the first PBKDF2 pass
 * @param salt_len      length of salt in bytes
 * @param N             value forwarded unchanged to scrypt_ROMix_1
 * @param out           destination of the final PBKDF2 pass
 * @param bytes         number of bytes requested in out
 * @param X             work buffer, SCRYPT_P chunks of
 *                      2 * SCRYPT_BLOCK_BYTES * SCRYPT_R bytes each
 * @param Y             buffer handed to scrypt_ROMix_1 (presumably one
 *                      scratch chunk -- confirm against the ROMix code)
 * @param V             buffer handed to scrypt_ROMix_1 (presumably the
 *                      N-entry lookup table -- confirm against the ROMix code)
 */
static void scrypt_jane_hash_1_1(const uchar *password, size_t password_len, const uchar*salt, size_t salt_len, uint32_t N,
	uchar *out, size_t bytes, uint8_t *X, uint8_t *Y, uint8_t *V)
{
	const uint32_t lanes = SCRYPT_P;
	const uint32_t chunk_len = SCRYPT_BLOCK_BYTES * SCRYPT_R * 2;
	uint32_t lane;
	uint8_t *chunk;

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
	/* NOTE(review): fetched but not used below; the loop calls scrypt_ROMix_1 directly */
	scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
#endif

	/* 1: X = PBKDF2(password, salt) */
	scrypt_pbkdf2_1(password, password_len, salt, salt_len, X, chunk_len * lanes);

	/* 2: X = ROMix(X), one chunk at a time */
	chunk = X;
	for (lane = 0; lane < lanes; lane++) {
		scrypt_ROMix_1((scrypt_mix_word_t *)chunk, (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V, N);
		chunk += chunk_len;
	}

	/* 3: Out = PBKDF2(password, X) */
	scrypt_pbkdf2_1(password, password_len, X, chunk_len * lanes, out, bytes);

#ifdef SCRYPT_PREVENT_STATE_LEAK
	/* This is an unnecessary security feature - mikaelh */
	/* wipes (p + 1) chunks starting at Y; assumes X directly follows Y in memory */
	scrypt_ensure_zero(Y, (lanes + 1) * chunk_len);
#endif
}
/* for cpu hash test */
void scryptjane_hash(void* output, const void* input)
{
uint64_t Nsize = 1ULL << (opt_nfactor + 1);
uint64_t chunk_bytes;
uint8_t *X, *Y;
scrypt_aligned_alloc YX, V;
chunk_bytes = 2ULL * SCRYPT_BLOCK_BYTES * SCRYPT_R;
V = scrypt_alloc(Nsize * chunk_bytes);
YX = scrypt_alloc((SCRYPT_P + 1) * chunk_bytes);
memset(V.ptr, 0, Nsize * chunk_bytes);
Y = YX.ptr;
X = Y + chunk_bytes;
scrypt_jane_hash_1_1((uchar*)input, 80, (uchar*)input, 80, Nsize, (uchar*)output, 32, X, Y, V.ptr);
scrypt_free(&V);
scrypt_free(&YX);
}

56
scrypt.cpp

@ -682,12 +682,13 @@ static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
} }
static int lastFactor = 0; static int lastFactor = 0;
//
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
// Scrypt proof of work algorithm // Scrypt proof of work algorithm
// using SSE2 vectorized HMAC SHA256 on CPU and // using SSE2 vectorized HMAC SHA256 on CPU and
// a salsa core implementation on GPU with CUDA // a salsa core implementation on GPU with CUDA
// //
int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf, int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsigned char *scratchbuf,
uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end) uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end)
{ {
@ -989,9 +990,9 @@ static void xor_salsa8(uint32_t * const B, const uint32_t * const C)
/** /**
* @param X input/output * @param X input/output
* @param V scratch buffer * @param V scratch buffer
* @param N factor * @param N factor (def. 1024)
*/ */
static void scrypt_core(uint32_t *X, uint32_t *V, int N) static void scrypt_core(uint32_t *X, uint32_t *V, uint32_t N)
{ {
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
memcpy(&V[i * 32], X, 128); memcpy(&V[i * 32], X, 128);
@ -1013,11 +1014,11 @@ static void scrypt_core(uint32_t *X, uint32_t *V, int N)
* @param reference reference data, computed but preallocated * @param reference reference data, computed but preallocated
* @param scratchpad scrypt scratchpad * @param scratchpad scrypt scratchpad
**/ **/
void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad) static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad)
{ {
uint32_t X[32] = { 0 }; uint32_t X[32] = { 0 };
uint32_t *V = (uint32_t*) scratchpad; uint32_t *V = (uint32_t*) scratchpad;
int N = (1<<(opt_nfactor+1)); // default 9 = 1024 uint32_t N = (1<<(opt_nfactor+1)); // default 9 = 1024
for (int k = 0; k < 32; k++) for (int k = 0; k < 32; k++)
X[k] = input[k]; X[k] = input[k];
@ -1028,32 +1029,18 @@ void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad)
reference[k] = X[k]; reference[k] = X[k];
} }
/**
 * CPU scrypt of one 80-byte header: HMAC/PBKDF2 expansion into a 32-word
 * block, scrypt_core over that block, then PBKDF2 compression to output.
 *
 * @param input       header words, passed to the HMAC init and first PBKDF2 pass
 * @param output      receives the result of PBKDF2_SHA256_128_32
 * @param midstate    32 bytes of precomputed SHA-256 state, copied before use
 * @param scratchpad  memory handed to scrypt_core as its V buffer
 * @param N           value forwarded unchanged to scrypt_core
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t block[32] = { 0 };
	uint32_t state_t[8];
	uint32_t state_o[8];

	/* work on a private copy so the caller's midstate is left untouched */
	memcpy(state_t, midstate, sizeof(state_t));

	HMAC_SHA256_80_init(input, state_t, state_o);
	PBKDF2_SHA256_80_128(state_t, state_o, input, block);

	scrypt_core(block, (uint32_t *) scratchpad, N);

	PBKDF2_SHA256_128_32(state_t, state_o, block, output);
}
/* cputest */ /* cputest */
void scrypthash(void* output, const void* input) void scrypthash(void* output, const void* input)
{ {
uint32_t _ALIGN(64) X[32], ref[32] = { 0 }, tstate[8], ostate[8], midstate[8]; uint32_t _ALIGN(64) X[32], ref[32] = { 0 }, tstate[8], ostate[8], midstate[8];
uint32_t _ALIGN(64) data[20]; uint32_t _ALIGN(64) data[20];
uchar *scratchbuf = (uchar *) calloc(4 * 128 + 63, 1024); uchar *scratchbuf;
// no default set with --cputest // no default set with --cputest
if (opt_nfactor == 0) opt_nfactor = 9; if (opt_nfactor == 0) opt_nfactor = 9;
scratchbuf = (uchar*) calloc(4 * 128 + 63, 1UL << (opt_nfactor+1));
memcpy(data, input, 80); memcpy(data, input, 80);
sha256_init(midstate); sha256_init(midstate);
@ -1072,26 +1059,3 @@ void scrypthash(void* output, const void* input)
free(scratchbuf); free(scratchbuf);
} }
#define SCRYPT_MAX_WAYS 1
/**
 * cputest: CPU scrypt hash of an 80-byte input using scrypt_1024_1_1_256.
 *
 * N is 2^(opt_nfactor + 1), with opt_nfactor == 0 treated as 9 (N = 1024).
 * The scratchpad is allocated for this call and freed before returning.
 * If that allocation fails, output receives 32 zero bytes instead of
 * handing a NULL scratchpad to the scrypt core.
 *
 * @param output  receives 32 bytes of hash
 * @param input   80 bytes of header data
 */
void scrypthash2(void* output, const void* input)
{
	uint32_t midstate[8] = { 0 };
	uint32_t data[SCRYPT_MAX_WAYS * 20] = { 0 };
	uint32_t hash[SCRYPT_MAX_WAYS * 8] = { 0 };
	uint32_t N = 1U << ((opt_nfactor ? opt_nfactor : 9) + 1); // default 1024
	uchar* scratch = (uchar*) calloc(4 * 128 + 63, N); // scrypt_buffer_alloc(N);

	if (scratch == NULL) {
		// out of memory: hash[] is still all zero, report that rather than crash
		memcpy(output, hash, 32);
		return;
	}

	memcpy(data, input, 80);

	// midstate = SHA-256 state after sha256_transform on the start of data
	sha256_init(midstate);
	sha256_transform(midstate, data, 0);

	scrypt_1024_1_1_256(data, hash, midstate, scratch, N);

	memcpy(output, hash, 32);
	free(scratch);
}

2
scrypt/code/scrypt-jane-chacha.h

@ -5,6 +5,8 @@ typedef uint32_t scrypt_mix_word_t;
#define SCRYPT_WORDTO8_LE U32TO8_LE #define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
#define SCRYPT_P 1
#define SCRYPT_R 1
#define SCRYPT_BLOCK_BYTES 64 #define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))

1
scrypt/code/scrypt-jane-romix.h

@ -1 +0,0 @@
#include "scrypt-jane-chacha.h"

23
scrypt/salsa_kernel.cu

@ -61,7 +61,7 @@
} }
// some globals containing pointers to device memory (for chunked allocation) // some globals containing pointers to device memory (for chunked allocation)
// [MAX_DEVICES] indexes up to MAX_DEVICES threads (0...MAX_DEVICES-1) // [MAX_GPUS] indexes up to MAX_GPUS threads (0...MAX_GPUS-1)
int MAXWARPS[MAX_GPUS]; int MAXWARPS[MAX_GPUS];
uint32_t* h_V[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // NOTE: the *64 prevents buffer overflow for --keccak uint32_t* h_V[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // NOTE: the *64 prevents buffer overflow for --keccak
uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really large kernel launch configurations uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really large kernel launch configurations
@ -69,7 +69,7 @@ uint32_t h_V_extra[MAX_GPUS][TOTAL_WARP_LIMIT*64]; // with really larg
KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props) KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props)
{ {
KernelInterface *kernel = NULL; KernelInterface *kernel = NULL;
uint32_t N = (1UL << opt_nfactor+1); // not sure uint64_t N = 1UL << (opt_nfactor+1);
if (IS_SCRYPT() || (IS_SCRYPT_JANE() && N <= 8192)) if (IS_SCRYPT() || (IS_SCRYPT_JANE() && N <= 8192))
{ {
@ -83,7 +83,7 @@ KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props)
} }
else else
{ {
// low register count kernels (high N-factor scrypt-jane) // high N-factor scrypt-jane = low registers count kernels
if (props->major > 3 || (props->major == 3 && props->minor >= 5)) if (props->major > 3 || (props->major == 3 && props->minor >= 5))
kernel = new TitanKernel(); kernel = new TitanKernel();
else if (props->major == 3 && props->minor == 0) else if (props->major == 3 && props->minor == 0)
@ -161,7 +161,7 @@ int cuda_throughput(int thr_id)
#else #else
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield)); checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
checkCudaErrors(cudaSetDevice(device_map[thr_id])); checkCudaErrors(cudaSetDevice(device_map[thr_id]));
checkCudaErrors(cudaFree(0)); // checkCudaErrors(cudaFree(0));
#endif #endif
KernelInterface *kernel; KernelInterface *kernel;
@ -599,8 +599,9 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre
} }
} }
} }
skip2: ; skip2:
if (opt_debug) { if (opt_debug) {
if (GRID_BLOCKS == MINB) { if (GRID_BLOCKS == MINB) {
char line[512] = " "; char line[512] = " ";
for (int i=1; i<=kernel->max_warps_per_block(); ++i) { for (int i=1; i<=kernel->max_warps_per_block(); ++i) {
@ -814,14 +815,17 @@ void cuda_scrypt_core(int thr_id, int stream, unsigned int N)
dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1); dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1);
dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1); dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1);
context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id, context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id], N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id]); context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id,
context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id],
N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id]
);
} }
bool cuda_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]) bool cuda_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
{ {
return context_kernel[thr_id]->prepare_keccak256(thr_id, host_pdata, ptarget); return context_kernel[thr_id]->prepare_keccak256(thr_id, host_pdata, ptarget);
} }
#if 0
void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h) void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
{ {
unsigned int GRID_BLOCKS = context_blocks[thr_id]; unsigned int GRID_BLOCKS = context_blocks[thr_id];
@ -834,12 +838,13 @@ void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, i
context_kernel[thr_id]->do_keccak256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h); context_kernel[thr_id]->do_keccak256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
} }
#endif
bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]) bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8])
{ {
return context_kernel[thr_id]->prepare_blake256(thr_id, host_pdata, ptarget); return context_kernel[thr_id]->prepare_blake256(thr_id, host_pdata, ptarget);
} }
#if 0
void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h) void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h)
{ {
unsigned int GRID_BLOCKS = context_blocks[thr_id]; unsigned int GRID_BLOCKS = context_blocks[thr_id];
@ -852,6 +857,7 @@ void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, in
context_kernel[thr_id]->do_blake256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h); context_kernel[thr_id]->do_blake256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h);
} }
#endif
void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA) void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA)
{ {
@ -859,7 +865,6 @@ void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA)
unsigned int WARPS_PER_BLOCK = context_wpb[thr_id]; unsigned int WARPS_PER_BLOCK = context_wpb[thr_id];
unsigned int THREADS_PER_WU = context_kernel[thr_id]->threads_per_wu(); unsigned int THREADS_PER_WU = context_kernel[thr_id]->threads_per_wu();
unsigned int mem_size = WU_PER_LAUNCH * sizeof(uint32_t) * (postSHA ? 8 : 32); unsigned int mem_size = WU_PER_LAUNCH * sizeof(uint32_t) * (postSHA ? 8 : 32);
// copy result from device to host (asynchronously) // copy result from device to host (asynchronously)
checkCudaErrors(cudaMemcpyAsync(X, postSHA ? context_hash[stream][thr_id] : context_odata[stream][thr_id], mem_size, cudaMemcpyDeviceToHost, context_streams[stream][thr_id])); checkCudaErrors(cudaMemcpyAsync(X, postSHA ? context_hash[stream][thr_id] : context_odata[stream][thr_id], mem_size, cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
} }

6
scrypt/salsa_kernel.h

@ -40,8 +40,8 @@ static int scrypt_algo = -1;
static __inline int get_scrypt_type() { static __inline int get_scrypt_type() {
if (scrypt_algo != -1) return scrypt_algo; if (scrypt_algo != -1) return scrypt_algo;
get_currentalgo(algo, 64); get_currentalgo(algo, 64);
if (!strcasecmp(algo,"scrypt-jane")) scrypt_algo = A_SCRYPT_JANE; if (!strncasecmp(algo,"scrypt-jane",11)) scrypt_algo = A_SCRYPT_JANE;
else if (!strcasecmp(algo,"scrypt")) scrypt_algo = A_SCRYPT; else if (!strncasecmp(algo,"scrypt",6)) scrypt_algo = A_SCRYPT;
return scrypt_algo; return scrypt_algo;
} }
static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT); } static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT); }
@ -66,8 +66,6 @@ extern void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t n
extern bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); extern bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
extern void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h); extern void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h);
extern void computeGold(uint32_t *idata, uint32_t *reference, uchar *scratchpad);
extern bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); extern bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);
extern bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); extern bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]);

7
util.cpp

@ -1703,6 +1703,10 @@ void do_gpu_tests(void)
//memcpy(buf, zrtest, 80); //memcpy(buf, zrtest, 80);
//scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done); //scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done);
struct timeval tv;
memset(buf, 0, sizeof buf);
scanhash_scrypt_jane(0, (uint32_t*)buf, tgt, NULL, 1, &done, &tv, &tv);
memset(buf, 0, sizeof buf); memset(buf, 0, sizeof buf);
scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done); scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done);
@ -1791,6 +1795,9 @@ void print_hash_tests(void)
scrypthash(&hash[0], &buf[0]); scrypthash(&hash[0], &buf[0]);
printpfx("scrypt", hash); printpfx("scrypt", hash);
scryptjane_hash(&hash[0], &buf[0]);
printpfx("scrypt-jane", hash);
skeincoinhash(&hash[0], &buf[0]); skeincoinhash(&hash[0], &buf[0]);
printpfx("skein", hash); printpfx("skein", hash);

Loading…
Cancel
Save