diff --git a/ccminer.cpp b/ccminer.cpp index 598da68..85e6ef1 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -652,7 +652,7 @@ static bool submit_upstream_work(CURL *curl, struct work *work) /* discard if a newer bloc was received */ stale_work = work->height && work->height < g_work.height; - if (have_stratum && !stale_work && opt_algo != ALGO_ZR5) { + if (have_stratum && !stale_work && opt_algo != ALGO_ZR5 && opt_algo != ALGO_SCRYPT_JANE) { pthread_mutex_lock(&g_work_lock); if (strlen(work->job_id + 8)) stale_work = strncmp(work->job_id + 8, g_work.job_id + 8, 4); diff --git a/miner.h b/miner.h index 0dc756d..1a90a2a 100644 --- a/miner.h +++ b/miner.h @@ -695,6 +695,7 @@ void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const in void quarkhash(void *state, const void *input); void qubithash(void *state, const void *input); void scrypthash(void* output, const void* input); +void scryptjane_hash(void* output, const void* input); void skeincoinhash(void *output, const void *input); void skein2hash(void *output, const void *input); void s3hash(void *output, const void *input); diff --git a/scrypt-jane.cpp b/scrypt-jane.cpp index ce21ea2..e665182 100644 --- a/scrypt-jane.cpp +++ b/scrypt-jane.cpp @@ -8,7 +8,7 @@ #include "scrypt/scrypt-jane.h" #include "scrypt/code/scrypt-jane-portable.h" -#include "scrypt/code/scrypt-jane-romix.h" +#include "scrypt/code/scrypt-jane-chacha.h" #include "scrypt/keccak.h" #include "scrypt/salsa_kernel.h" @@ -434,6 +434,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u uint32_t max_nonce, unsigned long *hashes_done, struct timeval *tv_start, struct timeval *tv_end) { const uint32_t Htarg = ptarget[7]; + uint64_t N; if (s_Nfactor == 0 && strlen(jane_params) > 0) applog(LOG_INFO, "Given scrypt-jane parameters: %s", jane_params); @@ -442,14 +443,12 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u if (Nfactor > scrypt_maxN) { scrypt_fatal_error("scrypt: N out of range"); } + N = (1 << (Nfactor + 1)); if (Nfactor != s_Nfactor) { - // all of this isn't very thread-safe... - opt_nfactor = (1 << (Nfactor + 1)); - - applog(LOG_INFO, "Nfactor is %d (N=%d)!", Nfactor, opt_nfactor); - + opt_nfactor = Nfactor; + applog(LOG_INFO, "N-factor is %d (%d)!", Nfactor, N); if (s_Nfactor != 0) { // handle N-factor increase at runtime // by adjusting the lookup_gap by factor 2 @@ -480,7 +479,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u if (parallel == 2) prepare_keccak512(thr_id, pdata); scrypt_aligned_alloc Xbuf[2] = { scrypt_alloc(128 * throughput), scrypt_alloc(128 * throughput) }; - scrypt_aligned_alloc Vbuf = scrypt_alloc((uint64_t)opt_nfactor * 128); + scrypt_aligned_alloc Vbuf = scrypt_alloc(N * 128); scrypt_aligned_alloc Ybuf = scrypt_alloc(128); uint32_t nonce[2]; @@ -498,6 +497,8 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u if (parallel < 2) { + // half of cpu + for(int i=0;imajor > 3 || (props->major == 3 && props->minor >= 5)) kernel = new TitanKernel(); else if (props->major == 3 && props->minor == 0) @@ -161,7 +161,7 @@ int cuda_throughput(int thr_id) #else checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield)); checkCudaErrors(cudaSetDevice(device_map[thr_id])); - checkCudaErrors(cudaFree(0)); + // checkCudaErrors(cudaFree(0)); #endif KernelInterface *kernel; @@ -599,8 +599,9 @@ int find_optimal_blockcount(int thr_id, KernelInterface* &kernel, bool &concurre } } } -skip2: ; +skip2: if (opt_debug) { + if (GRID_BLOCKS == MINB) { char line[512] = " "; for (int i=1; i<=kernel->max_warps_per_block(); ++i) { @@ -811,17 +812,20 @@ void cuda_scrypt_core(int thr_id, int stream, unsigned int N) unsigned int LOOKUP_GAP = device_lookup_gap[thr_id]; // setup execution parameters - dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1); - dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1); + dim3 grid(WU_PER_LAUNCH/WU_PER_BLOCK, 1, 1); + dim3 threads(THREADS_PER_WU*WU_PER_BLOCK, 1, 1); - context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id, context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id], N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id]); + context_kernel[thr_id]->run_kernel(grid, threads, WARPS_PER_BLOCK, thr_id, + context_streams[stream][thr_id], context_idata[stream][thr_id], context_odata[stream][thr_id], + N, LOOKUP_GAP, device_interactive[thr_id], opt_benchmark, device_texturecache[thr_id] + ); } bool cuda_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]) { return context_kernel[thr_id]->prepare_keccak256(thr_id, host_pdata, ptarget); } - +#if 0 void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h) { unsigned int GRID_BLOCKS = context_blocks[thr_id]; @@ -834,12 +838,13 @@ void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, i context_kernel[thr_id]->do_keccak256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h); } - +#endif bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]) { return context_kernel[thr_id]->prepare_blake256(thr_id, host_pdata, ptarget); } +#if 0 void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h) { unsigned int GRID_BLOCKS = context_blocks[thr_id]; @@ -852,6 +857,7 @@ void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, in context_kernel[thr_id]->do_blake256(grid, threads, thr_id, stream, hash, nonce, throughput, do_d2h); } +#endif void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA) { @@ -859,7 +865,6 @@ void cuda_scrypt_DtoH(int thr_id, uint32_t *X, int stream, bool postSHA) unsigned int WARPS_PER_BLOCK = context_wpb[thr_id]; unsigned int THREADS_PER_WU = context_kernel[thr_id]->threads_per_wu(); unsigned int mem_size = WU_PER_LAUNCH * sizeof(uint32_t) * (postSHA ? 8 : 32); - // copy result from device to host (asynchronously) checkCudaErrors(cudaMemcpyAsync(X, postSHA ? context_hash[stream][thr_id] : context_odata[stream][thr_id], mem_size, cudaMemcpyDeviceToHost, context_streams[stream][thr_id])); } diff --git a/scrypt/salsa_kernel.h b/scrypt/salsa_kernel.h index 405207b..11011e4 100644 --- a/scrypt/salsa_kernel.h +++ b/scrypt/salsa_kernel.h @@ -40,8 +40,8 @@ static int scrypt_algo = -1; static __inline int get_scrypt_type() { if (scrypt_algo != -1) return scrypt_algo; get_currentalgo(algo, 64); - if (!strcasecmp(algo,"scrypt-jane")) scrypt_algo = A_SCRYPT_JANE; - else if (!strcasecmp(algo,"scrypt")) scrypt_algo = A_SCRYPT; + if (!strncasecmp(algo,"scrypt-jane",11)) scrypt_algo = A_SCRYPT_JANE; + else if (!strncasecmp(algo,"scrypt",6)) scrypt_algo = A_SCRYPT; return scrypt_algo; } static __inline bool IS_SCRYPT() { get_scrypt_type(); return (scrypt_algo == A_SCRYPT); } @@ -66,8 +66,6 @@ extern void cuda_do_keccak256(int thr_id, int stream, uint32_t *hash, uint32_t n extern bool cuda_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); extern void cuda_do_blake256(int thr_id, int stream, uint32_t *hash, uint32_t nonce, int throughput, bool do_d2h); -extern void computeGold(uint32_t *idata, uint32_t *reference, uchar *scratchpad); - extern bool default_prepare_keccak256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); extern bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t ptarget[8]); diff --git a/util.cpp b/util.cpp index d349774..577e87a 100644 --- a/util.cpp +++ b/util.cpp @@ -1703,6 +1703,10 @@ void do_gpu_tests(void) //memcpy(buf, zrtest, 80); //scanhash_zr5(0, (uint32_t*)buf, tgt, zrtest[19]+1, &done); + struct timeval tv; + memset(buf, 0, sizeof buf); + scanhash_scrypt_jane(0, (uint32_t*)buf, tgt, NULL, 1, &done, &tv, &tv); + memset(buf, 0, sizeof buf); scanhash_x11(0, (uint32_t*)buf, tgt, 1, &done); @@ -1791,6 +1795,9 @@ void print_hash_tests(void) scrypthash(&hash[0], &buf[0]); printpfx("scrypt", hash); + scryptjane_hash(&hash[0], &buf[0]); + printpfx("scrypt-jane", hash); + skeincoinhash(&hash[0], &buf[0]); printpfx("skein", hash);