From 2e0a977784ae0dd79f83d2de6101fe7ac94eb8bc Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 16 Nov 2017 09:21:38 +0100 Subject: [PATCH] polytimos algo (6 chained algos with streebog) --- Makefile.am | 1 + README.txt | 6 +- algos.h | 2 + bench.cpp | 1 + ccminer.cpp | 5 + ccminer.vcxproj | 1 + ccminer.vcxproj.filters | 3 + compat/ccminer-config.h | 2 +- configure.ac | 2 +- miner.h | 3 + polytimos.cu | 216 ++++++++++++++++++++++++++++++++++++++++ util.cpp | 3 + 12 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 polytimos.cu diff --git a/Makefile.am b/Makefile.am index 6277e9f..5d66a1e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -64,6 +64,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \ sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \ sph/ripemd.c sph/sph_sha2.c \ + polytimos.cu \ lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu lbry/cuda_lbry_merged.cu \ qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \ tribus/tribus.cu tribus/cuda_echo512_final.cu \ diff --git a/README.txt b/README.txt index 290ba60..e8edd6a 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccminer 2.2.2 (Oct. 2017) "phi and hsr algos" +ccminer 2.2.3-dev (Nov. 2017) "polytimos algo" --------------------------------------------------------------- *************************************************************** @@ -104,6 +104,7 @@ its command line interface and options. nist5 use to mine TalkCoin penta use to mine Joincoin / Pentablake phi use to mine LUXCoin + polytimos use to mine Polytimos quark use to mine Quarkcoin qubit use to mine Qubit scrypt use to mine Scrypt coins @@ -280,6 +281,9 @@ so we can more efficiently implement new algorithms using the latest hardware features. >>> RELEASE HISTORY <<< + Nov. 16th 2017 v2.2.3 + Polytimos Algo + Oct. 
09th 2017 v2.2.2 Import and clean the hsr algo (x13 + custom hash) Import and optimise phi algo from LuxCoin repository diff --git a/algos.h b/algos.h index 3c1528b..9109ad5 100644 --- a/algos.h +++ b/algos.h @@ -37,6 +37,7 @@ enum sha_algos { ALGO_NIST5, ALGO_PENTABLAKE, ALGO_PHI, + ALGO_POLYTIMOS, ALGO_QUARK, ALGO_QUBIT, ALGO_SCRYPT, @@ -104,6 +105,7 @@ static const char *algo_names[] = { "nist5", "penta", "phi", + "polytimos", "quark", "qubit", "scrypt", diff --git a/bench.cpp b/bench.cpp index 271345f..8aef239 100644 --- a/bench.cpp +++ b/bench.cpp @@ -79,6 +79,7 @@ void algo_free_all(int thr_id) free_nist5(thr_id); free_pentablake(thr_id); free_phi(thr_id); + free_polytimos(thr_id); free_quark(thr_id); free_qubit(thr_id); free_skeincoin(thr_id); diff --git a/ccminer.cpp b/ccminer.cpp index 5aedd51..20548c6 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -267,6 +267,7 @@ Options:\n\ nist5 NIST5 (TalkCoin)\n\ penta Pentablake hash (5x Blake 512)\n\ phi BHCoin\n\ + polytimos Politimos\n\ quark Quark\n\ qubit Qubit\n\ sha256d SHA256d (bitcoin)\n\ @@ -2230,6 +2231,7 @@ static void *miner_thread(void *userdata) case ALGO_HSR: case ALGO_LYRA2v2: case ALGO_PHI: + case ALGO_POLYTIMOS: case ALGO_S3: case ALGO_SKUNK: case ALGO_TIMETRAVEL: @@ -2417,6 +2419,9 @@ static void *miner_thread(void *userdata) case ALGO_PHI: rc = scanhash_phi(thr_id, &work, max_nonce, &hashes_done); break; + case ALGO_POLYTIMOS: + rc = scanhash_polytimos(thr_id, &work, max_nonce, &hashes_done); + break; case ALGO_SCRYPT: rc = scanhash_scrypt(thr_id, &work, max_nonce, &hashes_done, NULL, &tv_start, &tv_end); diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 5554061..b29e17a 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -526,6 +526,7 @@ + 64 diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 11fb230..8c4ec22 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -769,6 +769,9 @@ Source Files\CUDA + + Source Files\CUDA + Source Files\CUDA\skunk diff --git 
a/compat/ccminer-config.h b/compat/ccminer-config.h index 7c28072..69847c8 100644 --- a/compat/ccminer-config.h +++ b/compat/ccminer-config.h @@ -164,7 +164,7 @@ #define PACKAGE_URL "http://github.com/tpruvot/ccminer" /* Define to the version of this package. */ -#define PACKAGE_VERSION "2.2.2" +#define PACKAGE_VERSION "2.2.3" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be diff --git a/configure.ac b/configure.ac index 061dfdd..ba09c39 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [2.2.2], [], [ccminer], [http://github.com/tpruvot/ccminer]) +AC_INIT([ccminer], [2.2.3], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/miner.h b/miner.h index 02a8e7c..0ad8523 100644 --- a/miner.h +++ b/miner.h @@ -302,6 +302,7 @@ extern int scanhash_neoscrypt(int thr_id, struct work *work, uint32_t max_nonce, extern int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_phi(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_polytimos(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_quark(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_sha256d(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); @@ -365,6 +366,7 @@ extern void free_neoscrypt(int thr_id); extern void free_nist5(int thr_id); extern void free_pentablake(int thr_id); extern void free_phi(int thr_id); +extern void free_polytimos(int thr_id); extern void free_quark(int thr_id); extern void 
free_qubit(int thr_id); extern void free_sha256d(int thr_id); @@ -908,6 +910,7 @@ void neoscrypt(uchar *output, const uchar *input, uint32_t profile); void nist5hash(void *state, const void *input); void pentablakehash(void *output, const void *input); void phihash(void *output, const void *input); +void polytimos_hash(void *output, const void *input); void quarkhash(void *state, const void *input); void qubithash(void *state, const void *input); void scrypthash(void* output, const void* input); diff --git a/polytimos.cu b/polytimos.cu new file mode 100644 index 0000000..fe7776c --- /dev/null +++ b/polytimos.cu @@ -0,0 +1,216 @@ +/* + * Polytimos algorithm + */ +extern "C" +{ +#include "sph/sph_skein.h" +#include "sph/sph_shabal.h" +#include "sph/sph_echo.h" +#include "sph/sph_luffa.h" +#include "sph/sph_fugue.h" +#include "sph/sph_streebog.h" +} + +#include "miner.h" + +#include "cuda_helper.h" +#include "x11/cuda_x11.h" + +static uint32_t *d_hash[MAX_GPUS]; +static uint32_t *d_resNonce[MAX_GPUS]; + +extern void skein512_cpu_setBlock_80(void *pdata); +extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap); +extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_free(int thr_id); +extern void streebog_sm3_set_target(uint32_t* ptarget); +extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce); +extern void 
skunk_streebog_set_target(uint32_t* ptarget);
+extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
+
+// CPU reference hash: skein512 (80-byte input) -> shabal512 -> echo512 -> luffa512 -> fugue512 -> gost512 (streebog), 32 bytes written to output
+extern "C" void polytimos_hash(void *output, const void *input)
+{
+	sph_skein512_context ctx_skein;
+	sph_shabal512_context ctx_shabal;
+	sph_echo512_context ctx_echo;
+	sph_luffa512_context ctx_luffa;
+	sph_fugue512_context ctx_fugue;
+	sph_gost512_context ctx_gost;
+
+	uint32_t _ALIGN(128) hash[16]; // 64-byte state handed from one stage to the next
+	memset(hash, 0, sizeof hash);
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, input, 80);
+	sph_skein512_close(&ctx_skein, (void*) hash);
+
+	sph_shabal512_init(&ctx_shabal);
+	sph_shabal512(&ctx_shabal, hash, 64);
+	sph_shabal512_close(&ctx_shabal, hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512(&ctx_echo, hash, 64);
+	sph_echo512_close(&ctx_echo, hash);
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512(&ctx_luffa, hash, 64);
+	sph_luffa512_close(&ctx_luffa, hash);
+
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512(&ctx_fugue, hash, 64);
+	sph_fugue512_close(&ctx_fugue, hash);
+
+	sph_gost512_init(&ctx_gost);
+	sph_gost512(&ctx_gost, (const void*) hash, 64);
+	sph_gost512_close(&ctx_gost, (void*) hash);
+
+	memcpy(output, hash, 32); // only the first 32 bytes of the 64-byte result are returned
+}
+
+static bool init[MAX_GPUS] = { 0 };
+static bool use_compat_kernels[MAX_GPUS] = { 0 };
+// GPU scan starting at pdata[19]: returns work->valid_nonces once a candidate validates on the CPU, otherwise 0; *hashes_done receives the nonces covered
+extern "C" int scanhash_polytimos(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	int dev_id = device_map[thr_id];
+	uint32_t *pdata = work->data;
+	uint32_t *ptarget = work->target;
+	const uint32_t first_nonce = pdata[19];
+	int intensity = (device_sm[dev_id] > 500 && !is_windows()) ? 20 : 19;
+	uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); // 19=256*256*8;
+	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x000f;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(dev_id);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
+		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
+
+		cuda_get_arch(thr_id);
+		use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500); // selects the streebog_sm3_* kernels below
+
+		quark_skein512_cpu_init(thr_id, throughput);
+		x14_shabal512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); // 64 bytes of hash state per thread
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)), -1); // two result slots
+
+		init[thr_id] = true;
+	}
+
+	// header words converted with be32enc; used by the skein 80-byte kernel and by the CPU re-check
+	uint32_t _ALIGN(64) h_resNonce[2];
+	uint32_t _ALIGN(64) endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	// both result slots are preset to all-ones: UINT32_MAX means "no candidate" in the checks below
+	cudaMemset(d_resNonce[thr_id], 0xff, 2*sizeof(uint32_t));
+	skein512_cpu_setBlock_80(endiandata);
+	if (use_compat_kernels[thr_id]) {
+		streebog_sm3_set_target(ptarget);
+	} else {
+		skunk_streebog_set_target(ptarget);
+	}
+
+	do {
+		int order = 0;
+
+		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if (use_compat_kernels[thr_id]) {
+			streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
+		} else {
+			skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
+		}
+
+		*hashes_done = pdata[19] - first_nonce + throughput;
+
+		cudaMemcpy(h_resNonce, d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
+		CUDA_LOG_ERROR();
+
+		if (h_resNonce[0] != UINT32_MAX)
+		{
+			const uint32_t Htarg = ptarget[7];
+			const uint32_t startNounce = pdata[19];
+			uint32_t _ALIGN(64) vhash[8];
+
+			be32enc(&endiandata[19], startNounce + h_resNonce[0]); // result slots are added to the batch start nonce
+			polytimos_hash(vhash, endiandata);
+			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
+				work->valid_nonces = 1;
+				work->nonces[0] = startNounce + h_resNonce[0];
+				work_set_target_ratio(work, vhash);
+				if (h_resNonce[1] != UINT32_MAX) {
+					uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
+					be32enc(&endiandata[19], secNonce);
+					polytimos_hash(vhash, endiandata);
+					bn_set_target_ratio(work, vhash, 1);
+					work->valid_nonces++;
+					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
+				} else {
+					pdata[19] = work->nonces[0] + 1; // cursor
+				}
+				return work->valid_nonces;
+			}
+			else if (vhash[7] > Htarg) { // GPU candidate rejected by the CPU hash
+				gpu_increment_reject(thr_id);
+				if (!opt_quiet) // report the failing candidate itself: work->nonces[0] is not assigned on this path
+					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", startNounce + h_resNonce[0]);
+				cudaMemset(d_resNonce[thr_id], 0xff, 2*sizeof(uint32_t));
+				pdata[19] = startNounce + h_resNonce[0] + 1; // resume right after the bad nonce
+				continue;
+			}
+		}
+
+		if ((uint64_t)throughput + pdata[19] >= max_nonce) { // 64-bit sum so the range check cannot wrap
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (!work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce;
+
+	CUDA_LOG_ERROR();
+
+	return 0;
+}
+
+// cleanup: frees the per-thread device buffers and clears init[thr_id]; no-op when the thread was never initialised
+extern "C" void free_polytimos(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	cudaDeviceSynchronize(); // replaces the deprecated cudaThreadSynchronize()
+
+	cudaFree(d_hash[thr_id]);
+	x13_fugue512_cpu_free(thr_id);
+	cudaFree(d_resNonce[thr_id]);
+
+	CUDA_LOG_ERROR();
+
+	cudaDeviceSynchronize();
+	init[thr_id] = false;
+}
diff --git a/util.cpp b/util.cpp index e8f3833..152093d 100644 --- a/util.cpp +++ b/util.cpp @@ -2246,6 +2246,9 @@ void print_hash_tests(void) phihash(&hash[0], &buf[0]); printpfx("phi", hash); + polytimos_hash(&hash[0], &buf[0]); + printpfx("polytimos", hash); + quarkhash(&hash[0], &buf[0]); printpfx("quark", hash);