From c5df1421249a3a7e21e3f6b0f32c213188793de2 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Mon, 29 Jun 2015 10:49:09 +0200 Subject: [PATCH] Add c11 algo (x11 variant) Used by Chaincoin and Flaxscript --- Makefile.am | 2 +- README.txt | 3 + ccminer.cpp | 23 +++- ccminer.vcxproj | 4 +- ccminer.vcxproj.filters | 3 + miner.h | 5 + util.cpp | 3 + x11/c11.cu | 242 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 277 insertions(+), 8 deletions(-) create mode 100644 x11/c11.cu diff --git a/Makefile.am b/Makefile.am index f5c0a0f..2128327 100644 --- a/Makefile.am +++ b/Makefile.am @@ -54,7 +54,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \ x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \ x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \ - x11/s3.cu + x11/c11.cu x11/s3.cu # scrypt ccminer_SOURCES += scrypt.cpp scrypt-jane.cpp \ diff --git a/README.txt b/README.txt index b4d38c7..b141984 100644 --- a/README.txt +++ b/README.txt @@ -31,6 +31,7 @@ JackpotCoin QuarkCoin family & AnimeCoin TalkCoin DarkCoin and other X11 coins +Chaincoin and Flaxscript (C11) Saffroncoin blake (256 14-rounds) BlakeCoin (256 8-rounds) Qubit (Digibyte, ...) @@ -66,6 +67,7 @@ its command line interface and options. anime use to mine Animecoin blake use to mine Saffroncoin (Blake 256) blakecoin use to mine Old Blake 256 + c11/flax use to mine Chaincoin and Flax deep use to mine Deepcoin dmd-gr use to mine Diamond-Groestl fresh use to mine Freshcoin @@ -222,6 +224,7 @@ features. July 2015... Nvml api power limits + Add chaincoin c11 algo (used by Flaxscript too) Remove pluck algo June 23th 2015 v1.6.5 diff --git a/ccminer.cpp b/ccminer.cpp index 0bc893b..75b6ae6 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -87,6 +87,7 @@ enum sha_algos { ALGO_ANIME, ALGO_BLAKE, ALGO_BLAKECOIN, + ALGO_C11, ALGO_DEEP, ALGO_DMD_GR, ALGO_FRESH, @@ -115,12 +116,14 @@ enum sha_algos { ALGO_X15, ALGO_X17, ALGO_ZR5, + ALGO_COUNT }; static const char *algo_names[] = { "anime", "blake", "blakecoin", + "c11", "deep", "dmd-gr", "fresh", @@ -149,6 +152,7 @@ static const char *algo_names[] = { "x15", "x17", "zr5", + "" }; bool opt_debug = false; @@ -275,6 +279,7 @@ Options:\n\ anime Animecoin\n\ blake Blake 256 (SFR)\n\ blakecoin Fast Blake 256 (8 rounds)\n\ + c11/flax X11 variant\n\ deep Deepcoin\n\ dmd-gr Diamond-Groestl\n\ fresh Freshcoin (shavite 80)\n\ @@ -1744,6 +1749,7 @@ static void *miner_thread(void *userdata) case ALGO_LUFFA: minmax = 0x2000000; break; + case ALGO_C11: case ALGO_S3: case ALGO_X11: case ALGO_X13: @@ -1818,6 +1824,11 @@ static void *miner_thread(void *userdata) max_nonce, &hashes_done); break; + case ALGO_C11: + rc = scanhash_c11(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_FUGUE256: rc = scanhash_fugue256(thr_id, work.data, work.target, max_nonce, &hashes_done); @@ -2404,24 +2415,26 @@ void parse_arg(int key, char *arg) case 'a': /* --algo */ p = strstr(arg, ":"); // optional factor if (p) *p = '\0'; - for (i = 0; i < ARRAY_SIZE(algo_names); i++) { + for (i = 0; i < ALGO_COUNT; i++) { if (algo_names[i] && !strcasecmp(arg, algo_names[i])) { opt_algo = (enum sha_algos)i; break; } } - if (i == ARRAY_SIZE(algo_names)) { + if (i == ALGO_COUNT) { // some aliases... - if (!strcasecmp("diamond", arg)) + if (!strcasecmp("flax", arg)) + i = opt_algo = ALGO_C11; + else if (!strcasecmp("diamond", arg)) i = opt_algo = ALGO_DMD_GR; - if (!strcasecmp("doom", arg)) + else if (!strcasecmp("doom", arg)) i = opt_algo = ALGO_LUFFA; else if (!strcasecmp("ziftr", arg)) i = opt_algo = ALGO_ZR5; else applog(LOG_ERR, "Unknown algo parameter '%s'", arg); } - if (i == ARRAY_SIZE(algo_names)) + if (i == ALGO_COUNT) show_usage_and_exit(1); if (p) { opt_nfactor = atoi(p + 1); diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 3475f07..b89f539 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -462,8 +462,8 @@ 64 - - + + true diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index e9e47a3..f277209 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -517,6 +517,9 @@ Source Files\CUDA\x11 + + Source Files\CUDA\x11 + Source Files\CUDA\x11 diff --git a/miner.h b/miner.h index e74a83e..aae574f 100644 --- a/miner.h +++ b/miner.h @@ -307,6 +307,10 @@ extern int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds); +extern int scanhash_c11(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + extern int scanhash_fresh(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); @@ -764,6 +768,7 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2); void print_hash_tests(void); void animehash(void *state, const void *input); void blake256hash(void *output, const void *input, int8_t rounds); +void c11hash(void *output, const void *input); void deephash(void *state, const void *input); void luffa_hash(void *state, const void *input); void fresh_hash(void *state, const void *input); diff --git a/util.cpp b/util.cpp index 96a3464..d75fa1b 100644 --- a/util.cpp +++ b/util.cpp @@ -1814,6 +1814,9 @@ void print_hash_tests(void) blake256hash(&hash[0], &buf[0], 14); printpfx("blake", hash); + c11hash(&hash[0], &buf[0]); + printpfx("c11", hash); + deephash(&hash[0], &buf[0]); printpfx("deep", hash); diff --git a/x11/c11.cu b/x11/c11.cu new file mode 100644 index 0000000..ae1ba45 --- /dev/null +++ b/x11/c11.cu @@ -0,0 +1,242 @@ +extern "C" +{ +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" + +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" +} + +#include "miner.h" +#include "cuda_helper.h" + +#include +#include + +static uint32_t *d_hash[MAX_GPUS]; + +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); +extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); + +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, + uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); + +// Flax/C11 CPU Hash +extern "C" void c11hash(void *output, const void *input) +{ + // blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11 + + sph_blake512_context ctx_blake; + sph_bmw512_context ctx_bmw; + sph_groestl512_context ctx_groestl; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_skein512_context ctx_skein; + sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + + unsigned char hash[128]; + memset(hash, 0, sizeof hash); + + sph_blake512_init(&ctx_blake); + sph_blake512 (&ctx_blake, input, 80); + sph_blake512_close(&ctx_blake, (void*) hash); + + sph_bmw512_init(&ctx_bmw); + sph_bmw512 (&ctx_bmw, (const void*) hash, 64); + sph_bmw512_close(&ctx_bmw, (void*) hash); + + sph_groestl512_init(&ctx_groestl); + sph_groestl512 (&ctx_groestl, (const void*) hash, 64); + sph_groestl512_close(&ctx_groestl, (void*) hash); + + sph_jh512_init(&ctx_jh); + sph_jh512 (&ctx_jh, (const void*) hash, 64); + sph_jh512_close(&ctx_jh, (void*) hash); + + sph_keccak512_init(&ctx_keccak); + sph_keccak512 (&ctx_keccak, (const void*) hash, 64); + sph_keccak512_close(&ctx_keccak, (void*) hash); + + sph_skein512_init(&ctx_skein); + sph_skein512 (&ctx_skein, (const void*) hash, 64); + sph_skein512_close(&ctx_skein, (void*) hash); + + sph_luffa512_init(&ctx_luffa); + sph_luffa512 (&ctx_luffa, (const void*) hash, 64); + sph_luffa512_close (&ctx_luffa, (void*) hash); + + sph_cubehash512_init(&ctx_cubehash); + sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*) hash); + + sph_shavite512_init(&ctx_shavite); + sph_shavite512 (&ctx_shavite, (const void*) hash, 64); + sph_shavite512_close(&ctx_shavite, (void*) hash); + + sph_simd512_init(&ctx_simd); + sph_simd512 (&ctx_simd, (const void*) hash, 64); + sph_simd512_close(&ctx_simd, (void*) hash); + + sph_echo512_init(&ctx_echo); + sph_echo512 (&ctx_echo, (const void*) hash, 64); + sph_echo512_close(&ctx_echo, (void*) hash); + + memcpy(output, hash, 32); +} + +#ifdef _DEBUG +#define TRACE(algo) { \ + if (max_nonce == 1 && pdata[19] <= 1) { \ + uint32_t* debugbuf = NULL; \ + cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \ + cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \ + printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \ + swab32(debugbuf[2]), swab32(debugbuf[3])); \ + cudaFreeHost(debugbuf); \ + } \ +} +#else +#define TRACE(algo) {} +#endif + +static bool init[MAX_GPUS] = { 0 }; + +extern "C" int scanhash_c11(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19; + uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x5; + + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + quark_blake512_cpu_init(thr_id, throughput); + quark_bmw512_cpu_init(thr_id, throughput); + quark_groestl512_cpu_init(thr_id, throughput); + quark_skein512_cpu_init(thr_id, throughput); + quark_keccak512_cpu_init(thr_id, throughput); + quark_jh512_cpu_init(thr_id, throughput); + x11_luffaCubehash512_cpu_init(thr_id, throughput); + x11_shavite512_cpu_init(thr_id, throughput); + x11_echo512_cpu_init(thr_id, throughput); + if (x11_simd512_cpu_init(thr_id, throughput) != 0) { + return 0; + } + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); // why 64 ? + + cuda_check_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], pdata[k]); + + quark_blake512_cpu_setBlock_80(thr_id, endiandata); + cuda_check_cpu_setTarget(ptarget); + + do { + int order = 0; + uint32_t foundNonce; + + // Hash with CUDA + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; + TRACE("blake :"); + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("bmw :"); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("groestl:"); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("jh512 :"); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("keccak :"); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("skein :"); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + TRACE("luffa+c:"); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("shavite:"); + x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("simd :"); + x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + TRACE("echo => "); + + foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (foundNonce != UINT32_MAX) + { + const uint32_t Htarg = ptarget[7]; + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + c11hash(vhash64, endiandata); + + if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { + int res = 1; + // check if there was some other ones... + uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + *hashes_done = pdata[19] - first_nonce + throughput; + if (secNonce != 0) { + pdata[21] = secNonce; + res++; + } + pdata[19] = foundNonce; + return res; + } else { + applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce); + pdata[19] = foundNonce + 1; + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +}