From 7963dbb438db07903b50e7e805144670fbf6e952 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Tue, 14 Apr 2015 01:46:11 +0200 Subject: [PATCH] skein2 algo for woodcoin Also known as Double Skein Signed-off-by: Tanguy Pruvot --- Makefile.am | 4 +- README.txt | 2 + ccminer.cpp | 8 +++ ccminer.vcxproj | 6 +- ccminer.vcxproj.filters | 3 + cpuminer-config.h | 10 ++-- miner.h | 5 ++ skein2.cu | 122 ++++++++++++++++++++++++++++++++++++++++ sph/skein.c | 10 ++++ sph/sph_skein.h | 8 +++ util.cpp | 3 + 11 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 skein2.cu diff --git a/Makefile.am b/Makefile.am index dcdd91e..63cfb32 100644 --- a/Makefile.am +++ b/Makefile.am @@ -41,7 +41,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \ quark/quarkcoin.cu quark/animecoin.cu \ quark/cuda_quark_compactionTest.cu \ - cuda_nist5.cu pentablake.cu skein.cu zr5.cu \ + cuda_nist5.cu pentablake.cu skein.cu skein2.cu zr5.cu \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ @@ -71,7 +71,7 @@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\" -#nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\" +nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\" #nvcc_ARCH = -gencode=arch=compute_35,code=\"sm_35,compute_35\" #nvcc_ARCH = -gencode=arch=compute_30,code=\"sm_30,compute_30\" diff --git a/README.txt b/README.txt index 4a71c3c..f4b27d3 100644 --- a/README.txt +++ b/README.txt @@ -39,6 +39,7 @@ Keccak (Maxcoin) Pentablake (Blake 512 x5) 1Coin Triple S Skein 
(Skein + SHA) +Woodcoin (Double Skein) Vertcoin Lyra2RE Ziftrcoin (ZR5) @@ -81,6 +82,7 @@ its command line interface and options. qubit use to mine Qubit Algo s3 use to mine 1coin skein use to mine Skeincoin + skein2 use to mine Woodcoin whirl use to mine Whirlcoin whirlpoolx use to mine Vanillacoin x11 use to mine DarkCoin diff --git a/ccminer.cpp b/ccminer.cpp index 69baf85..a1ab2f4 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -103,6 +103,7 @@ enum sha_algos { ALGO_QUARK, ALGO_QUBIT, ALGO_SKEIN, + ALGO_SKEIN2, ALGO_S3, ALGO_WHIRLCOIN, ALGO_WHIRLPOOLX, @@ -137,6 +138,7 @@ static const char *algo_names[] = { "quark", "qubit", "skein", + "skein2", "s3", "whirl", "whirlpoolx", @@ -254,6 +256,7 @@ Options:\n\ quark Quark\n\ qubit Qubit\n\ skein Skein SHA2 (Skeincoin)\n\ + skein2 Double Skein (Woodcoin)\n\ s3 S3 (1Coin)\n\ x11 X11 (DarkCoin)\n\ x13 X13 (MaruCoin)\n\ @@ -1524,6 +1527,11 @@ static void *miner_thread(void *userdata) max_nonce, &hashes_done); break; + case ALGO_SKEIN2: + rc = scanhash_skein2(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_S3: rc = scanhash_s3(thr_id, work.data, work.target, max_nonce, &hashes_done); diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 8b2a01b..528d5c0 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -103,11 +103,11 @@ false + true 80 true true compute_50,sm_50 - true @@ -133,6 +133,7 @@ false + true 80 true true @@ -435,6 +436,9 @@ 64 + + 64 + true diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 4d89287..3d1fd91 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -577,6 +577,9 @@ Source Files\CUDA + + Source Files\CUDA + diff --git a/cpuminer-config.h b/cpuminer-config.h index c3b94c4..1264f97 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -159,7 +159,7 @@ #define PACKAGE_NAME "ccminer" /* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "ccminer 1.6" +#define PACKAGE_STRING "ccminer 1.6.1" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "ccminer" @@ -168,7 +168,7 @@ #define PACKAGE_URL "http://github.com/tpruvot/ccminer" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.6" +#define PACKAGE_VERSION "1.6.1" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -182,16 +182,16 @@ #define STDC_HEADERS 1 /* Define to 1 if AVX assembly is available. */ -#define USE_AVX 1 +/* #undef USE_AVX */ /* Define to 1 if AVX2 assembly is available. */ /* #undef USE_AVX2 */ /* Define to 1 if XOP assembly is available. */ -#define USE_XOP 1 +/* #undef USE_XOP */ /* Version number of package */ -#define VERSION "1.6" +#define VERSION "1.6.1" /* Define curl_free() as free() if our version of curl lacks curl_free. */ /* #undef curl_free */ diff --git a/miner.h b/miner.h index 5dd7d4c..65ee5ae 100644 --- a/miner.h +++ b/miner.h @@ -350,6 +350,10 @@ extern int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_skein2(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + extern int scanhash_s3(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); @@ -680,6 +684,7 @@ void pluckhash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const in void quarkhash(void *state, const void *input); void qubithash(void *state, const void *input); void skeincoinhash(void *output, const void *input); +void skein2hash(void *output, const void *input); void s3hash(void *output, const void *input); void wcoinhash(void *state, const void *input); void whirlxHash(void *state, const void *input); diff --git a/skein2.cu b/skein2.cu new file mode 100644 index 0000000..82592de --- 
/dev/null
+++ b/skein2.cu
@@ -0,0 +1,153 @@
+extern "C" {
+#include "sph/sph_skein.h"
+}
+
+#include "miner.h"
+#include "cuda_helper.h"
+
+/* Per-thread device hash buffer: 64 bytes per nonce, allocated in scanhash_skein2(). */
+static uint32_t *d_hash[MAX_GPUS];
+
+extern void skein512_cpu_setBlock_80(void *pdata);
+extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+/**
+ * CPU reference hash: Skein-512 over the first 80 bytes of input, then
+ * Skein-512 over the resulting 64-byte digest.
+ *
+ * @param output receives the first 32 bytes of the second digest
+ * @param input  data to hash; 80 bytes are read
+ */
+extern "C" void skein2hash(void *output, const void *input)
+{
+	sph_skein512_context ctx_skein;
+
+	uint32_t hash[16];
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, input, 80);
+	sph_skein512_close(&ctx_skein, hash);
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, hash, 64);
+	sph_skein512_close(&ctx_skein, hash);
+
+	memcpy(output, hash, 32);
+}
+
+#ifdef _DEBUG
+/* Debug aid: dump the first four words of d_hash[thr_id] during a single-nonce scan. */
+#define TRACE(algo) { \
+	if (max_nonce == 1 && pdata[19] <= 1) { \
+		uint32_t* debugbuf = NULL; \
+		cudaMallocHost(&debugbuf, 16*sizeof(uint32_t)); \
+		cudaMemcpy(debugbuf, d_hash[thr_id], 16*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
+		printf("SK2 %s %08x %08x %08x %08x...\n", algo, \
+			swab32(debugbuf[0]), swab32(debugbuf[1]), swab32(debugbuf[2]), swab32(debugbuf[3])); \
+		/* cudaMallocHost memory must be released with cudaFreeHost, not cudaFree */ \
+		cudaFreeHost(debugbuf); \
+	} \
+}
+#else
+#define TRACE(algo) {}
+#endif
+
+/* Set once the buffers of a thread have been allocated. */
+static bool init[MAX_GPUS] = { 0 };
+
+/**
+ * Scan nonces on the GPU mapped to thr_id, starting at pdata[19].
+ *
+ * @param thr_id      miner thread index; selects the device and its buffers
+ * @param pdata       header words; pdata[19] (nonce) is updated in place and
+ *                    pdata[21] receives a second nonce when one is found
+ * @param ptarget     target words; ptarget[7] is overwritten when opt_benchmark is set
+ * @param max_nonce   scanning stops once pdata[19] reaches this value
+ * @param hashes_done receives the number of nonces accounted for by this call
+ * @return 1 or 2 when nonces validated on the CPU, 0 otherwise
+ */
+extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	// Buffers are allocated only on the first call, so size them from the unclamped
+	// intensity; a short first nonce range must not shrink them for later calls.
+	const uint32_t max_throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
+	const uint32_t throughput = min(max_throughput, (max_nonce - first_nonce));
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0FFF;
+
+	if (!init[thr_id])
+	{
+		cudaDeviceReset();
+		cudaSetDevice(device_map[thr_id]);
+
+		// size_t arithmetic keeps the byte count from wrapping in 32 bits
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * max_throughput));
+
+		cuda_check_cpu_init(thr_id, max_throughput);
+		init[thr_id] = true;
+	}
+
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	skein512_cpu_setBlock_80((void*)endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// Hash with CUDA
+		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		TRACE("80:");
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("64:");
+
+		uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
+		if (foundNonce != UINT32_MAX)
+		{
+			uint32_t vhash64[8];
+
+			// Recompute the candidate on the CPU before reporting it
+			endiandata[19] = foundNonce;
+			skein2hash(vhash64, endiandata);
+
+			if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
+				int res = 1;
+				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
+				*hashes_done = pdata[19] - first_nonce + throughput;
+				if (secNonce != 0) {
+					if (!opt_quiet)
+						applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", device_map[thr_id], swab32(secNonce));
+					pdata[21] = swab32(secNonce);
+					res++;
+				}
+				pdata[19] = swab32(foundNonce);
+				return res;
+			}
+			else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", device_map[thr_id], foundNonce);
+			}
+		}
+
+		// Advance in 64 bits: a 32-bit "pdata[19] += throughput" can wrap past
+		// UINT32_MAX, drop below max_nonce again and rescan the range forever.
+		const uint64_t next_nonce = (uint64_t) pdata[19] + throughput;
+		if (next_nonce > UINT32_MAX) {
+			pdata[19] = UINT32_MAX;
+			break;
+		}
+		pdata[19] = (uint32_t) next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/sph/skein.c b/sph/skein.c index 2fcfae5..7e47e35 100644 --- a/sph/skein.c +++ b/sph/skein.c @@ -35,6 +35,11 @@ #include "sph_skein.h" + +#ifdef __cplusplus +extern "C"{ +#endif + + #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SKEIN #define SPH_SMALL_FOOTPRINT_SKEIN 1 #endif @@ -1242,3 +1247,8 @@ sph_skein512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) } #endif + + +#ifdef __cplusplus +} +#endif diff --git a/sph/sph_skein.h b/sph/sph_skein.h index 
8555984..bddbc86 100644 --- a/sph/sph_skein.h +++ b/sph/sph_skein.h @@ -41,6 +41,10 @@ #ifndef SPH_SKEIN_H__ #define SPH_SKEIN_H__ +#ifdef __cplusplus +extern "C"{ +#endif + #include #include "sph_types.h" @@ -287,4 +291,8 @@ void sph_skein512_addbits_and_close( #endif +#ifdef __cplusplus +} +#endif + #endif diff --git a/util.cpp b/util.cpp index a89f27f..0b9e012 100644 --- a/util.cpp +++ b/util.cpp @@ -1791,6 +1791,9 @@ void print_hash_tests(void) skeincoinhash(&hash[0], &buf[0]); printpfx("skein", hash); + skein2hash(&hash[0], &buf[0]); + printpfx("skein2", hash); + s3hash(&hash[0], &buf[0]); printpfx("S3", hash);