diff --git a/Makefile.am b/Makefile.am index 8ac3ae5..3b6d766 100644 --- a/Makefile.am +++ b/Makefile.am @@ -74,7 +74,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \ x15/whirlpool.cu x15/cuda_x15_whirlpool_sm3.cu \ x17/x17.cu x17/hmq17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \ - x11/c11.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu + x11/c11.cu x11/phi.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu # scrypt ccminer_SOURCES += scrypt.cpp scrypt-jane.cpp \ diff --git a/README.txt b/README.txt index 21e36cb..bbaf9a7 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccminer 2.2.1 (Sept. 2017) "optimized tribus kernel (Maxwell+)" +ccminer 2.2.2 (Oct. 2017) "phi algo" --------------------------------------------------------------- *************************************************************** @@ -102,6 +102,7 @@ its command line interface and options. neoscrypt use to mine FeatherCoin nist5 use to mine TalkCoin penta use to mine Joincoin / Pentablake + phi use to mine BHCoin quark use to mine Quarkcoin qubit use to mine Qubit scrypt use to mine Scrypt coins @@ -277,6 +278,9 @@ so we can more efficiently implement new algorithms using the latest hardware features. >>> RELEASE HISTORY <<< + Oct. 07th 2017 v2.2.2 + Import phi algo from BHCoin repository (anorganix) + Sep. 
01st 2017 v2.2.1 Improve tribus algo on recent cards (up to +10%) diff --git a/algos.h b/algos.h index b7dd0f2..7febd71 100644 --- a/algos.h +++ b/algos.h @@ -35,6 +35,7 @@ enum sha_algos { ALGO_NEOSCRYPT, ALGO_NIST5, ALGO_PENTABLAKE, + ALGO_PHI, ALGO_QUARK, ALGO_QUBIT, ALGO_SCRYPT, @@ -100,6 +101,7 @@ static const char *algo_names[] = { "neoscrypt", "nist5", "penta", + "phi", "quark", "qubit", "scrypt", @@ -167,6 +169,8 @@ static inline int algo_to_int(char* arg) i = ALGO_LYRA2; else if (!strcasecmp("lyra2rev2", arg)) i = ALGO_LYRA2v2; + else if (!strcasecmp("phi1612", arg)) + i = ALGO_PHI; else if (!strcasecmp("bitcoin", arg)) i = ALGO_SHA256D; else if (!strcasecmp("sha256", arg)) diff --git a/bench.cpp b/bench.cpp index 500176f..d147a9e 100644 --- a/bench.cpp +++ b/bench.cpp @@ -76,6 +76,7 @@ void algo_free_all(int thr_id) free_neoscrypt(thr_id); free_nist5(thr_id); free_pentablake(thr_id); + free_phi(thr_id); free_quark(thr_id); free_qubit(thr_id); free_skeincoin(thr_id); diff --git a/ccminer.cpp b/ccminer.cpp index 12f61ec..0388427 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -266,6 +266,7 @@ Options:\n\ neoscrypt FeatherCoin, Phoenix, UFO...\n\ nist5 NIST5 (TalkCoin)\n\ penta Pentablake hash (5x Blake 512)\n\ + phi BHCoin\n\ quark Quark\n\ qubit Qubit\n\ sha256d SHA256d (bitcoin)\n\ @@ -279,7 +280,7 @@ Options:\n\ skunk Skein Cube Fugue Streebog\n\ s3 S3 (1Coin)\n\ timetravel Machinecoin permuted x8\n\ - tribus Denerius\n\ + tribus Denarius\n\ vanilla Blake256-8 (VNL)\n\ veltor Thorsriddle streebog\n\ whirlcoin Old Whirlcoin (Whirlpool algo)\n\ @@ -2225,6 +2226,7 @@ static void *miner_thread(void *userdata) case ALGO_JACKPOT: case ALGO_JHA: case ALGO_LYRA2v2: + case ALGO_PHI: case ALGO_S3: case ALGO_SKUNK: case ALGO_TIMETRAVEL: @@ -2406,6 +2408,9 @@ static void *miner_thread(void *userdata) case ALGO_PENTABLAKE: rc = scanhash_pentablake(thr_id, &work, max_nonce, &hashes_done); break; + case ALGO_PHI: + rc = scanhash_phi(thr_id, &work, max_nonce, 
&hashes_done); + break; case ALGO_SCRYPT: rc = scanhash_scrypt(thr_id, &work, max_nonce, &hashes_done, NULL, &tv_start, &tv_end); @@ -3847,7 +3852,7 @@ int main(int argc, char *argv[]) #endif CUDART_VERSION/1000, (CUDART_VERSION % 1000)/10, arch); printf(" Originally based on Christian Buchner and Christian H. project\n"); - printf(" Include some algos from alexis78, djm34, sp, tsiv and klausT.\n\n"); + printf(" Include some kernels from alexis78, djm34, djEzo, tsiv and krnlx.\n\n"); printf("BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo (tpruvot)\n\n"); } diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 0332845..8288895 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -487,6 +487,7 @@ --ptxas-options="-dlcm=cg" %(AdditionalOptions) true + 128 diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 15cf8fc..d312492 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -778,6 +778,9 @@ Source Files\CUDA\tribus + + Source Files\CUDA\x11 + Source Files\CUDA\x11 diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h index 375f513..7c28072 100644 --- a/compat/ccminer-config.h +++ b/compat/ccminer-config.h @@ -164,7 +164,7 @@ #define PACKAGE_URL "http://github.com/tpruvot/ccminer" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "2.2.1" +#define PACKAGE_VERSION "2.2.2" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be diff --git a/configure.ac b/configure.ac index 2bd9d2e..061dfdd 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [2.2.1], [], [ccminer], [http://github.com/tpruvot/ccminer]) +AC_INIT([ccminer], [2.2.2], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/miner.h b/miner.h index b9c7256..0d58eec 100644 --- a/miner.h +++ b/miner.h @@ -300,6 +300,7 @@ extern int scanhash_myriad(int thr_id, struct work* work, uint32_t max_nonce, un extern int scanhash_neoscrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_phi(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_quark(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_sha256d(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); @@ -361,6 +362,7 @@ extern void free_myriad(int thr_id); extern void free_neoscrypt(int thr_id); extern void free_nist5(int thr_id); extern void free_pentablake(int thr_id); +extern void free_phi(int thr_id); extern void free_quark(int thr_id); extern void free_qubit(int thr_id); extern void free_sha256d(int thr_id); @@ -902,6 +904,7 @@ void myriadhash(void *state, const void *input); void neoscrypt(uchar *output, const uchar *input, uint32_t profile); void nist5hash(void *state, const void *input); void pentablakehash(void 
*output, const void *input); +void phihash(void *output, const void *input); void quarkhash(void *state, const void *input); void qubithash(void *state, const void *input); void scrypthash(void* output, const void* input); diff --git a/util.cpp b/util.cpp index e601756..e5fe5b6 100644 --- a/util.cpp +++ b/util.cpp @@ -2240,6 +2240,9 @@ void print_hash_tests(void) pentablakehash(&hash[0], &buf[0]); printpfx("pentablake", hash); + phihash(&hash[0], &buf[0]); + printpfx("phi", hash); + quarkhash(&hash[0], &buf[0]); printpfx("quark", hash); 
diff --git a/x11/phi.cu b/x11/phi.cu
new file mode 100644
index 0000000..717cff8
--- /dev/null
+++ b/x11/phi.cu
@@ -0,0 +1,214 @@
+//
+//
+// PHI1612 algo
+// Skein + JH + CubeHash + Fugue + Gost + Echo
+//
+// Implemented by anorganix @ bitcointalk on 01.10.2017
+// Feel free to send some satoshis to 1Bitcoin8tfbtGAQNFxDRUVUfFgFWKoWi9
+//
+//
+
+extern "C" {
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_fugue.h"
+#include "sph/sph_streebog.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+#include "cuda_helper.h"
+#include "cuda_x11.h"
+
+extern void skein512_cpu_setBlock_80(void *pdata);
+extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
+extern void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash);
+
+extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void x13_fugue512_cpu_free(int thr_id);
+
+#include <stdio.h>
+#include <memory.h>
+
+// per-thread device hash buffer, 64 bytes per nonce (allocated in scanhash_phi)
+static uint32_t *d_hash[MAX_GPUS];
+
+// CPU implementation of the PHI1612 chain, used to re-check GPU candidates.
+// Hashes the first 80 bytes of `input` with Skein-512, then passes each 64-byte
+// digest through JH, CubeHash, Fugue, Gost and Echo (all 512-bit) in that order.
+// Only the first 32 bytes of the final digest are copied to `output`.
+extern "C" void phihash(void *output, const void *input)
+{
+	unsigned char _ALIGN(128) hash[128] = { 0 };
+
+	sph_skein512_context ctx_skein;
+	sph_jh512_context ctx_jh;
+	sph_cubehash512_context ctx_cubehash;
+	sph_fugue512_context ctx_fugue;
+	sph_gost512_context ctx_gost;
+	sph_echo512_context ctx_echo;
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, input, 80);
+	sph_skein512_close(&ctx_skein, (void*)hash);
+
+	sph_jh512_init(&ctx_jh);
+	sph_jh512(&ctx_jh, (const void*)hash, 64);
+	sph_jh512_close(&ctx_jh, (void*)hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512(&ctx_cubehash, (const void*)hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*)hash);
+
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512(&ctx_fugue, (const void*)hash, 64);
+	sph_fugue512_close(&ctx_fugue, (void*)hash);
+
+	sph_gost512_init(&ctx_gost);
+	sph_gost512(&ctx_gost, (const void*)hash, 64);
+	sph_gost512_close(&ctx_gost, (void*)hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512(&ctx_echo, (const void*)hash, 64);
+	sph_echo512_close(&ctx_echo, (void*)hash);
+
+	memcpy(output, hash, 32);
+}
+
+#define _DEBUG_PREFIX "phi"
+#include "cuda_debug.cuh"
+
+// set once scanhash_phi has allocated its per-thread GPU resources, cleared by free_phi
+static bool init[MAX_GPUS] = { 0 };
+
+// Scan nonces on the GPU starting at work->data[19], up to max_nonce.
+// Returns the number of CPU-validated nonces stored in work->nonces (1 or 2),
+// or 0 when the range is exhausted or a work restart is flagged; -1 is the value
+// handed to CUDA_CALL_OR_RET_X for a failed cudaMalloc of the hash buffer.
+// On the 0/1/2 paths *hashes_done is set and work->data[19] is moved forward.
+extern "C" int scanhash_phi(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t *pdata = work->data;
+	uint32_t *ptarget = work->target;
+
+	const uint32_t first_nonce = pdata[19];
+	const int dev_id = device_map[thr_id];
+
+	int intensity = (device_sm[dev_id] >= 500 && !is_windows()) ? 19 : 18; // 2^18 = 262144 cuda threads
+	if (device_sm[dev_id] >= 600) intensity = 20;
+
+	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
+	// clamp to the remaining range only after init: the first call's throughput sizes d_hash
+	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+
+	if (opt_benchmark)
+		ptarget[7] = 0xf;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+
+			CUDA_LOG_ERROR();
+		}
+		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
+
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput), -1);
+		cuda_check_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	uint32_t endiandata[20];
+
+	for (int k = 0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	skein512_cpu_setBlock_80((void*)endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// GPU chain in the same stage order as phihash(); every stage is handed the same d_hash buffer
+		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		streebog_cpu_hash_64(thr_id, throughput, d_hash[thr_id]);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
+
+		if (work->nonces[0] != UINT32_MAX)
+		{
+			const uint32_t Htarg = ptarget[7];
+			uint32_t _ALIGN(64) vhash[8];
+			// recompute the candidate on the CPU before reporting it
+			be32enc(&endiandata[19], work->nonces[0]);
+			phihash(vhash, endiandata);
+
+			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
+				work->valid_nonces = 1;
+				work_set_target_ratio(work, vhash);
+				work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
+				*hashes_done = pdata[19] - first_nonce + throughput;
+				if (work->nonces[1] != 0) {
+					be32enc(&endiandata[19], work->nonces[1]);
+					phihash(vhash, endiandata);
+					bn_set_target_ratio(work, vhash, 1);
+					work->valid_nonces++;
+					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
+				}
+				else {
+					pdata[19] = work->nonces[0] + 1; // cursor
+				}
+				return work->valid_nonces;
+			}
+			else if (vhash[7] > Htarg) {
+				gpu_increment_reject(thr_id);
+				if (!opt_quiet)
+					gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
+				pdata[19] = work->nonces[0] + 1;
+				continue;
+			}
+		}
+
+		if ((uint64_t)throughput + pdata[19] >= max_nonce) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (!work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce;
+	return 0;
+}
+
+// cleanup: releases what scanhash_phi set up for this thread (no-op if never initialised)
+extern "C" void free_phi(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement
+	cudaDeviceSynchronize();
+	cudaFree(d_hash[thr_id]);
+
+	// counterpart of the x13_fugue512_cpu_init() call in scanhash_phi, previously never released
+	x13_fugue512_cpu_free(thr_id);
+	cuda_check_cpu_free(thr_id);
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}