From d9f242b8d1a1ef46e584f69666450fbc4431db15 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sat, 23 Jun 2018 14:40:29 +0200 Subject: [PATCH] add sonoa algo, heavy x17 hashes seems to works, more or less correctly (a few validation errors) --- Makefile.am | 2 +- README.txt | 10 +- algos.h | 2 + bench.cpp | 2 + ccminer.cpp | 7 +- ccminer.vcxproj | 3 +- ccminer.vcxproj.filters | 3 + miner.h | 2 + x17/sonoa.cu | 632 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 657 insertions(+), 6 deletions(-) create mode 100644 x17/sonoa.cu diff --git a/Makefile.am b/Makefile.am index 80a80c8..ddfbec6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -80,7 +80,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ x16/x16r.cu x16/x16s.cu x16/cuda_x16_echo512.cu x16/cuda_x16_fugue512.cu \ x16/cuda_x16_shabal512.cu x16/cuda_x16_simd512_80.cu \ x16/cuda_x16_echo512_64.cu \ - x17/x17.cu x17/hmq17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \ + x17/x17.cu x17/hmq17.cu x17/sonoa.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \ phi/phi.cu phi/phi2.cu phi/cuda_phi2.cu phi/cuda_phi2_cubehash512.cu x11/cuda_streebog_maxwell.cu \ x11/c11.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu diff --git a/README.txt b/README.txt index 148f089..cb60fca 100644 --- a/README.txt +++ b/README.txt @@ -41,19 +41,21 @@ Keccak (Maxcoin) Pentablake (Blake 512 x5) 1Coin Triple S Neoscrypt (FeatherCoin) -Revolver (X11evo) +x11evo (Revolver) +phi2 (LUXCoin) Scrypt and Scrypt:N Scrypt-Jane (Chacha) -Sibcoin (sib) +sib (Sibcoin) Skein (Skein + SHA) Signatum (Skein cubehash fugue Streebog) +SonoA (Sono) Tribus (JH, keccak, simd) Woodcoin (Double Skein) Vanilla (Blake256 8-rounds - double sha256) Vertcoin Lyra2RE Ziftrcoin (ZR5) Boolberry (Wild Keccak) -Monero (Cryptonight) +Monero (Cryptonight v7 with -a monero) Aeon (Cryptonight-lite) where some of these coins have a VERY NOTABLE nVidia advantage @@ -119,6 +121,7 @@ its command line interface and options. skein use to mine Skeincoin skein2 use to mine Woodcoin skunk use to mine Signatum + sonoa use to mine Sono stellite use to mine Stellite (a cryptonight variant) timetravel use to mine MachineCoin tribus use to mine Denarius @@ -288,6 +291,7 @@ features. June 23th 2018 v2.3 Handle phi2 header variation for smart contracts Handle monero, stellite, graft and cryptolight variants + Handle SonoA algo June 10th 2018 v2.2.6 New phi2 algo for LUX diff --git a/algos.h b/algos.h index c484bcc..dfbf7d8 100644 --- a/algos.h +++ b/algos.h @@ -52,6 +52,7 @@ enum sha_algos { ALGO_SKEIN, ALGO_SKEIN2, ALGO_SKUNK, + ALGO_SONOA, ALGO_S3, ALGO_TIMETRAVEL, ALGO_TRIBUS, @@ -129,6 +130,7 @@ static const char *algo_names[] = { "skein", "skein2", "skunk", + "sonoa", "s3", "timetravel", "tribus", diff --git a/bench.cpp b/bench.cpp index 84f9bc5..894fd8a 100644 --- a/bench.cpp +++ b/bench.cpp @@ -82,6 +82,7 @@ void algo_free_all(int thr_id) free_nist5(thr_id); free_pentablake(thr_id); free_phi(thr_id); + free_phi2(thr_id); free_polytimos(thr_id); free_quark(thr_id); free_qubit(thr_id); @@ -92,6 +93,7 @@ void algo_free_all(int thr_id) free_sha256t(thr_id); free_sia(thr_id); free_sib(thr_id); + free_sonoa(thr_id); free_s3(thr_id); free_vanilla(thr_id); free_veltor(thr_id); diff --git a/ccminer.cpp b/ccminer.cpp index 6521284..c2b34f8 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -274,7 +274,7 @@ Options:\n\ neoscrypt FeatherCoin, Phoenix, UFO...\n\ nist5 NIST5 (TalkCoin)\n\ penta Pentablake hash (5x Blake 512)\n\ - phi LUX initial algo\n\ + phi1612 LUX initial algo, for Seraph\n\ phi2 LUX v2 with lyra2\n\ polytimos Politimos\n\ quark Quark\n\ @@ -288,6 +288,7 @@ Options:\n\ skein Skein SHA2 (Skeincoin)\n\ skein2 Double Skein (Woodcoin)\n\ skunk Skein Cube Fugue Streebog\n\ + sonoa 97 hashes based on X17 ones (Sono)\n\ stellite Cryptonight v3\n\ s3 S3 (1Coin)\n\ timetravel Machinecoin permuted x8\n\ @@ -2299,6 +2300,7 @@ static void *miner_thread(void *userdata) case ALGO_NEOSCRYPT: case ALGO_SIB: case ALGO_SCRYPT: + case ALGO_SONOA: case ALGO_VELTOR: minmax = 0x80000; break; @@ -2508,6 +2510,9 @@ static void *miner_thread(void *userdata) case ALGO_SIB: rc = scanhash_sib(thr_id, &work, max_nonce, &hashes_done); break; + case ALGO_SONOA: + rc = scanhash_sonoa(thr_id, &work, max_nonce, &hashes_done); + break; case ALGO_S3: rc = scanhash_s3(thr_id, &work, max_nonce, &hashes_done); break; diff --git a/ccminer.vcxproj b/ccminer.vcxproj index c0aa954..f3d3e28 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -591,7 +591,6 @@ - @@ -604,6 +603,8 @@ compute_50,sm_50;compute_52,sm_52 + + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 667331a..a1b9e86 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -778,6 +778,9 @@ Source Files\CUDA\x17 + + Source Files\CUDA\x17 + Source Files\CUDA\x17 diff --git a/miner.h b/miner.h index 86088cb..368b3cb 100644 --- a/miner.h +++ b/miner.h @@ -315,6 +315,7 @@ extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_skunk(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_sonoa(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_tribus(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); extern int scanhash_bitcore(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done); @@ -384,6 +385,7 @@ extern void free_skeincoin(int thr_id); extern void free_skein2(int thr_id); extern void free_skunk(int thr_id); extern void free_s3(int thr_id); +extern void free_sonoa(int thr_id); extern void free_timetravel(int thr_id); extern void free_tribus(int thr_id); extern void free_bitcore(int thr_id); diff --git a/x17/sonoa.cu b/x17/sonoa.cu new file mode 100644 index 0000000..153f787 --- /dev/null +++ b/x17/sonoa.cu @@ -0,0 +1,632 @@ +/** + * x97 SONO + **/ + +extern "C" { +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" +#include "sph/sph_hamsi.h" +#include "sph/sph_fugue.h" +#include "sph/sph_shabal.h" +#include "sph/sph_whirlpool.h" +#include "sph/sph_sha2.h" +#include "sph/sph_haval.h" +} + +#include "miner.h" +#include "cuda_helper.h" +#include "x11/cuda_x11.h" + +#define NBN 2 + +static uint32_t *d_hash[MAX_GPUS]; + +extern void x16_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash); + +extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_free(int thr_id); + +extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int flag); +extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x15_whirlpool_cpu_free(int thr_id); + +extern void x17_sha512_cpu_init(int thr_id, uint32_t threads); +extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash); + +extern void x17_haval256_cpu_init(int thr_id, uint32_t threads); +extern void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, const int outlen); + +// CPU Hash Validation +extern "C" void sonoa_hash(void *output, const void *input) +{ + unsigned char _ALIGN(128) hash[64]; + + sph_blake512_context ctx_blake; + sph_bmw512_context ctx_bmw; + sph_groestl512_context ctx_groestl; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_skein512_context ctx_skein; + sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + sph_hamsi512_context ctx_hamsi; + sph_fugue512_context ctx_fugue; + sph_shabal512_context ctx_shabal; + sph_whirlpool_context ctx_whirlpool; + sph_sha512_context ctx_sha512; + sph_haval256_5_context ctx_haval; + + + sph_blake512_init(&ctx_blake); + sph_blake512(&ctx_blake, input, 80); + sph_blake512_close(&ctx_blake, (void*)hash); + + sph_bmw512_init(&ctx_bmw); + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512_init(&ctx_groestl); + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512_init(&ctx_skein); + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512_init(&ctx_jh); + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512_init(&ctx_keccak); + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512_init(&ctx_luffa); + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512_init(&ctx_cubehash); + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512_init(&ctx_shavite); + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512_init(&ctx_simd); + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512_init(&ctx_echo); + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512_init(&ctx_hamsi); + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_fugue512_init(&ctx_fugue); + sph_fugue512(&ctx_fugue, (const void*)hash, 64); + sph_fugue512_close(&ctx_fugue, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_fugue512(&ctx_fugue, (const void*)hash, 64); + sph_fugue512_close(&ctx_fugue, (void*)hash); + + sph_shabal512_init(&ctx_shabal); + sph_shabal512(&ctx_shabal, (const void*)hash, 64); + sph_shabal512_close(&ctx_shabal, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_shabal512(&ctx_shabal, (const void*)hash, 64); + sph_shabal512_close(&ctx_shabal, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_fugue512(&ctx_fugue, (const void*)hash, 64); + sph_fugue512_close(&ctx_fugue, (void*)hash); + + sph_shabal512(&ctx_shabal, (const void*)hash, 64); + sph_shabal512_close(&ctx_shabal, (void*)hash); + + sph_whirlpool_init(&ctx_whirlpool); + sph_whirlpool(&ctx_whirlpool, (const void*)hash, 64); + sph_whirlpool_close(&ctx_whirlpool, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_fugue512(&ctx_fugue, (const void*)hash, 64); + sph_fugue512_close(&ctx_fugue, (void*)hash); + + sph_shabal512(&ctx_shabal, (const void*)hash, 64); + sph_shabal512_close(&ctx_shabal, (void*)hash); + + sph_whirlpool(&ctx_whirlpool, (const void*)hash, 64); + sph_whirlpool_close(&ctx_whirlpool, (void*)hash); + + sph_sha512_init(&ctx_sha512); + sph_sha512(&ctx_sha512, (const void*)hash, 64); + sph_sha512_close(&ctx_sha512, (void*)hash); + + sph_whirlpool(&ctx_whirlpool, (const void*)hash, 64); + sph_whirlpool_close(&ctx_whirlpool, (void*)hash); + + + sph_bmw512(&ctx_bmw, (const void*)hash, 64); + sph_bmw512_close(&ctx_bmw, (void*)hash); + + sph_groestl512(&ctx_groestl, (const void*)hash, 64); + sph_groestl512_close(&ctx_groestl, (void*)hash); + + sph_skein512(&ctx_skein, (const void*)hash, 64); + sph_skein512_close(&ctx_skein, (void*)hash); + + sph_jh512(&ctx_jh, (const void*)hash, 64); + sph_jh512_close(&ctx_jh, (void*)hash); + + sph_keccak512(&ctx_keccak, (const void*)hash, 64); + sph_keccak512_close(&ctx_keccak, (void*)hash); + + sph_luffa512(&ctx_luffa, (const void*)hash, 64); + sph_luffa512_close(&ctx_luffa, (void*)hash); + + sph_cubehash512(&ctx_cubehash, (const void*)hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*)hash); + + sph_shavite512(&ctx_shavite, (const void*)hash, 64); + sph_shavite512_close(&ctx_shavite, (void*)hash); + + sph_simd512(&ctx_simd, (const void*)hash, 64); + sph_simd512_close(&ctx_simd, (void*)hash); + + sph_echo512(&ctx_echo, (const void*)hash, 64); + sph_echo512_close(&ctx_echo, (void*)hash); + + sph_hamsi512(&ctx_hamsi, (const void*)hash, 64); + sph_hamsi512_close(&ctx_hamsi, (void*)hash); + + sph_fugue512(&ctx_fugue, (const void*)hash, 64); + sph_fugue512_close(&ctx_fugue, (void*)hash); + + sph_shabal512(&ctx_shabal, (const void*)hash, 64); + sph_shabal512_close(&ctx_shabal, (void*)hash); + + sph_whirlpool(&ctx_whirlpool, (const void*)hash, 64); + sph_whirlpool_close(&ctx_whirlpool, (void*)hash); + + sph_sha512(&ctx_sha512, (const void*)hash, 64); + sph_sha512_close(&ctx_sha512, (void*)hash); + + sph_haval256_5_init(&ctx_haval); + sph_haval256_5(&ctx_haval, (const void*)hash, 64); + sph_haval256_5_close(&ctx_haval, (void*)hash); + + memcpy(output, hash, 32); +} + +#define x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash) \ + x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash, order++); \ + if (use_compat_kernels[thr_id]) x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash, order++); \ + else x16_echo512_cpu_hash_64(thr_id, throughput, d_hash) + + +static bool init[MAX_GPUS] = { 0 }; +static bool use_compat_kernels[MAX_GPUS] = { 0 }; + +extern "C" int scanhash_sonoa(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const int dev_id = device_map[thr_id]; + + uint32_t default_throughput = 1 << 18; + if (device_sm[dev_id] <= 500) default_throughput = 1 << 18; + else if (device_sm[dev_id] <= 520) default_throughput = 1 << 18; + else if (device_sm[dev_id] > 520) default_throughput = (1 << 19) + (1 << 18); + + uint32_t throughput = cuda_default_throughput(thr_id, default_throughput); + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); + + throughput &= 0xFFFFFF00; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x00ff; + + if (!init[thr_id]) + { + cudaSetDevice(dev_id); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + } + gpulog(LOG_INFO,thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); + + cuda_get_arch(thr_id); + use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500); + if (use_compat_kernels[thr_id]) + x11_echo512_cpu_init(thr_id, throughput); + + quark_blake512_cpu_init(thr_id, throughput); + quark_groestl512_cpu_init(thr_id, throughput); + quark_skein512_cpu_init(thr_id, throughput); + quark_bmw512_cpu_init(thr_id, throughput); + quark_keccak512_cpu_init(thr_id, throughput); + quark_jh512_cpu_init(thr_id, throughput); + x11_luffaCubehash512_cpu_init(thr_id, throughput); + x11_shavite512_cpu_init(thr_id, throughput); + x11_simd512_cpu_init(thr_id, throughput); + x13_hamsi512_cpu_init(thr_id, throughput); + x13_fugue512_cpu_init(thr_id, throughput); + x14_shabal512_cpu_init(thr_id, throughput); + x15_whirlpool_cpu_init(thr_id, throughput, 0); + x17_sha512_cpu_init(thr_id, throughput); + x17_haval256_cpu_init(thr_id, throughput); + + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 8 * sizeof(uint64_t) * throughput)); + + cuda_check_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + int warn = 0; + uint32_t _ALIGN(64) endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], pdata[k]); + + quark_blake512_cpu_setBlock_80(thr_id, endiandata); + cuda_check_cpu_setTarget(ptarget); + + do { + int order = 0; + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x16_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x17_sha512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; + x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++); + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x11_simd_echo512_cpu_hash_64(thr_id, throughput, d_hash[thr_id]); + x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x17_sha512_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; + x17_haval256_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], 256); order++; + + *hashes_done = pdata[19] - first_nonce + throughput; + + work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); + if (work->nonces[0] != UINT32_MAX) + { + const uint32_t Htarg = ptarget[7]; + uint32_t _ALIGN(64) vhash[8]; + be32enc(&endiandata[19], work->nonces[0]); + sonoa_hash(vhash, endiandata); + + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) + { + work->valid_nonces = 1; + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); + work_set_target_ratio(work, vhash); + if (work->nonces[1] != 0) { + be32enc(&endiandata[19], work->nonces[1]); + sonoa_hash(vhash, endiandata); + bn_set_target_ratio(work, vhash, 1); + work->valid_nonces++; + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; + } else { + pdata[19] = work->nonces[0] + 1; // cursor + } + return work->valid_nonces; + } + else if (vhash[7] > Htarg) { + gpu_increment_reject(thr_id); + if (!warn) { + warn++; + pdata[19] = work->nonces[0] + 1; + continue; + } else { + if (!opt_quiet) + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); + warn = 0; + } + } + } + + if ((uint64_t)throughput + pdata[19] >= max_nonce) { + pdata[19] = max_nonce; + break; + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce; + return 0; +} + +extern "C" void free_sonoa(int thr_id) +{ + if (!init[thr_id]) + return; + + cudaDeviceSynchronize(); + + cudaFree(d_hash[thr_id]); + + quark_blake512_cpu_free(thr_id); + quark_groestl512_cpu_free(thr_id); + x11_simd512_cpu_free(thr_id); + x13_fugue512_cpu_free(thr_id); + x15_whirlpool_cpu_free(thr_id); + + cudaDeviceSynchronize(); + init[thr_id] = false; +}