handle the new tribus algo

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
8 years ago · c120ecae1d
10 changed files with 188 additions and 1 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -65,6 +65,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  sph/ripemd.c sph/sph_sha2.c \
 			  lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu lbry/cuda_lbry_merged.cu \
 			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
 			  tribus.cu \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
 			  x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu x11/timetravel.cu x11/bitcore.cu \
--- a/README.txt
+++ b/README.txt
@ -117,6 +117,7 @@ its command line interface and options.
                          skein       use to mine Skeincoin
                          skein2      use to mine Woodcoin
                          timetravel  use to mine MachineCoin
                          tribus      use to mine Denarius
                          x11evo      use to mine Revolver
                          x11         use to mine DarkCoin
                          x14         use to mine X14Coin
@ -282,6 +283,7 @@ features.
  v2.1 (unfinished)
                  Interface equihash algo with djeZo solver (from nheqminer 0.5c)
                  New api parameters (and multicast announces for local networks)
                  New tribus algo
  May. 14th 2017  v2.0
                  Handle cryptonight, wildkeccak and cryptonight-lite
--- a/algos.h
+++ b/algos.h
@ -47,6 +47,7 @@ enum sha_algos {
 	ALGO_SKEIN2,
 	ALGO_S3,
 	ALGO_TIMETRAVEL,
 	ALGO_TRIBUS,
 	ALGO_BITCORE,
 	ALGO_X11EVO,
 	ALGO_X11,
@ -110,6 +111,7 @@ static const char *algo_names[] = {
 	"skein2",
 	"s3",
 	"timetravel",
 	"tribus",
 	"bitcore",
 	"x11evo",
 	"x11",
--- a/bench.cpp
+++ b/bench.cpp
@ -100,6 +100,7 @@ void algo_free_all(int thr_id)
 	free_scrypt(thr_id);
 	free_scrypt_jane(thr_id);
 	free_timetravel(thr_id);
 	free_tribus(thr_id);
 	free_bitcore(thr_id);
 }
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -277,6 +277,7 @@ Options:\n\
 			skein2      Double Skein (Woodcoin)\n\
 			s3          S3 (1Coin)\n\
 			timetravel  Machinecoin permuted x8\n\
 			tribus      Denarius\n\
 			vanilla     Blake256-8 (VNL)\n\
 			veltor      Thorsriddle streebog\n\
 			whirlcoin   Old Whirlcoin (Whirlpool algo)\n\
@ -2197,6 +2198,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_SIA:
 			case ALGO_SKEIN:
 			case ALGO_SKEIN2:
 			case ALGO_TRIBUS:
 				minmax = 0x1000000;
 				break;
 			case ALGO_C11:
@ -2433,6 +2435,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_TIMETRAVEL:
 			rc = scanhash_timetravel(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_TRIBUS:
 			rc = scanhash_tribus(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_BITCORE:
 			rc = scanhash_bitcore(thr_id, &work, max_nonce, &hashes_done);
 			break;
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -531,6 +531,7 @@
    <CudaCompile Include="cuda_skeincoin.cu">
      <MaxRegCount>48</MaxRegCount>
    </CudaCompile>
    <CudaCompile Include="tribus.cu" />
    <ClInclude Include="x11\cuda_x11_aes.cuh" />
    <CudaCompile Include="x11\cuda_x11_cubehash512.cu" />
    <CudaCompile Include="x11\cuda_x11_echo.cu">
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -754,6 +754,9 @@
    <CudaCompile Include="pentablake.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="tribus.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="x11\sib.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
--- a/miner.h
+++ b/miner.h
@ -310,6 +310,7 @@ extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce,
 extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_tribus(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_bitcore(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
 extern int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -369,6 +370,7 @@ extern void free_skeincoin(int thr_id);
 extern void free_skein2(int thr_id);
 extern void free_s3(int thr_id);
 extern void free_timetravel(int thr_id);
 extern void free_tribus(int thr_id);
 extern void free_bitcore(int thr_id);
 extern void free_vanilla(int thr_id);
 extern void free_veltor(int thr_id);
@ -909,6 +911,7 @@ void skein2hash(void *output, const void *input);
 void s3hash(void *output, const void *input);
 void timetravel_hash(void *output, const void *input);
 void bitcore_hash(void *output, const void *input);
 void tribus_hash(void *output, const void *input);
 void veltorhash(void *output, const void *input);
 void wcoinhash(void *state, const void *input);
 void whirlxHash(void *state, const void *input);
--- a/tribus.cu
+++ b/tribus.cu
@ -0,0 +1,166 @@
 /**
 * Tribus Algo for Denarius
 *
 * tpruvot@github 06 2017 - GPLv3
 *
 */
 extern "C" {
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
 #include "sph/sph_echo.h"
 }
 #include "miner.h"
 #include "cuda_helper.h"
 #include "x11/cuda_x11.h"
 // JH-512 helpers for the 80-byte block header; only declared here, not defined in this file.
 // jh512_setBlock_80 receives the 20-word big-endian encoded header prepared by scanhash_tribus.
 void jh512_setBlock_80(int thr_id, uint32_t *endiandata);
 // Hashes `threads` nonces starting at startNounce and writes the results to d_hash.
 // NOTE(review): output layout is presumably one 64-byte digest per thread (matches the
 // 64 * throughput allocation in scanhash_tribus) - confirm against the kernel source.
 void jh512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash);
 // Per-thread pointer to the device hash buffer (cudaMalloc'ed in scanhash_tribus, released in free_tribus).
 static uint32_t *d_hash[MAX_GPUS];
 /**
  * CPU reference implementation of the tribus hash.
  *
  * Chains three 512-bit hashes: JH-512 over the first 80 bytes of the input,
  * then Keccak-512 and Echo-512, each over the previous 64-byte digest.
  * The first 32 bytes of the final digest are copied to the output.
  *
  * @param state  output buffer, receives 32 bytes
  * @param input  input buffer, 80 bytes are read
  */
 extern "C" void tribus_hash(void *state, const void *input)
 {
 	// single work buffer, reused as input and output of the chained stages
 	uint8_t _ALIGN(64) digest[64];
 	// stage 1: JH-512 of the 80-byte input
 	{
 		sph_jh512_context jh;
 		sph_jh512_init(&jh);
 		sph_jh512(&jh, input, 80);
 		sph_jh512_close(&jh, digest);
 	}
 	// stage 2: Keccak-512 of the 64-byte JH digest
 	{
 		sph_keccak512_context keccak;
 		sph_keccak512_init(&keccak);
 		sph_keccak512(&keccak, digest, 64);
 		sph_keccak512_close(&keccak, digest);
 	}
 	// stage 3: Echo-512 of the 64-byte Keccak digest
 	{
 		sph_echo512_context echo;
 		sph_echo512_init(&echo);
 		sph_echo512(&echo, digest, 64);
 		sph_echo512_close(&echo, digest);
 	}
 	// only the first 256 bits are handed back to the caller
 	memcpy(state, digest, 32);
 }
 // Per-thread flag: true once scanhash_tribus has set up the GPU resources for that thread.
 static bool init[MAX_GPUS] = { 0 };
 /**
  * Scan a nonce range on the GPU for the tribus algo (JH-512 -> Keccak-512 -> Echo-512).
  *
  * Starting at work->data[19], batches of `throughput` nonces are hashed on the
  * device; a candidate reported by cuda_check_hash() is re-hashed on the CPU with
  * tribus_hash() and checked against the target before being accepted.
  *
  * @param thr_id       miner thread index (indexes init[], d_hash[], device_map[], work_restart[])
  * @param work         work item; data[19] is used as the nonce cursor, nonces[] and
  *                     valid_nonces receive the results
  * @param max_nonce    upper bound of the nonce range to scan
  * @param hashes_done  updated on every batch with (cursor - first nonce + throughput)
  * @return work->valid_nonces: 0 when the scan ended without a result, 1 or 2 otherwise
  */
 extern "C" int scanhash_tribus(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
 {
 	uint32_t _ALIGN(64) endiandata[20];
 	uint32_t *pdata = work->data;
 	uint32_t *ptarget = work->target;
 	const uint32_t first_nonce = pdata[19];
 	// default intensity: 2^20 threads on Windows, 2^23 elsewhere
 	int8_t intensity = is_windows() ? 20 : 23;
 	uint32_t throughput =  cuda_default_throughput(thr_id, 1 << intensity);
 	// once initialized, never launch more threads than nonces left in the range
 	// (on the first call the full throughput is kept, it sizes the allocations below)
 	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
 	// benchmark mode overrides the high target word with a fixed value
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x00FF;
 	// one-time per-thread device setup
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
 		if (opt_cudaschedule == -1 && gpu_threads == 1) {
 			cudaDeviceReset();
 			// reduce cpu usage
 			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			CUDA_LOG_ERROR();
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
 		// init the three hash stages used by the scan loop
 		quark_jh512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		// char[64] work space for hashes results
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)64 * throughput));
 		cuda_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
 	// big-endian encode the 20 header words (80 bytes) for the JH-512 first stage
 	for (int k=0; k < 20; k++)
 		be32enc(&endiandata[k], pdata[k]);
 	jh512_setBlock_80(thr_id, endiandata);
 	cuda_check_cpu_setTarget(ptarget);
 	work->valid_nonces = 0;
 	do {
 		// passed (post-incremented) to the 64-byte stages; its meaning is defined by those helpers
 		int order = 1;
 		// Hash with CUDA
 		jh512_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		*hashes_done = pdata[19] - first_nonce + throughput;
 		// UINT32_MAX is treated as "no candidate in this batch"
 		work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		if (work->nonces[0] != UINT32_MAX)
 		{
 			const uint32_t Htarg = ptarget[7];
 			uint32_t _ALIGN(64) vhash[8];
 			// re-hash the candidate on the CPU before trusting the GPU result
 			be32enc(&endiandata[19], work->nonces[0]);
 			tribus_hash(vhash, endiandata);
 			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
 				work->valid_nonces = 1;
 				work_set_target_ratio(work, vhash);
 				// look for a second candidate in the same batch; a zero result is treated as "none"
 				work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
 				if (work->nonces[1] != 0) {
 					// NOTE(review): the second nonce is hashed for its ratio but not checked against the target here
 					be32enc(&endiandata[19], work->nonces[1]);
 					tribus_hash(vhash, endiandata);
 					bn_set_target_ratio(work, vhash, 1);
 					work->valid_nonces++;
 					// resume after the highest of the two nonces
 					pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
 				} else {
 					pdata[19] = work->nonces[0] + 1; // cursor
 				}
 				goto out;
 			}
 			else if (vhash[7] > Htarg) {
 				// GPU candidate not confirmed by the CPU hash: count it, warn, and
 				// restart the batch right after the bad nonce
 				gpu_increment_reject(thr_id);
 				if (!opt_quiet)
 				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
 				pdata[19] = work->nonces[0] + 1;
 				continue;
 			}
 			// remaining case (vhash[7] <= Htarg but fulltest failed) falls through
 			// to the normal cursor advance below
 		}
 		// stop when the next batch would reach or pass max_nonce (64-bit sum avoids wrap-around)
 		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
 			pdata[19] = max_nonce;
 			break;
 		}
 		pdata[19] += throughput;
 	} while (!work_restart[thr_id].restart);
 out:
 //	*hashes_done = pdata[19] - first_nonce;
 	return work->valid_nonces;
 }
 /**
  * Resources cleanup for the tribus algo.
  *
  * Releases what scanhash_tribus() set up for this thread: the device hash
  * buffer and the cuda_check state. Does nothing when the thread was never
  * initialized.
  *
  * @param thr_id  miner thread index (indexes init[] and d_hash[])
  */
 extern "C" void free_tribus(int thr_id)
 {
 	// nothing to release if scanhash_tribus never ran its init for this thread
 	if (!init[thr_id])
 		return;
 	// wait for pending device work before freeing its buffers
 	// (replaces the deprecated cudaThreadSynchronize())
 	cudaDeviceSynchronize();
 	cudaFree(d_hash[thr_id]);
 	// do not keep a dangling device pointer around
 	d_hash[thr_id] = NULL;
 	// no groestl free here: tribus only initializes jh512, keccak512 and echo512
 	cuda_check_cpu_free(thr_id);
 	init[thr_id] = false;
 	cudaDeviceSynchronize();
 }
--- a/util.cpp
+++ b/util.cpp
@ -2279,6 +2279,9 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 8);
 	printpfx("vanilla", hash);
 	tribus_hash(&hash[0], &buf[0]);
 	printpfx("tribus", hash);
 	veltorhash(&hash[0], &buf[0]);
 	printpfx("veltor", hash);