implement x11evo algo

Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com>
9 years ago · 85c212eaad
12 changed files with 441 additions and 5 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -53,7 +53,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
-			  x11/cuda_x11_luffa512_Cubehash.cu \
+			  x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
 			  x15/whirlpool.cu \
--- a/README.txt
+++ b/README.txt
@ -1,5 +1,5 @@
-ccMiner release 1.7.6 (May 2016) "DCR vote & pool device stats"
+ccMiner preview 1.8-dev (May 2016) "Pascal and x11evo algo"
 ---------------------------------------------------------------
 ***************************************************************
@ -98,6 +98,7 @@ its command line interface and options.
                          sib         use to mine Sibcoin
                          skein       use to mine Skeincoin
                          skein2      use to mine Woodcoin
                          x11evo      use to mine Revolver
                          x11         use to mine DarkCoin
                          x14         use to mine X14Coin
                          x15         use to mine Halcyon
@ -239,6 +240,10 @@ features.
 >>> RELEASE HISTORY <<<
  June 2016       v1.8.0
                  Pascal support with cuda 8
                  x11evo algo (XRE)
  May  18th 2016  v1.7.6
                  Decred vote support
                  X17 cleanup and improvement
--- a/algos.h
+++ b/algos.h
@ -35,6 +35,7 @@ enum sha_algos {
 	ALGO_SKEIN,
 	ALGO_SKEIN2,
 	ALGO_S3,
 	ALGO_X11EVO,
 	ALGO_X11,
 	ALGO_X13,
 	ALGO_X14,
@ -82,6 +83,7 @@ static const char *algo_names[] = {
 	"skein",
 	"skein2",
 	"s3",
 	"x11evo",
 	"x11",
 	"x13",
 	"x14",
--- a/bench.cpp
+++ b/bench.cpp
@ -71,6 +71,7 @@ void algo_free_all(int thr_id)
 	free_s3(thr_id);
 	free_vanilla(thr_id);
 	free_whirl(thr_id);
 	free_x11evo(thr_id);
 	free_x11(thr_id);
 	free_x13(thr_id);
 	free_x14(thr_id);
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -241,6 +241,7 @@ Options:\n\
 			skein       Skein SHA2 (Skeincoin)\n\
 			skein2      Double Skein (Woodcoin)\n\
 			s3          S3 (1Coin)\n\
 			x11evo      Permuted x11 (Revolver)\n\
 			x11         X11 (DarkCoin)\n\
 			x13         X13 (MaruCoin)\n\
 			x14         X14\n\
@ -1901,6 +1902,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_HEAVY:
 			case ALGO_LYRA2v2:
 			case ALGO_S3:
 			case ALGO_X11EVO:
 			case ALGO_X11:
 			case ALGO_X13:
 			case ALGO_WHIRLCOIN:
@ -2066,6 +2068,9 @@ static void *miner_thread(void *userdata)
 		//case ALGO_WHIRLPOOLX:
 		//	rc = scanhash_whirlx(thr_id, &work, max_nonce, &hashes_done);
 		//	break;
 		case ALGO_X11EVO:
 			rc = scanhash_x11evo(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_X11:
 			rc = scanhash_x11(thr_id, &work, max_nonce, &hashes_done);
 			break;
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -494,6 +494,7 @@
    <CudaCompile Include="x11\sib.cu" />
    <CudaCompile Include="x11\s3.cu" />
    <CudaCompile Include="x11\x11.cu" />
    <CudaCompile Include="x11\x11evo.cu" />
    <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
      <MaxRegCount>72</MaxRegCount>
    </CudaCompile>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -571,6 +571,9 @@
    <CudaCompile Include="x11\x11.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
    <CudaCompile Include="x11\x11evo.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
    <CudaCompile Include="x13\cuda_x13_fugue512.cu">
      <Filter>Source Files\CUDA\x13</Filter>
    </CudaCompile>
--- a/miner.h
+++ b/miner.h
@ -288,6 +288,7 @@ extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, un
 extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
 extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -331,6 +332,7 @@ extern void free_skein2(int thr_id);
 extern void free_s3(int thr_id);
 extern void free_vanilla(int thr_id);
 extern void free_whirl(int thr_id);
 extern void free_x11evo(int thr_id);
 extern void free_x11(int thr_id);
 extern void free_x13(int thr_id);
 extern void free_x14(int thr_id);
@ -807,6 +809,7 @@ void skein2hash(void *output, const void *input);
 void s3hash(void *output, const void *input);
 void wcoinhash(void *state, const void *input);
 void whirlxHash(void *state, const void *input);
 void x11evo_hash(void *output, const void *input);
 void x11hash(void *output, const void *input);
 void x13hash(void *output, const void *input);
 void x14hash(void *output, const void *input);
--- a/skein2.cpp
+++ b/skein2.cpp
@ -11,8 +11,6 @@
 static uint32_t *d_hash[MAX_GPUS];
 extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
 extern void skein512_cpu_setBlock_80(void *pdata);
 extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
--- a/util.cpp
+++ b/util.cpp
@ -2192,6 +2192,9 @@ void print_hash_tests(void)
 	//whirlxHash(&hash[0], &buf[0]);
 	//printpfx("whirlpoolx", hash);
 	x11evo_hash(&hash[0], &buf[0]);
 	printpfx("x11evo", hash);
 	x11hash(&hash[0], &buf[0]);
 	printpfx("X11", hash);
--- a/x11/cuda_x11.h
+++ b/x11/cuda_x11.h
@ -3,6 +3,9 @@
 extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash, int order);
 extern void x11_luffa512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_luffa512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
--- a/x11/x11evo.cu
+++ b/x11/x11evo.cu
@ -0,0 +1,412 @@
 /**
 * X11EVO algo implementation
 * Cuda implementation by tpruvot@github - May 2016
 */
 #include <stdio.h>
 #include <memory.h>
 extern "C" {
 #include "sph/sph_blake.h"
 #include "sph/sph_bmw.h"
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
 #include "sph/sph_luffa.h"
 #include "sph/sph_cubehash.h"
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
 }
 #include "miner.h"
 #include "cuda_helper.h"
 #include "cuda_x11.h"
 static uint32_t *d_hash[MAX_GPUS];
 enum Algo {
 	BLAKE = 0,
 	BMW,
 	GROESTL,
 	SKEIN,
 	JH,
 	KECCAK,
 	LUFFA,
 	CUBEHASH,
 	SHAVITE,
 	SIMD,
 	ECHO,
 	HASH_FUNC_COUNT
 };
 static void swap8(uint8_t *a, uint8_t *b)
 {
 	uint8_t t = *a;
 	*a = *b;
 	*b = t;
 }
 static void initPerm(uint8_t n[], int count)
 {
 	for (int i = 0; i < count; i++)
 		n[i] = i;
 }
 static int nextPerm(uint8_t n[], int count)
 {
 	int tail, i, j;
 	if (count <= 1)
 		return 0;
 	for (i = count - 1; i>0 && n[i - 1] >= n[i]; i--);
 	tail = i;
 	if (tail > 0) {
 		for (j = count - 1; j>tail && n[j] <= n[tail - 1]; j--);
 		swap8(&n[tail - 1], &n[j]);
 	}
 	for (i = tail, j = count - 1; i<j; i++, j--)
 		swap8(&n[i], &n[j]);
 	return (tail != 0);
 }
 static void getAlgoString(char *str, int seq)
 {
 	uint8_t algoList[HASH_FUNC_COUNT];
 	char *sptr;
 	initPerm(algoList, HASH_FUNC_COUNT);
 	for (int k = 0; k < seq; k++) {
 		nextPerm(algoList, HASH_FUNC_COUNT);
 	}
 	sptr = str;
 	for (int j = 0; j < HASH_FUNC_COUNT; j++) {
 		if (algoList[j] >= 10)
 			sprintf(sptr, "%c", 'A' + (algoList[j] - 10));
 		else
 			sprintf(sptr, "%u", (uint32_t) algoList[j]);
 		sptr++;
 	}
 	*sptr = '\0';
 	//applog(LOG_DEBUG, "nextPerm %s", str);
 }
 static __thread uint32_t s_ntime = 0;
 static char hashOrder[HASH_FUNC_COUNT + 1] = { 0 };
 static int  s_sequence = -1;
 #define INITIAL_DATE 0x57254700
 static inline int getCurrentAlgoSeq(uint32_t current_time)
 {
 	// change once per day
 	return (int) (current_time - INITIAL_DATE) / (60 * 60 * 24);
 }
 static void evo_twisted_code(uint32_t ntime, char *permstr)
 {
 	int seq = getCurrentAlgoSeq(ntime);
 	if (s_sequence != seq) {
 		getAlgoString(permstr, seq);
 		s_sequence = seq;
 	}
 }
 // X11evo CPU Hash
 extern "C" void x11evo_hash(void *output, const void *input)
 {
 	uint32_t _ALIGN(64) hash[64/4] = { 0 };
 	sph_blake512_context     ctx_blake;
 	sph_bmw512_context       ctx_bmw;
 	sph_groestl512_context   ctx_groestl;
 	sph_skein512_context     ctx_skein;
 	sph_jh512_context        ctx_jh;
 	sph_keccak512_context    ctx_keccak;
 	sph_luffa512_context     ctx_luffa1;
 	sph_cubehash512_context  ctx_cubehash1;
 	sph_shavite512_context   ctx_shavite1;
 	sph_simd512_context      ctx_simd1;
 	sph_echo512_context      ctx_echo1;
 	if (s_sequence == -1) {
 		uint32_t *data = (uint32_t*) input;
 		const uint32_t ntime = data[17];
 		evo_twisted_code(ntime, hashOrder);
 	}
 	void *in = (void*) input;
 	int size = 80;
 	const int hashes = (int) strlen(hashOrder);
 	for (int i = 0; i < hashes; i++)
 	{
 		const char elem = hashOrder[i];
 		uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
 		if (i > 0) {
 			in = (void*) hash;
 			size = 64;
 		}
 		switch (algo) {
 		case BLAKE:
 			sph_blake512_init(&ctx_blake);
 			sph_blake512(&ctx_blake, in, size);
 			sph_blake512_close(&ctx_blake, hash);
 			break;
 		case BMW:
 			sph_bmw512_init(&ctx_bmw);
 			sph_bmw512(&ctx_bmw, in, size);
 			sph_bmw512_close(&ctx_bmw, hash);
 			break;
 		case GROESTL:
 			sph_groestl512_init(&ctx_groestl);
 			sph_groestl512(&ctx_groestl, in, size);
 			sph_groestl512_close(&ctx_groestl, hash);
 			break;
 		case SKEIN:
 			sph_skein512_init(&ctx_skein);
 			sph_skein512(&ctx_skein, in, size);
 			sph_skein512_close(&ctx_skein, hash);
 			break;
 		case JH:
 			sph_jh512_init(&ctx_jh);
 			sph_jh512(&ctx_jh, in, size);
 			sph_jh512_close(&ctx_jh, hash);
 			break;
 		case KECCAK:
 			sph_keccak512_init(&ctx_keccak);
 			sph_keccak512(&ctx_keccak, in, size);
 			sph_keccak512_close(&ctx_keccak, hash);
 			break;
 		case LUFFA:
 			sph_luffa512_init(&ctx_luffa1);
 			sph_luffa512(&ctx_luffa1, in, size);
 			sph_luffa512_close(&ctx_luffa1, hash);
 			break;
 		case CUBEHASH:
 			sph_cubehash512_init(&ctx_cubehash1);
 			sph_cubehash512(&ctx_cubehash1, in, size);
 			sph_cubehash512_close(&ctx_cubehash1, hash);
 			break;
 		case SHAVITE:
 			sph_shavite512_init(&ctx_shavite1);
 			sph_shavite512(&ctx_shavite1, in, size);
 			sph_shavite512_close(&ctx_shavite1, hash);
 			break;
 		case SIMD:
 			sph_simd512_init(&ctx_simd1);
 			sph_simd512(&ctx_simd1, in, size);
 			sph_simd512_close(&ctx_simd1, hash);
 			break;
 		case ECHO:
 			sph_echo512_init(&ctx_echo1);
 			sph_echo512(&ctx_echo1, in, size);
 			sph_echo512_close(&ctx_echo1, hash);
 			break;
 		}
 	}
 	memcpy(output, hash, 32);
 }
 //#define _DEBUG
 #define _DEBUG_PREFIX "evo"
 #include "cuda_debug.cuh"
 static bool init[MAX_GPUS] = { 0 };
 extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
 extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
 {
 	uint32_t *pdata = work->data;
 	uint32_t *ptarget = work->target;
 	const uint32_t first_nonce = pdata[19];
 	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
 	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
 	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_debug || s_ntime != pdata[17] || s_sequence == -1) {
 		uint32_t ntime = swab32(work->data[17]);
 		evo_twisted_code(ntime, hashOrder);
 		s_ntime = pdata[17];
 		if (opt_debug) {
 			int secs = (int) (ntime - INITIAL_DATE) % (60 * 60 * 24);
 			secs = (60 * 60 * 24) - secs;
 			applog(LOG_DEBUG, "evo hash order %s, next in %d mn", hashOrder, secs/60);
 		}
 	}
 	if (opt_benchmark)
 		ptarget[7] = 0x5;
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
 		if (opt_cudaschedule == -1 && gpu_threads == 1) {
 			cudaDeviceReset();
 			// reduce cpu usage
 			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			CUDA_LOG_ERROR();
 		}
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_bmw512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		x11_luffa512_cpu_init(thr_id, throughput);
 		x11_cubehash512_cpu_init(thr_id, throughput);
 		x11_shavite512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
 			return 0;
 		}
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
 		cuda_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
 	uint32_t endiandata[20];
 	for (int k=0; k < 19; k++)
 		be32enc(&endiandata[k], pdata[k]);
 	cuda_check_cpu_setTarget(ptarget);
 	quark_blake512_cpu_setBlock_80(thr_id, endiandata);
 	const int hashes = (int) strlen(hashOrder);
 	do {
 		int order = 1;
 		uint32_t foundNonce;
 		// Hash with CUDA
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		TRACE("blake80:");
 		for (int i = 1; i < hashes; i++)
 		{
 			const char elem = hashOrder[i];
 			const uint8_t algo64 = elem >= 'A' ? elem - 'A' + 10 : elem - '0';
 			switch (algo64) {
 			case BLAKE:
 				quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("blake  :");
 				break;
 			case BMW:
 				quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("bmw    :");
 				break;
 			case GROESTL:
 				quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("groestl:");
 				break;
 			case SKEIN:
 				quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("skein  :");
 				break;
 			case JH:
 				quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("jh512  :");
 				break;
 			case KECCAK:
 				quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("keccak :");
 				break;
 			case LUFFA:
 				x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("luffa  :");
 				break;
 			case CUBEHASH:
 				x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("cube   :");
 				break;
 			case SHAVITE:
 				x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("shavite:");
 				break;
 			case SIMD:
 				x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("simd   :");
 				break;
 			case ECHO:
 				x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 				TRACE("echo => ");
 				break;
 			}
 		}
 		foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		if (foundNonce != UINT32_MAX)
 		{
 			uint32_t _ALIGN(64) vhash64[8];
 			const uint32_t Htarg = ptarget[7];
 			be32enc(&endiandata[19], foundNonce);
 			x11evo_hash(vhash64, endiandata);
 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
 				int res = 1;
 				// check if there was some other ones...
 				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
 				work_set_target_ratio(work, vhash64);
 				*hashes_done = pdata[19] - first_nonce + throughput;
 				if (secNonce != 0) {
 					be32enc(&endiandata[19], secNonce);
 					x11evo_hash(vhash64, endiandata);
 					if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
 						work_set_target_ratio(work, vhash64);
 					pdata[21] = secNonce;
 					res++;
 				}
 				pdata[19] = foundNonce;
 				return res;
 			} else {
 				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
 				pdata[19] = foundNonce + 1;
 				continue;
 			}
 		}
 		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
 			pdata[19] = max_nonce;
 			break;
 		}
 		pdata[19] += throughput;
 	} while (!work_restart[thr_id].restart);
 	*hashes_done = pdata[19] - first_nonce;
 	return 0;
 }
 // cleanup
 extern "C" void free_x11evo(int thr_id)
 {
 	if (!init[thr_id])
 		return;
 	cudaThreadSynchronize();
 	cudaFree(d_hash[thr_id]);
 	quark_blake512_cpu_free(thr_id);
 	quark_groestl512_cpu_free(thr_id);
 	x11_simd512_cpu_free(thr_id);
 	cuda_check_cpu_free(thr_id);
 	init[thr_id] = false;
 	cudaDeviceSynchronize();
 }