Add c11 algo (x11 variant)

Used by Chaincoin and Flaxscript
10 years ago · c5df142124
8 changed files with 277 additions and 8 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -54,7 +54,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
				@@ -54,7 +54,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
 			  x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
-			  x11/s3.cu
+			  x11/c11.cu x11/s3.cu

 # scrypt
 ccminer_SOURCES += scrypt.cpp scrypt-jane.cpp \
--- a/README.txt
+++ b/README.txt
@ -31,6 +31,7 @@ JackpotCoin
				@@ -31,6 +31,7 @@ JackpotCoin
 QuarkCoin family & AnimeCoin
 TalkCoin
 DarkCoin and other X11 coins
+Chaincoin and Flaxscript (C11)
 Saffroncoin blake (256 14-rounds)
 BlakeCoin (256 8-rounds)
 Qubit (Digibyte, ...)
@ -66,6 +67,7 @@ its command line interface and options.
				@@ -66,6 +67,7 @@ its command line interface and options.
                          anime       use to mine Animecoin
                          blake       use to mine Saffroncoin (Blake 256)
                          blakecoin   use to mine Old Blake 256
+                          c11/flax    use to mine Chaincoin and Flax
                          deep        use to mine Deepcoin
                          dmd-gr      use to mine Diamond-Groestl
                          fresh       use to mine Freshcoin
@ -222,6 +224,7 @@ features.
				@@ -222,6 +224,7 @@ features.

  July 2015...
                  Nvml api power limits
+                  Add chaincoin c11 algo (used by Flaxscript too)
                  Remove pluck algo

  June 23th 2015  v1.6.5
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -87,6 +87,7 @@ enum sha_algos {
				@@ -87,6 +87,7 @@ enum sha_algos {
 	ALGO_ANIME,
 	ALGO_BLAKE,
 	ALGO_BLAKECOIN,
+	ALGO_C11,
 	ALGO_DEEP,
 	ALGO_DMD_GR,
 	ALGO_FRESH,
@ -115,12 +116,14 @@ enum sha_algos {
				@@ -115,12 +116,14 @@ enum sha_algos {
 	ALGO_X15,
 	ALGO_X17,
 	ALGO_ZR5,
+	ALGO_COUNT
 };

 static const char *algo_names[] = {
 	"anime",
 	"blake",
 	"blakecoin",
+	"c11",
 	"deep",
 	"dmd-gr",
 	"fresh",
@ -149,6 +152,7 @@ static const char *algo_names[] = {
				@@ -149,6 +152,7 @@ static const char *algo_names[] = {
 	"x15",
 	"x17",
 	"zr5",
+	""
 };

 bool opt_debug = false;
@ -275,6 +279,7 @@ Options:\n\
				@@ -275,6 +279,7 @@ Options:\n\
 			anime       Animecoin\n\
 			blake       Blake 256 (SFR)\n\
 			blakecoin   Fast Blake 256 (8 rounds)\n\
+			c11/flax    X11 variant\n\
 			deep        Deepcoin\n\
 			dmd-gr      Diamond-Groestl\n\
 			fresh       Freshcoin (shavite 80)\n\
@ -1744,6 +1749,7 @@ static void *miner_thread(void *userdata)
				@@ -1744,6 +1749,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_LUFFA:
 				minmax = 0x2000000;
 				break;
+			case ALGO_C11:
 			case ALGO_S3:
 			case ALGO_X11:
 			case ALGO_X13:
@ -1818,6 +1824,11 @@ static void *miner_thread(void *userdata)
				@@ -1818,6 +1824,11 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;

+		case ALGO_C11:
+			rc = scanhash_c11(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
 		case ALGO_FUGUE256:
 			rc = scanhash_fugue256(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@ -2404,24 +2415,26 @@ void parse_arg(int key, char *arg)
				@@ -2404,24 +2415,26 @@ void parse_arg(int key, char *arg)
 	case 'a': /* --algo */
 		p = strstr(arg, ":"); // optional factor
 		if (p) *p = '\0';
-		for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
+		for (i = 0; i < ALGO_COUNT; i++) {
 			if (algo_names[i] && !strcasecmp(arg, algo_names[i])) {
 				opt_algo = (enum sha_algos)i;
 				break;
 			}
 		}
-		if (i == ARRAY_SIZE(algo_names)) {
+		if (i == ALGO_COUNT) {
 			// some aliases...
-			if (!strcasecmp("diamond", arg))
+			if (!strcasecmp("flax", arg))
+				i = opt_algo = ALGO_C11;
+			else if (!strcasecmp("diamond", arg))
 				i = opt_algo = ALGO_DMD_GR;
-			if (!strcasecmp("doom", arg))
+			else if (!strcasecmp("doom", arg))
 				i = opt_algo = ALGO_LUFFA;
 			else if (!strcasecmp("ziftr", arg))
 				i = opt_algo = ALGO_ZR5;
 			else
 				applog(LOG_ERR, "Unknown algo parameter '%s'", arg);
 		}
-		if (i == ARRAY_SIZE(algo_names))
+		if (i == ALGO_COUNT)
 			show_usage_and_exit(1);
 		if (p) {
 			opt_nfactor = atoi(p + 1);
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -462,8 +462,8 @@
				@@ -462,8 +462,8 @@
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
      <MaxRegCount>64</MaxRegCount>
    </CudaCompile>
-    <CudaCompile Include="x11\fresh.cu">
-    </CudaCompile>
+    <CudaCompile Include="x11\c11.cu" />
+    <CudaCompile Include="x11\fresh.cu" />
    <CudaCompile Include="x11\s3.cu" />
    <CudaCompile Include="x11\simd_functions.cu">
      <ExcludedFromBuild>true</ExcludedFromBuild>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -517,6 +517,9 @@
				@@ -517,6 +517,9 @@
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
+    <CudaCompile Include="x11\c11.cu">
+      <Filter>Source Files\CUDA\x11</Filter>
+    </CudaCompile>
    <CudaCompile Include="x11\fresh.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
--- a/miner.h
+++ b/miner.h
@ -307,6 +307,10 @@ extern int scanhash_blake256(int thr_id, uint32_t *pdata,
				@@ -307,6 +307,10 @@ extern int scanhash_blake256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done, int8_t blakerounds);

+extern int scanhash_c11(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@ -764,6 +768,7 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
				@@ -764,6 +768,7 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
 void print_hash_tests(void);
 void animehash(void *state, const void *input);
 void blake256hash(void *output, const void *input, int8_t rounds);
+void c11hash(void *output, const void *input);
 void deephash(void *state, const void *input);
 void luffa_hash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
--- a/util.cpp
+++ b/util.cpp
@ -1814,6 +1814,9 @@ void print_hash_tests(void)
				@@ -1814,6 +1814,9 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 14);
 	printpfx("blake", hash);

+	c11hash(&hash[0], &buf[0]);
+	printpfx("c11", hash);
+
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);

--- a/x11/c11.cu
+++ b/x11/c11.cu
@ -0,0 +1,242 @@
				@@ -0,0 +1,242 @@
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+}
+
+#include "miner.h"
+#include "cuda_helper.h"
+
+#include <stdio.h>
+#include <memory.h>
+
+static uint32_t *d_hash[MAX_GPUS];
+
+extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
+
+extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads);
+extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern int  x11_simd512_cpu_init(int thr_id, uint32_t threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
+                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
+
+// Flax/C11 CPU Hash
+extern "C" void c11hash(void *output, const void *input)
+{
+	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11
+
+	sph_blake512_context ctx_blake;
+	sph_bmw512_context ctx_bmw;
+	sph_groestl512_context ctx_groestl;
+	sph_jh512_context ctx_jh;
+	sph_keccak512_context ctx_keccak;
+	sph_skein512_context ctx_skein;
+	sph_luffa512_context ctx_luffa;
+	sph_cubehash512_context ctx_cubehash;
+	sph_shavite512_context ctx_shavite;
+	sph_simd512_context ctx_simd;
+	sph_echo512_context ctx_echo;
+
+	unsigned char hash[128];
+	memset(hash, 0, sizeof hash);
+
+	sph_blake512_init(&ctx_blake);
+	sph_blake512 (&ctx_blake, input, 80);
+	sph_blake512_close(&ctx_blake, (void*) hash);
+
+	sph_bmw512_init(&ctx_bmw);
+	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
+	sph_bmw512_close(&ctx_bmw, (void*) hash);
+
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
+	sph_groestl512_close(&ctx_groestl, (void*) hash);
+
+	sph_jh512_init(&ctx_jh);
+	sph_jh512 (&ctx_jh, (const void*) hash, 64);
+	sph_jh512_close(&ctx_jh, (void*) hash);
+
+	sph_keccak512_init(&ctx_keccak);
+	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
+	sph_keccak512_close(&ctx_keccak, (void*) hash);
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512 (&ctx_skein, (const void*) hash, 64);
+	sph_skein512_close(&ctx_skein, (void*) hash);
+
+	sph_luffa512_init(&ctx_luffa);
+	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
+	sph_luffa512_close (&ctx_luffa, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_shavite512_init(&ctx_shavite);
+	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
+	sph_shavite512_close(&ctx_shavite, (void*) hash);
+
+	sph_simd512_init(&ctx_simd);
+	sph_simd512 (&ctx_simd, (const void*) hash, 64);
+	sph_simd512_close(&ctx_simd, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512 (&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(output, hash, 32);
+}
+
+#ifdef _DEBUG
+#define TRACE(algo) { \
+	if (max_nonce == 1 && pdata[19] <= 1) { \
+		uint32_t* debugbuf = NULL; \
+		cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \
+		cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
+		printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
+			swab32(debugbuf[2]), swab32(debugbuf[3])); \
+		cudaFreeHost(debugbuf); \
+	} \
+}
+#else
+#define TRACE(algo) {}
+#endif
+
+static bool init[MAX_GPUS] = { 0 };
+
+extern "C" int scanhash_c11(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
+	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
+	throughput = min(throughput, max_nonce - first_nonce);
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x5;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffaCubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
+			return 0;
+		}
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); // why 64 ?
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	quark_blake512_cpu_setBlock_80(thr_id, endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+		uint32_t foundNonce;
+
+		// Hash with CUDA
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
+		TRACE("blake  :");
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("bmw    :");
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("groestl:");
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("jh512  :");
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("keccak :");
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("skein  :");
+		x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);
+		TRACE("luffa+c:");
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("shavite:");
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("simd   :");
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("echo => ");
+
+		foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
+		if (foundNonce != UINT32_MAX)
+		{
+			const uint32_t Htarg = ptarget[7];
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			c11hash(vhash64, endiandata);
+
+			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
+				int res = 1;
+				// check if there was some other ones...
+				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
+				*hashes_done = pdata[19] - first_nonce + throughput;
+				if (secNonce != 0) {
+					pdata[21] = secNonce;
+					res++;
+				}
+				pdata[19] = foundNonce;
+				return res;
+			} else {
+				applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
+				pdata[19] = foundNonce + 1;
+			}
+		}
+
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}