Add c11 algo (x11 variant)

Used by Chaincoin and Flaxscript
9 years ago · c5df142124
8 changed files with 277 additions and 8 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -54,7 +54,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
 			  x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
-			  x11/s3.cu
+			  x11/c11.cu x11/s3.cu
 # scrypt
 ccminer_SOURCES += scrypt.cpp scrypt-jane.cpp \
--- a/README.txt
+++ b/README.txt
@ -31,6 +31,7 @@ JackpotCoin
 QuarkCoin family & AnimeCoin
 TalkCoin
 DarkCoin and other X11 coins
 Chaincoin and Flaxscript (C11)
 Saffroncoin blake (256 14-rounds)
 BlakeCoin (256 8-rounds)
 Qubit (Digibyte, ...)
@ -66,6 +67,7 @@ its command line interface and options.
                          anime       use to mine Animecoin
                          blake       use to mine Saffroncoin (Blake 256)
                          blakecoin   use to mine Old Blake 256
                          c11/flax    use to mine Chaincoin and Flax
                          deep        use to mine Deepcoin
                          dmd-gr      use to mine Diamond-Groestl
                          fresh       use to mine Freshcoin
@ -222,6 +224,7 @@ features.
  July 2015...
                  Nvml api power limits
                  Add chaincoin c11 algo (used by Flaxscript too)
                  Remove pluck algo
  June 23th 2015  v1.6.5
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -87,6 +87,7 @@ enum sha_algos {
 	ALGO_ANIME,
 	ALGO_BLAKE,
 	ALGO_BLAKECOIN,
 	ALGO_C11,
 	ALGO_DEEP,
 	ALGO_DMD_GR,
 	ALGO_FRESH,
@ -115,12 +116,14 @@ enum sha_algos {
 	ALGO_X15,
 	ALGO_X17,
 	ALGO_ZR5,
 	ALGO_COUNT
 };
 static const char *algo_names[] = {
 	"anime",
 	"blake",
 	"blakecoin",
 	"c11",
 	"deep",
 	"dmd-gr",
 	"fresh",
@ -149,6 +152,7 @@ static const char *algo_names[] = {
 	"x15",
 	"x17",
 	"zr5",
 	""
 };
 bool opt_debug = false;
@ -275,6 +279,7 @@ Options:\n\
 			anime       Animecoin\n\
 			blake       Blake 256 (SFR)\n\
 			blakecoin   Fast Blake 256 (8 rounds)\n\
 			c11/flax    X11 variant\n\
 			deep        Deepcoin\n\
 			dmd-gr      Diamond-Groestl\n\
 			fresh       Freshcoin (shavite 80)\n\
@ -1744,6 +1749,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_LUFFA:
 				minmax = 0x2000000;
 				break;
 			case ALGO_C11:
 			case ALGO_S3:
 			case ALGO_X11:
 			case ALGO_X13:
@ -1818,6 +1824,11 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;
 		case ALGO_C11:
 			rc = scanhash_c11(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
 			break;
 		case ALGO_FUGUE256:
 			rc = scanhash_fugue256(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@ -2404,24 +2415,26 @@ void parse_arg(int key, char *arg)
 	case 'a': /* --algo */
 		p = strstr(arg, ":"); // optional factor
 		if (p) *p = '\0';
-		for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
+		for (i = 0; i < ALGO_COUNT; i++) {
 			if (algo_names[i] && !strcasecmp(arg, algo_names[i])) {
 				opt_algo = (enum sha_algos)i;
 				break;
 			}
 		}
-		if (i == ARRAY_SIZE(algo_names)) {
+		if (i == ALGO_COUNT) {
 			// some aliases...
-			if (!strcasecmp("diamond", arg))
+			if (!strcasecmp("flax", arg))
 				i = opt_algo = ALGO_C11;
 			else if (!strcasecmp("diamond", arg))
 				i = opt_algo = ALGO_DMD_GR;
-			if (!strcasecmp("doom", arg))
+			else if (!strcasecmp("doom", arg))
 				i = opt_algo = ALGO_LUFFA;
 			else if (!strcasecmp("ziftr", arg))
 				i = opt_algo = ALGO_ZR5;
 			else
 				applog(LOG_ERR, "Unknown algo parameter '%s'", arg);
 		}
-		if (i == ARRAY_SIZE(algo_names))
+		if (i == ALGO_COUNT)
 			show_usage_and_exit(1);
 		if (p) {
 			opt_nfactor = atoi(p + 1);
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -462,8 +462,8 @@
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
      <MaxRegCount>64</MaxRegCount>
    </CudaCompile>
-    <CudaCompile Include="x11\fresh.cu">
+    <CudaCompile Include="x11\c11.cu" />
-    </CudaCompile>
+    <CudaCompile Include="x11\fresh.cu" />
    <CudaCompile Include="x11\s3.cu" />
    <CudaCompile Include="x11\simd_functions.cu">
      <ExcludedFromBuild>true</ExcludedFromBuild>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -517,6 +517,9 @@
    <CudaCompile Include="x11\cuda_x11_simd512.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
    <CudaCompile Include="x11\c11.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
    <CudaCompile Include="x11\fresh.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
--- a/miner.h
+++ b/miner.h
@ -307,6 +307,10 @@ extern int scanhash_blake256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done, int8_t blakerounds);
 extern int scanhash_c11(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@ -764,6 +768,7 @@ void applog_compare_hash(unsigned char *hash, unsigned char *hash2);
 void print_hash_tests(void);
 void animehash(void *state, const void *input);
 void blake256hash(void *output, const void *input, int8_t rounds);
 void c11hash(void *output, const void *input);
 void deephash(void *state, const void *input);
 void luffa_hash(void *state, const void *input);
 void fresh_hash(void *state, const void *input);
--- a/util.cpp
+++ b/util.cpp
@ -1814,6 +1814,9 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 14);
 	printpfx("blake", hash);
 	c11hash(&hash[0], &buf[0]);
 	printpfx("c11", hash);
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);
--- a/x11/c11.cu
+++ b/x11/c11.cu
@ -0,0 +1,242 @@
 extern "C"
 {
 #include "sph/sph_blake.h"
 #include "sph/sph_bmw.h"
 #include "sph/sph_groestl.h"
 #include "sph/sph_skein.h"
 #include "sph/sph_jh.h"
 #include "sph/sph_keccak.h"
 #include "sph/sph_luffa.h"
 #include "sph/sph_cubehash.h"
 #include "sph/sph_shavite.h"
 #include "sph/sph_simd.h"
 #include "sph/sph_echo.h"
 }
 #include "miner.h"
 #include "cuda_helper.h"
 #include <stdio.h>
 #include <memory.h>
 static uint32_t *d_hash[MAX_GPUS];
 extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
 extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
 extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash, int order);
 extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern int  x11_simd512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
 extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
 extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
                                          uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order);
 // Flax/C11 CPU Hash
 extern "C" void c11hash(void *output, const void *input)
 {
 	// blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11
 	sph_blake512_context ctx_blake;
 	sph_bmw512_context ctx_bmw;
 	sph_groestl512_context ctx_groestl;
 	sph_jh512_context ctx_jh;
 	sph_keccak512_context ctx_keccak;
 	sph_skein512_context ctx_skein;
 	sph_luffa512_context ctx_luffa;
 	sph_cubehash512_context ctx_cubehash;
 	sph_shavite512_context ctx_shavite;
 	sph_simd512_context ctx_simd;
 	sph_echo512_context ctx_echo;
 	unsigned char hash[128];
 	memset(hash, 0, sizeof hash);
 	sph_blake512_init(&ctx_blake);
 	sph_blake512 (&ctx_blake, input, 80);
 	sph_blake512_close(&ctx_blake, (void*) hash);
 	sph_bmw512_init(&ctx_bmw);
 	sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
 	sph_bmw512_close(&ctx_bmw, (void*) hash);
 	sph_groestl512_init(&ctx_groestl);
 	sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
 	sph_groestl512_close(&ctx_groestl, (void*) hash);
 	sph_jh512_init(&ctx_jh);
 	sph_jh512 (&ctx_jh, (const void*) hash, 64);
 	sph_jh512_close(&ctx_jh, (void*) hash);
 	sph_keccak512_init(&ctx_keccak);
 	sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
 	sph_keccak512_close(&ctx_keccak, (void*) hash);
 	sph_skein512_init(&ctx_skein);
 	sph_skein512 (&ctx_skein, (const void*) hash, 64);
 	sph_skein512_close(&ctx_skein, (void*) hash);
 	sph_luffa512_init(&ctx_luffa);
 	sph_luffa512 (&ctx_luffa, (const void*) hash, 64);
 	sph_luffa512_close (&ctx_luffa, (void*) hash);
 	sph_cubehash512_init(&ctx_cubehash);
 	sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64);
 	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
 	sph_shavite512_init(&ctx_shavite);
 	sph_shavite512 (&ctx_shavite, (const void*) hash, 64);
 	sph_shavite512_close(&ctx_shavite, (void*) hash);
 	sph_simd512_init(&ctx_simd);
 	sph_simd512 (&ctx_simd, (const void*) hash, 64);
 	sph_simd512_close(&ctx_simd, (void*) hash);
 	sph_echo512_init(&ctx_echo);
 	sph_echo512 (&ctx_echo, (const void*) hash, 64);
 	sph_echo512_close(&ctx_echo, (void*) hash);
 	memcpy(output, hash, 32);
 }
 #ifdef _DEBUG
 #define TRACE(algo) { \
 	if (max_nonce == 1 && pdata[19] <= 1) { \
 		uint32_t* debugbuf = NULL; \
 		cudaMallocHost(&debugbuf, 8*sizeof(uint32_t)); \
 		cudaMemcpy(debugbuf, d_hash[thr_id], 8*sizeof(uint32_t), cudaMemcpyDeviceToHost); \
 		printf("X11 %s %08x %08x %08x %08x...\n", algo, swab32(debugbuf[0]), swab32(debugbuf[1]), \
 			swab32(debugbuf[2]), swab32(debugbuf[3])); \
 		cudaFreeHost(debugbuf); \
 	} \
 }
 #else
 #define TRACE(algo) {}
 #endif
 static bool init[MAX_GPUS] = { 0 };
 extern "C" int scanhash_c11(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
 	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 19=256*256*8;
 	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x5;
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_bmw512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		x11_luffaCubehash512_cpu_init(thr_id, throughput);
 		x11_shavite512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
 			return 0;
 		}
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 64 * throughput), 0); // why 64 ?
 		cuda_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
 	uint32_t endiandata[20];
 	for (int k=0; k < 20; k++)
 		be32enc(&endiandata[k], pdata[k]);
 	quark_blake512_cpu_setBlock_80(thr_id, endiandata);
 	cuda_check_cpu_setTarget(ptarget);
 	do {
 		int order = 0;
 		uint32_t foundNonce;
 		// Hash with CUDA
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
 		TRACE("blake  :");
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("bmw    :");
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("groestl:");
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("jh512  :");
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("keccak :");
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("skein  :");
 		x11_luffaCubehash512_cpu_hash_64(thr_id, throughput, d_hash[thr_id], order++);
 		TRACE("luffa+c:");
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("shavite:");
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("simd   :");
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		TRACE("echo => ");
 		foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
 		if (foundNonce != UINT32_MAX)
 		{
 			const uint32_t Htarg = ptarget[7];
 			uint32_t vhash64[8];
 			be32enc(&endiandata[19], foundNonce);
 			c11hash(vhash64, endiandata);
 			if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
 				int res = 1;
 				// check if there was some other ones...
 				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
 				*hashes_done = pdata[19] - first_nonce + throughput;
 				if (secNonce != 0) {
 					pdata[21] = secNonce;
 					res++;
 				}
 				pdata[19] = foundNonce;
 				return res;
 			} else {
 				applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);
 				pdata[19] = foundNonce + 1;
 			}
 		}
 		pdata[19] += throughput;
 	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
 }