VeltorCoin Streebog based algo (veltor)

Also known as "Thor's Riddle"... yes sure ;) Credits to ocminer, who found and "implemented" it. Note: tested "ok" on x64 and on CUDA 6.5 x86, but not on CUDA 7.5 and 8.0 x86. PS: I don't have the time for a more proper CUDA implementation of Streebog.
8 years ago · 683dc0e149
12 changed files with 209 additions and 9 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -61,7 +61,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
				@@ -61,7 +61,7 @@ ccminer_SOURCES	= elist.h miner.h compat.h \
 			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
 			  x15/whirlpool.cu \
 			  x17/x17.cu x17/cuda_x17_haval256.cu x17/cuda_x17_sha512.cu \
-			  x11/c11.cu x11/s3.cu x11/sib.cu x11/cuda_streebog.cu
+			  x11/c11.cu x11/s3.cu x11/sib.cu x11/veltor.cu x11/cuda_streebog.cu

 # scrypt
 ccminer_SOURCES += scrypt.cpp scrypt-jane.cpp \
--- a/README.txt
+++ b/README.txt
@ -1,5 +1,5 @@
				@@ -1,5 +1,5 @@

-ccMiner 1.8.1 (August 2016) "Sia Blake2-B Algo and RPCs"
+ccMiner 1.8.2 (August 2016) "Veltor algo Thor's Riddle streebog"
 ---------------------------------------------------------------

 ***************************************************************
@ -108,6 +108,7 @@ its command line interface and options.
				@@ -108,6 +108,7 @@ its command line interface and options.
                          x17         use to mine X17
                          x17         use to mine X17
                          vanilla     use to mine Vanilla (Blake256)
+                          veltor      use to mine VeltorCoin
                          whirlpool   use to mine Joincoin
                          zr5         use to mine ZiftrCoin

@ -244,6 +245,10 @@ features.
				@@ -244,6 +245,10 @@ features.

 >>> RELEASE HISTORY <<<

+  Aug. 20th 2016  v1.8.2
+                  Prevent Windows hibernate while mining
+                  veltor algo
+
  Aug. 10th 2016  v1.8.1
                  SIA Blake2-B Algo (getwork over stratum for Suprnova)
                  SIA Nanopool RPC (getwork over http)
--- a/algos.h
+++ b/algos.h
@ -44,6 +44,7 @@ enum sha_algos {
				@@ -44,6 +44,7 @@ enum sha_algos {
 	ALGO_X15,
 	ALGO_X17,
 	ALGO_VANILLA,
+	ALGO_VELTOR,
 	ALGO_WHIRLCOIN,
 	ALGO_WHIRLPOOL,
 	ALGO_WHIRLPOOLX,
@ -94,6 +95,7 @@ static const char *algo_names[] = {
				@@ -94,6 +95,7 @@ static const char *algo_names[] = {
 	"x15",
 	"x17",
 	"vanilla",
+	"veltor",
 	"whirlcoin",
 	"whirlpool",
 	"whirlpoolx",
@ -127,6 +129,8 @@ static inline int algo_to_int(char* arg)
				@@ -127,6 +129,8 @@ static inline int algo_to_int(char* arg)
 			i = ALGO_LYRA2;
 		else if (!strcasecmp("lyra2rev2", arg))
 			i = ALGO_LYRA2v2;
+		else if (!strcasecmp("thorsriddle", arg))
+			i = ALGO_VELTOR;
 		else if (!strcasecmp("whirl", arg))
 			i = ALGO_WHIRLPOOL;
 		else if (!strcasecmp("ziftr", arg))
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -230,7 +230,7 @@ Options:\n\
				@@ -230,7 +230,7 @@ Options:\n\
 			keccak      Keccak-256 (Maxcoin)\n\
 			lbry        LBRY Credits (Sha/Ripemd)\n\
 			luffa       Joincoin\n\
-			lyra2       LyraBar\n\
+			lyra2       CryptoCoin\n\
 			lyra2v2     VertCoin\n\
 			mjollnir    Mjollnircoin\n\
 			myr-gr      Myriad-Groestl\n\
@ -245,6 +245,7 @@ Options:\n\
				@@ -245,6 +245,7 @@ Options:\n\
 			skein       Skein SHA2 (Skeincoin)\n\
 			skein2      Double Skein (Woodcoin)\n\
 			s3          S3 (1Coin)\n\
+			veltor      Thorsriddle streebog\n\
 			x11evo      Permuted x11 (Revolver)\n\
 			x11         X11 (DarkCoin)\n\
 			x13         X13 (MaruCoin)\n\
@ -2194,6 +2195,7 @@ static void *miner_thread(void *userdata)
				@@ -2194,6 +2195,7 @@ static void *miner_thread(void *userdata)
 			case ALGO_NEOSCRYPT:
 			case ALGO_SIB:
 			case ALGO_SCRYPT:
+			case ALGO_VELTOR:
 				minmax = 0x80000;
 				break;
 			case ALGO_SCRYPT_JANE:
@ -2346,6 +2348,9 @@ static void *miner_thread(void *userdata)
				@@ -2346,6 +2348,9 @@ static void *miner_thread(void *userdata)
 		case ALGO_VANILLA:
 			rc = scanhash_vanilla(thr_id, &work, max_nonce, &hashes_done, 8);
 			break;
+		case ALGO_VELTOR:
+			rc = scanhash_veltor(thr_id, &work, max_nonce, &hashes_done);
+			break;
 		case ALGO_WHIRLCOIN:
 		case ALGO_WHIRLPOOL:
 			rc = scanhash_whirl(thr_id, &work, max_nonce, &hashes_done);
@ -2396,6 +2401,7 @@ static void *miner_thread(void *userdata)
				@@ -2396,6 +2401,7 @@ static void *miner_thread(void *userdata)
 		if (opt_algo != ALGO_SIA) // reversed endian
 			work.nonces[0] = nonceptr[0];
 		if (opt_algo != ALGO_DECRED && opt_algo != ALGO_BLAKE2S && opt_algo != ALGO_LBRY && opt_algo != ALGO_SIA) {
+			if (opt_algo != ALGO_VELTOR)
 			work.nonces[1] = nonceptr[2];
 		}

--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -499,6 +499,7 @@
				@@ -499,6 +499,7 @@
    <CudaCompile Include="x11\fresh.cu" />
    <CudaCompile Include="x11\sib.cu" />
    <CudaCompile Include="x11\s3.cu" />
+    <CudaCompile Include="x11\veltor.cu" />
    <CudaCompile Include="x11\x11.cu" />
    <CudaCompile Include="x11\x11evo.cu" />
    <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -637,6 +637,9 @@
				@@ -637,6 +637,9 @@
    <CudaCompile Include="x11\s3.cu">
      <Filter>Source Files\CUDA\x11</Filter>
    </CudaCompile>
+    <CudaCompile Include="x11\veltor.cu">
+      <Filter>Source Files\CUDA\x11</Filter>
+    </CudaCompile>
    <CudaCompile Include="Algo256\blake256.cu">
      <Filter>Source Files\CUDA\Algo256</Filter>
    </CudaCompile>
--- a/compat/ccminer-config.h
+++ b/compat/ccminer-config.h
@ -164,7 +164,7 @@
				@@ -164,7 +164,7 @@
 #define PACKAGE_URL "http://github.com/tpruvot/ccminer"

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.8.1"
+#define PACKAGE_VERSION "1.8.2"

 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
				@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [1.8.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
+AC_INIT([ccminer], [1.8.2], [], [ccminer], [http://github.com/tpruvot/ccminer])

 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/miner.h
+++ b/miner.h
@ -291,6 +291,7 @@ extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce,
				@@ -291,6 +291,7 @@ extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce,
 extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
+extern int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
 extern int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
@ -337,6 +338,7 @@ extern void free_skeincoin(int thr_id);
				@@ -337,6 +338,7 @@ extern void free_skeincoin(int thr_id);
 extern void free_skein2(int thr_id);
 extern void free_s3(int thr_id);
 extern void free_vanilla(int thr_id);
+extern void free_veltor(int thr_id);
 extern void free_whirl(int thr_id);
 extern void free_x11evo(int thr_id);
 extern void free_x11(int thr_id);
@ -816,6 +818,7 @@ void sibhash(void *output, const void *input);
				@@ -816,6 +818,7 @@ void sibhash(void *output, const void *input);
 void skeincoinhash(void *output, const void *input);
 void skein2hash(void *output, const void *input);
 void s3hash(void *output, const void *input);
+void veltorhash(void *output, const void *input);
 void wcoinhash(void *state, const void *input);
 void whirlxHash(void *state, const void *input);
 void x11evo_hash(void *output, const void *input);
--- a/res/ccminer.rc
+++ b/res/ccminer.rc
@ -60,8 +60,8 @@ IDI_ICON1               ICON                    "ccminer.ico"
				@@ -60,8 +60,8 @@ IDI_ICON1               ICON                    "ccminer.ico"
 //

 VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,8,1,0
- PRODUCTVERSION 1,8,1,0
+ FILEVERSION 1,8,2,0
+ PRODUCTVERSION 1,8,2,0
 FILEFLAGSMASK 0x3fL
 #ifdef _DEBUG
 FILEFLAGS 0x21L
@ -76,10 +76,10 @@ BEGIN
				@@ -76,10 +76,10 @@ BEGIN
    BEGIN
        BLOCK "040904e4"
        BEGIN
-            VALUE "FileVersion", "1.8.1"
+            VALUE "FileVersion", "1.8.2"
            VALUE "LegalCopyright", "Copyright (C) 2016"
            VALUE "ProductName", "ccminer"
-            VALUE "ProductVersion", "1.8.1"
+            VALUE "ProductVersion", "1.8.2"
        END
    END
    BLOCK "VarFileInfo"
--- a/util.cpp
+++ b/util.cpp
@ -2207,6 +2207,9 @@ void print_hash_tests(void)
				@@ -2207,6 +2207,9 @@ void print_hash_tests(void)
 	blake256hash(&hash[0], &buf[0], 8);
 	printpfx("vanilla", hash);

+	veltorhash(&hash[0], &buf[0]);
+	printpfx("veltor", hash);
+
 	wcoinhash(&hash[0], &buf[0]);
 	printpfx("whirlpool", hash);

--- a/x11/veltor.cu
+++ b/x11/veltor.cu
@ -0,0 +1,175 @@
				@@ -0,0 +1,175 @@
+extern "C" {
+#include "sph/sph_skein.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_shabal.h"
+#include "sph/sph_streebog.h"
+}
+
+#include "miner.h"
+#include "cuda_helper.h"
+#include "cuda_x11.h"
+
+extern void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
+
+extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
+extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void skein512_cpu_setBlock_80(void *pdata);
+extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
+
+#include <stdio.h>
+#include <memory.h>
+
+static uint32_t *d_hash[MAX_GPUS];
+
+// Veltor CPU Hash
+// CPU reference implementation of the veltor hash chain:
+// skein512 -> shavite512 -> shabal512 -> gost512 (streebog).
+// Reads 80 bytes from input and copies the first 32 bytes of the
+// final digest buffer into output.
+extern "C" void veltorhash(void *output, const void *input)
+{
+	// scratch buffer, reused as the input and the output of every stage
+	uint8_t _ALIGN(64) hash[128] = { 0 };
+	void *const state = (void*) hash;
+
+	{	// stage 1: skein512 over the 80 input bytes
+		sph_skein512_context skein;
+		sph_skein512_init(&skein);
+		sph_skein512(&skein, input, 80);
+		sph_skein512_close(&skein, state);
+	}
+
+	{	// stage 2: shavite512 over the first 64 bytes of the buffer
+		sph_shavite512_context shavite;
+		sph_shavite512_init(&shavite);
+		sph_shavite512(&shavite, (const void*) state, 64);
+		sph_shavite512_close(&shavite, state);
+	}
+
+	{	// stage 3: shabal512 over the first 64 bytes of the buffer
+		sph_shabal512_context shabal;
+		sph_shabal512_init(&shabal);
+		sph_shabal512(&shabal, (const void*) state, 64);
+		sph_shabal512_close(&shabal, state);
+	}
+
+	{	// stage 4: gost512 (streebog) over the first 64 bytes of the buffer
+		sph_gost512_context gost;
+		sph_gost512_init(&gost);
+		sph_gost512(&gost, (const void*) state, 64);
+		sph_gost512_close(&gost, state);
+	}
+
+	// only the first 32 bytes are handed back to the caller
+	memcpy(output, state, 32);
+}
+
+//#define _DEBUG
+#define _DEBUG_PREFIX "veltor"
+#include "cuda_debug.cuh"
+
+static bool init[MAX_GPUS] = { 0 };
+
+// GPU nonce scan for the veltor chain (skein512 -> shavite512 -> shabal512 -> streebog).
+//
+// thr_id      : miner thread index, used to select the device and the per-thread buffers
+// work        : work->data[0..19] is the header (word 19 is the nonce and is updated in place),
+//               work->target is the target; work->nonces[] receives the nonce(s) found
+// max_nonce   : the scan stops once the next batch would reach this value
+// hashes_done : receives the number of nonces processed by this call
+//
+// Returns the number of nonces stored in work->nonces[] (1 or 2) when a candidate
+// validates on the CPU, or 0 when the range is exhausted or a work restart is requested.
+// NOTE(review): CUDA_CALL_OR_RET_X(..., 0) presumably also returns 0 on allocation failure - confirm.
+extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t *pdata = work->data;
+	uint32_t *ptarget = work->target;
+	const uint32_t first_nonce = pdata[19];
+	// larger default batch (2^20) on SM 5.0+ devices when not on Windows, 2^18 otherwise
+	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 18;
+	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
+	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+
+	// benchmark mode overrides the high target word with a fixed value
+	if (opt_benchmark)
+		ptarget[7] = 0xf;
+
+	// one-time per-thread device setup and buffer allocation
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		if (opt_cudaschedule == -1 && gpu_threads == 1) {
+			cudaDeviceReset();
+			// reduce cpu usage
+			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+			CUDA_LOG_ERROR();
+		}
+
+		quark_skein512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x14_shabal512_cpu_init(thr_id, throughput);
+
+		// 64 bytes of hash state per nonce in a batch
+		CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
+
+		cuda_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// big-endian copy of the 20 header words, used for the GPU block and the CPU re-check
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	skein512_cpu_setBlock_80(endiandata);
+	cuda_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+		uint32_t foundNonce;
+
+		// Hash with CUDA
+		skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
+		TRACE("skein  :");
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("shavite:");
+		x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		TRACE("shabal :");
+		streebog_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
+		TRACE("gost   :");
+
+		*hashes_done = pdata[19] - first_nonce + throughput;
+
+		// UINT32_MAX means no candidate in this batch
+		foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
+		if (foundNonce != UINT32_MAX)
+		{
+			const uint32_t Htarg = ptarget[7];
+			uint32_t _ALIGN(64) vhash[8];
+			// re-hash the candidate on the CPU before accepting it
+			be32enc(&endiandata[19], foundNonce);
+			veltorhash(vhash, endiandata);
+
+			if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
+				int res = 1;
+				// store the first nonce right away, so that the swap below
+				// exchanges two valid nonces instead of a stale slot
+				work->nonces[0] = foundNonce;
+				// check if there was another one...
+				uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
+				work_set_target_ratio(work, vhash);
+				if (secNonce != 0) {
+					be32enc(&endiandata[19], secNonce);
+					veltorhash(vhash, endiandata);
+					work->nonces[1] = secNonce;
+					// keep the nonce with the higher target ratio in slot 0
+					if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio) {
+						work_set_target_ratio(work, vhash);
+						xchg(work->nonces[1], work->nonces[0]);
+					}
+					res++;
+				}
+				// keep the nonce word in sync with slot 0
+				// NOTE(review): the caller appears to re-read nonces[0] from the data nonce word - confirm
+				pdata[19] = work->nonces[0];
+				return res;
+			} else {
+				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
+				// resume right after the rejected candidate
+				pdata[19] = foundNonce + 1;
+				continue;
+			}
+		}
+
+		// 64-bit sum avoids wrap-around when the next batch would pass max_nonce
+		if ((uint64_t) throughput + pdata[19] >= max_nonce) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (!work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce;
+	return 0;
+}
+
+// cleanup
+// Releases the per-thread resources set up by scanhash_veltor.
+// Does nothing when the thread was never initialised; otherwise frees the
+// device hash buffer, releases the cuda_check state and clears the init flag.
+extern "C" void free_veltor(int thr_id)
+{
+	if (!init[thr_id])
+		return;
+
+	// wait for pending device work before freeing the buffer
+	// (cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is its replacement)
+	cudaDeviceSynchronize();
+
+	cudaFree(d_hash[thr_id]);
+
+	cuda_check_cpu_free(thr_id);
+	// lets a later scanhash_veltor call run its setup again
+	init[thr_id] = false;
+
+	cudaDeviceSynchronize();
+}