Add CUDA error checks to the qubit algos

Also rename the doom algo to luffa, following djm34's naming
2025-01-08 22:07:56 +00:00 · 2014-09-11 01:59:43 +02:00 · 2014-09-11 01:59:43 +02:00 · 23f0cee61f
commit 23f0cee61f
parent 1aec4555cc
5 changed files with 76 additions and 45 deletions
--- a/README.txt
+++ b/README.txt
@ -58,12 +58,12 @@ its command line interface and options.
                          heavy       use to mine Heavycoin
                          mjollnir    use to mine Mjollnircoin
                          deep        use to mine Deepcoin
-                          doom        use to mine Doomcoin
                          fugue256    use to mine Fuguecoin
                          groestl     use to mine Groestlcoin
                          dmd-gr      use to mine Diamond-Groestl
                          myr-gr      use to mine Myriad-Groest
                          jackpot     use to mine Jackpotcoin
+                          luffa       use to mine Doomcoin
                          quark       use to mine Quarkcoin
                          qubit       use to mine Qubit Algo
                          anime       use to mine Animecoin
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -136,6 +136,7 @@ typedef enum {
 	ALGO_GROESTL,
 	ALGO_HEAVY,		/* Heavycoin hash */
 	ALGO_JACKPOT,
+	ALGO_LUFFA_DOOM,
 	ALGO_MJOLLNIR,		/* Mjollnir hash */
 	ALGO_MYR_GR,
 	ALGO_NIST5,
@ -156,12 +157,13 @@ static const char *algo_names[] = {
 	"blake",
 	"blakecoin",
 	"deep",
-	"doom",
+	"doom", /* is luffa */
 	"fresh",
 	"fugue256",
 	"groestl",
 	"heavy",
 	"jackpot",
+	"luffa",
 	"mjollnir",
 	"myr-gr",
 	"nist5",
@ -242,12 +244,12 @@ Options:\n\
                        blake     Blake 256 (like NEOS blake)\n\
                        blakecoin Old Blake 256 (8 rounds)\n\
                        deep      Deepcoin hash\n\
-                        doom      Doomcoin hash\n\
                        fresh     Freshcoin hash (shavite 80)\n\
                        fugue256  Fuguecoin hash\n\
                        groestl   Groestlcoin hash\n\
                        heavy     Heavycoin hash\n\
                        jackpot   Jackpot hash\n\
+                        luffa     Doomcoin hash\n\
                        mjollnir  Mjollnircoin hash\n\
                        myr-gr    Myriad-Groestl hash\n\
                        nist5     NIST5 (TalkCoin) hash\n\
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 	{
 		cudaSetDevice(device_map[thr_id]);

-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));

 		qubit_luffa512_cpu_init(thr_id, throughput);

@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,

 		pdata[19] += throughput;

-	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			pdata[19] = max_nonce;
+			break;
+		}
+
+	} while (!work_restart[thr_id].restart);

 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
--- a/qubit/qubit_luffa512.cu
+++ b/qubit/qubit_luffa512.cu
@ -23,13 +23,20 @@

 #include "cuda_helper.h"

+#define MAXU 0xffffffffU
+
 typedef unsigned char BitSequence;

 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
-__constant__ uint32_t pTarget[8];
+__constant__ uint32_t c_Target[8];

-uint32_t *d_lnounce[8];
-uint32_t *d_LNonce[8];
+static uint32_t *h_resNounce[8];
+static uint32_t *d_resNounce[8];
+
+#define NBN 1 /* max results, could be 2, see blake32.cu */
+#if NBN > 1
+static uint32_t extra_results[2] = { MAXU, MAXU };
+#endif

 typedef struct {
 	uint32_t buffer[8]; /* Buffer to be hashed */
@ -388,56 +395,67 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou
 		#pragma unroll 16
 		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];

-		// die Nounce durch die thread-spezifische ersetzen
+		// Tested nonce
 		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));

-
 		hashState state;
 		#pragma unroll 40
 		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+
 		#pragma unroll 8
 		for(int i=0;i<8;i++) state.buffer[i] = 0;
+
 		Update512(&state, (BitSequence*)buff.buf32);
 		finalization512(&state, Hash);

-		bool rc = true;
+		/* dont ask me why not a simple if (Hash[i] > c_Target[i]) return;
+		 * we lose 20% in perfs without the position test */
 		int position = -1;
 		#pragma unroll 8
 		for (int i = 7; i >= 0; i--) {
-			if (Hash[i] > pTarget[i]) {
+			if (Hash[i] > c_Target[i]) {
+				if (position < i) {
+					return;
+				}
+			}
+			if (Hash[i] < c_Target[i]) {
 				if (position < i) {
 					position = i;
-					rc = false;
-				}
-
-			}
-			if (Hash[i] < pTarget[i]) {
-				if(position < i) {
-					position = i;
-					rc = true;
+					//break; /* impact perfs, unroll ? */
 				}
 			}
 		}

-		if(rc && resNounce[0] > nounce)
+#if NBN == 1
+		if (resNounce[0] > nounce) {
 			resNounce[0] = nounce;
 		}
+#else
+		/* keep the smallest nounce, + extra one if found */
+		if (resNounce[0] > nounce) {
+			resNounce[1] = resNounce[0];
+			resNounce[0] = nounce;
+		} else {
+			resNounce[1] = nounce;
+		}
+#endif
+	}
 }

 __host__
 void qubit_luffa512_cpu_init(int thr_id, int threads)
 {
-	cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
-	cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
-	cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
-	cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice));
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice));
+	CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t)));
+	CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t)));
 }

 __host__
 uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
 {
-	uint32_t result = 0xffffffff;
-	cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
+	uint32_t result = MAXU;
+	cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t));
 	const int threadsperblock = 256;

 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start

 	size_t shared_size = 0;

-	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
-	MyStreamSynchronize(NULL, order, thr_id);
-	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
+	cudaDeviceSynchronize();
+	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
 		//cudaThreadSynchronize();
-	result = *d_lnounce[thr_id];
+		result = h_resNounce[thr_id][0];
+#if NBN > 1
+		extra_results[0] = h_resNounce[thr_id][1];
+#endif
+	}
 	return result;
 }

@ -470,6 +492,7 @@ __host__
 void qubit_luffa512_cpu_setBlock_80(void *pdata)
 {
 	unsigned char PaddedMessage[128];
+
 	memcpy(PaddedMessage, pdata, 80);
 	memset(PaddedMessage+80, 0, 48);
 	PaddedMessage[80] = 0x80;
@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata)
 	PaddedMessage[126] = 0x02;
 	PaddedMessage[127] = 0x80;

-	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
 }

 __host__
 void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
 {
 	unsigned char PaddedMessage[128];
+
 	memcpy(PaddedMessage, pdata, 80);
 	memset(PaddedMessage+80, 0, 48);
 	PaddedMessage[80] = 0x80;
 	PaddedMessage[111] = 1;
 	PaddedMessage[126] = 0x02;
 	PaddedMessage[127] = 0x80;
-	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);

-	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
 }
--- a/util.c
+++ b/util.c
@ -1474,10 +1474,6 @@ void print_hash_tests(void)
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);

-	memset(hash, 0, sizeof hash);
-	doomhash(&hash[0], &buf[0]);
-	printpfx("doom", hash);
-
 	memset(hash, 0, sizeof hash);
 	fresh_hash(&hash[0], &buf[0]);
 	printpfx("fresh", hash);
@ -1498,6 +1494,10 @@ void print_hash_tests(void)
 	jackpothash(&hash[0], &buf[0]);
 	printpfx("jackpot", hash);

+	memset(hash, 0, sizeof hash);
+	doomhash(&hash[0], &buf[0]);
+	printpfx("luffa", hash);
+
 	memset(hash, 0, sizeof hash);
 	myriadhash(&hash[0], &buf[0]);
 	printpfx("myriad", hash);