Add CUDA error checks to the qubit algos

Also rename doom to luffa, like djm34 did
10 years ago · 23f0cee61f
5 changed files with 76 additions and 45 deletions
--- a/README.txt
+++ b/README.txt
@ -58,12 +58,12 @@ its command line interface and options.
                          heavy       use to mine Heavycoin
                          mjollnir    use to mine Mjollnircoin
                          deep        use to mine Deepcoin
                          doom        use to mine Doomcoin
                          fugue256    use to mine Fuguecoin
                          groestl     use to mine Groestlcoin
                          dmd-gr      use to mine Diamond-Groestl
                          myr-gr      use to mine Myriad-Groestl
                          jackpot     use to mine Jackpotcoin
                          luffa       use to mine Doomcoin
                          quark       use to mine Quarkcoin
                          qubit       use to mine Qubit Algo
                          anime       use to mine Animecoin
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -136,6 +136,7 @@ typedef enum {
 	ALGO_GROESTL,
 	ALGO_HEAVY,		/* Heavycoin hash */
 	ALGO_JACKPOT,
 	ALGO_LUFFA_DOOM,
 	ALGO_MJOLLNIR,		/* Mjollnir hash */
 	ALGO_MYR_GR,
 	ALGO_NIST5,
@ -156,12 +157,13 @@ static const char *algo_names[] = {
 	"blake",
 	"blakecoin",
 	"deep",
-	"doom",
+	"doom", /* is luffa */
 	"fresh",
 	"fugue256",
 	"groestl",
 	"heavy",
 	"jackpot",
 	"luffa",
 	"mjollnir",
 	"myr-gr",
 	"nist5",
@ -242,12 +244,12 @@ Options:\n\
                        blake     Blake 256 (like NEOS blake)\n\
                        blakecoin Old Blake 256 (8 rounds)\n\
                        deep      Deepcoin hash\n\
                        doom      Doomcoin hash\n\
                        fresh     Freshcoin hash (shavite 80)\n\
                        fugue256  Fuguecoin hash\n\
                        groestl   Groestlcoin hash\n\
                        heavy     Heavycoin hash\n\
                        jackpot   Jackpot hash\n\
                        luffa     Doomcoin hash\n\
                        mjollnir  Mjollnircoin hash\n\
                        myr-gr    Myriad-Groestl hash\n\
                        nist5     NIST5 (TalkCoin) hash\n\
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 	{
 		cudaSetDevice(device_map[thr_id]);
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
 		qubit_luffa512_cpu_init(thr_id, throughput);
@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 		pdata[19] += throughput;
-	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
 			pdata[19] = max_nonce;
 			break;
 		}
 	} while (!work_restart[thr_id].restart);
 	*hashes_done = pdata[19] - first_nonce + 1;
 	return 0;
--- a/qubit/qubit_luffa512.cu
+++ b/qubit/qubit_luffa512.cu
@ -23,13 +23,20 @@
 #include "cuda_helper.h"
 #define MAXU 0xffffffffU
 typedef unsigned char BitSequence;
 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
-__constant__ uint32_t pTarget[8];
+__constant__ uint32_t c_Target[8];
 static uint32_t *h_resNounce[8];
 static uint32_t *d_resNounce[8];
-uint32_t *d_lnounce[8];
+#define NBN 1 /* max results, could be 2, see blake32.cu */
-uint32_t *d_LNonce[8];
+#if NBN > 1
 static uint32_t extra_results[2] = { MAXU, MAXU };
 #endif
 typedef struct {
 	uint32_t buffer[8]; /* Buffer to be hashed */
@ -380,64 +387,75 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou
 	{
 		uint32_t nounce = startNounce + thread;
 		union {
-		uint64_t buf64[16];
+			uint64_t buf64[16];
-		uint32_t buf32[32];
+			uint32_t buf32[32];
 		} buff;
 		uint32_t Hash[16];
-#pragma unroll 16
+		#pragma unroll 16
 		for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
-		// die Nounce durch die thread-spezifische ersetzen
+		// Tested nonce
 		buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
 		hashState state;
-#pragma unroll 40
+		#pragma unroll 40
 		for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
-#pragma unroll 8
+
 		#pragma unroll 8
 		for(int i=0;i<8;i++) state.buffer[i] = 0;
 		Update512(&state, (BitSequence*)buff.buf32);
 		finalization512(&state, Hash);
-		bool rc = true;
+		/* dont ask me why not a simple if (Hash[i] > c_Target[i]) return;
 		 * we lose 20% in perfs without the position test */
 		int position = -1;
-#pragma unroll 8
+		#pragma unroll 8
 		for (int i = 7; i >= 0; i--) {
-			if (Hash[i] > pTarget[i]) {
+			if (Hash[i] > c_Target[i]) {
-				if(position < i) {
+				if (position < i) {
-					position = i;
+					return;
 					rc = false;
 				}
 			}
-			if (Hash[i] < pTarget[i]) {
+			if (Hash[i] < c_Target[i]) {
-				if(position < i) {
+				if (position < i) {
 					position = i;
-					rc = true;
+					//break; /* impact perfs, unroll ? */
 				}
 			}
 		}
-		if(rc && resNounce[0] > nounce)
+#if NBN == 1
 		if (resNounce[0] > nounce) {
 			resNounce[0] = nounce;
 		}
 #else
 		/* keep the smallest nounce, + extra one if found */
 		if (resNounce[0] > nounce) {
 			resNounce[1] = resNounce[0];
 			resNounce[0] = nounce;
 		} else {
 			resNounce[1] = nounce;
 		}
 #endif
 	}
 }
 __host__
 void qubit_luffa512_cpu_init(int thr_id, int threads)
 {
-	cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice));
-	cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice));
-	cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
+	CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t)));
-	cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
+	CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t)));
 }
 __host__
 uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
 {
-	uint32_t result = 0xffffffff;
+	uint32_t result = MAXU;
-	cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
+	cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t));
 	const int threadsperblock = 256;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
 	size_t shared_size = 0;
-	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
+	qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
-	MyStreamSynchronize(NULL, order, thr_id);
+	cudaDeviceSynchronize();
-	cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
-	//cudaThreadSynchronize();
+		//cudaThreadSynchronize();
-	result = *d_lnounce[thr_id];
+		result = h_resNounce[thr_id][0];
 #if NBN > 1
 		extra_results[0] = h_resNounce[thr_id][1];
 #endif
 	}
 	return result;
 }
@ -462,7 +484,7 @@ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u
 	dim3 block(threadsperblock);
 	size_t shared_size = 0;
-	qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+	qubit_luffa512_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
 	MyStreamSynchronize(NULL, order, thr_id);
 }
@ -470,6 +492,7 @@ __host__
 void qubit_luffa512_cpu_setBlock_80(void *pdata)
 {
 	unsigned char PaddedMessage[128];
 	memcpy(PaddedMessage, pdata, 80);
 	memset(PaddedMessage+80, 0, 48);
 	PaddedMessage[80] = 0x80;
@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata)
 	PaddedMessage[126] = 0x02;
 	PaddedMessage[127] = 0x80;
-	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
 }
 __host__
 void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
 {
 	unsigned char PaddedMessage[128];
 	memcpy(PaddedMessage, pdata, 80);
 	memset(PaddedMessage+80, 0, 48);
 	PaddedMessage[80] = 0x80;
 	PaddedMessage[111] = 1;
 	PaddedMessage[126] = 0x02;
 	PaddedMessage[127] = 0x80;
 	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
-	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
 }
--- a/util.c
+++ b/util.c
@ -1474,10 +1474,6 @@ void print_hash_tests(void)
 	deephash(&hash[0], &buf[0]);
 	printpfx("deep", hash);
 	memset(hash, 0, sizeof hash);
 	doomhash(&hash[0], &buf[0]);
 	printpfx("doom", hash);
 	memset(hash, 0, sizeof hash);
 	fresh_hash(&hash[0], &buf[0]);
 	printpfx("fresh", hash);
@ -1498,6 +1494,10 @@ void print_hash_tests(void)
 	jackpothash(&hash[0], &buf[0]);
 	printpfx("jackpot", hash);
 	memset(hash, 0, sizeof hash);
 	doomhash(&hash[0], &buf[0]);
 	printpfx("luffa", hash);
 	memset(hash, 0, sizeof hash);
 	myriadhash(&hash[0], &buf[0]);
 	printpfx("myriad", hash);