myriad/groestl: some more cleanup + tabs...

10 years ago · 74e94fa1ec
2 changed files with 87 additions and 95 deletions
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -9,89 +9,84 @@

 #include "miner.h"

-#define SWAP32(x) \
-    ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
-      (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
-
-// CPU-groestl
-extern "C" void groestlhash(void *state, const void *input)
+// CPU hash
+void groestlhash(void *state, const void *input)
 {
-    sph_groestl512_context ctx_groestl;
-
-    //these uint512 in the c++ source of the client are backed by an array of uint32
-    uint32_t hashA[16], hashB[16];
+	uint32_t _ALIGN(64) hash[16];
+	sph_groestl512_context ctx_groestl;

-    sph_groestl512_init(&ctx_groestl);
-    sph_groestl512 (&ctx_groestl, input, 80); //6
-    sph_groestl512_close(&ctx_groestl, hashA); //7
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512(&ctx_groestl, input, 80);
+	sph_groestl512_close(&ctx_groestl, hash);

-    sph_groestl512_init(&ctx_groestl);
-    sph_groestl512 (&ctx_groestl, hashA, 64); //6
-    sph_groestl512_close(&ctx_groestl, hashB); //7
+	sph_groestl512_init(&ctx_groestl);
+	sph_groestl512(&ctx_groestl, hash, 64);
+	sph_groestl512_close(&ctx_groestl, hash);

-    memcpy(state, hashB, 32);
+	memcpy(state, hash, 32);
 }

 static bool init[MAX_GPUS] = { 0 };

-extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
-    uint32_t max_nonce, unsigned long *hashes_done)
+int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done)
 {
-    uint32_t start_nonce = pdata[19];
-    uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
-    throughput = min(throughput, max_nonce - start_nonce);
-
-    uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
-
-    if (opt_benchmark)
-        ((uint32_t*)ptarget)[7] = 0x000000ff;
-
-    // init
-    if(!init[thr_id])
-    {
-        cudaSetDevice(device_map[thr_id]);
-        groestlcoin_cpu_init(thr_id, throughput);
-        init[thr_id] = true;
-    }
-
-    // Endian Drehung ist notwendig
-    uint32_t endiandata[32];
-    for (int kk=0; kk < 32; kk++)
-        be32enc(&endiandata[kk], pdata[kk]);
-
-    // Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
-    groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
-
-    do {
-        // GPU
-        uint32_t foundNounce = UINT32_MAX;
-
-        *hashes_done = pdata[19] - start_nonce + throughput;
-
-        groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
-
-        if(foundNounce < UINT32_MAX)
-        {
-            uint32_t _ALIGN(64) tmpHash[8];
-            endiandata[19] = SWAP32(foundNounce);
-            groestlhash(tmpHash, endiandata);
-
-            if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) {
-                pdata[19] = foundNounce;
-                free(outputHash);
-                return true;
-            } else {
-                applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
-            }
-        }
-
-        if (pdata[19] + throughput < pdata[19])
-            pdata[19] = max_nonce;
-        else pdata[19] += throughput;
-
-    } while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
-
-    free(outputHash);
-    return 0;
+	uint32_t _ALIGN(64) endiandata[20];
+	uint32_t start_nonce = pdata[19];
+	uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
+	throughput = min(throughput, max_nonce - start_nonce);
+
+	uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x000000ff;
+
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		groestlcoin_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
+
+	do {
+		uint32_t foundNounce = UINT32_MAX;
+
+		*hashes_done = pdata[19] - start_nonce + throughput;
+
+		// GPU hash
+		groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
+
+		if (foundNounce < UINT32_MAX)
+		{
+			uint32_t _ALIGN(64) tmpHash[8];
+			endiandata[19] = swab32(foundNounce);
+			groestlhash(tmpHash, endiandata);
+
+			if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) {
+				pdata[19] = foundNounce;
+				free(outputHash);
+				return true;
+			} else {
+				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!",
+					device_map[thr_id], foundNounce);
+			}
+		}
+
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
+			pdata[19] = max_nonce;
+			*hashes_done = max_nonce - start_nonce + 1;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	free(outputHash);
+	return 0;
 }

--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -12,32 +12,29 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads);
 void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
 void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);

-#define SWAP32(x) \
-    ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
-      (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
-
-extern "C" void myriadhash(void *state, const void *input)
+void myriadhash(void *state, const void *input)
 {
-	uint32_t hashA[16], hashB[16];
+	uint32_t _ALIGN(64) hash[16];
 	sph_groestl512_context ctx_groestl;
 	SHA256_CTX sha256;

 	sph_groestl512_init(&ctx_groestl);
-	sph_groestl512 (&ctx_groestl, input, 80);
-	sph_groestl512_close(&ctx_groestl, hashA);
+	sph_groestl512(&ctx_groestl, input, 80);
+	sph_groestl512_close(&ctx_groestl, hash);

 	SHA256_Init(&sha256);
-	SHA256_Update(&sha256,(unsigned char *)hashA, 64);
-	SHA256_Final((unsigned char *)hashB, &sha256);
+	SHA256_Update(&sha256,(unsigned char *)hash, 64);
+	SHA256_Final((unsigned char *)hash, &sha256);

-	memcpy(state, hashB, 32);
+	memcpy(state, hash, 32);
 }

 static bool init[MAX_GPUS] = { 0 };

-extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
+int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
+	uint32_t _ALIGN(64) endiandata[32];
 	uint32_t start_nonce = pdata[19]++;
 	uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17);
 	throughput = min(throughput, max_nonce - start_nonce);
@@ -51,14 +48,12 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
 	if(!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
-
 		myriadgroestl_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}

-	uint32_t _ALIGN(64) endiandata[32];
-	for (int kk=0; kk < 32; kk++)
-		be32enc(&endiandata[kk], pdata[kk]);
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);

 	// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
 	myriadgroestl_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
@@ -74,19 +69,21 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
 		if (foundNounce < UINT32_MAX)
 		{
 			uint32_t _ALIGN(64) tmpHash[8];
-			endiandata[19] = SWAP32(foundNounce);
+			endiandata[19] = swab32(foundNounce);
 			myriadhash(tmpHash, endiandata);
 			if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) {
 				pdata[19] = foundNounce;
 				free(outputHash);
-				return true;
+				return 1;
 			} else {
-				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
+				applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!",
+					device_map[thr_id], foundNounce);
 			}
 		}

-		if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
+		if ((uint64_t) pdata[19] + throughput > max_nonce) {
 			pdata[19] = max_nonce;
+			*hashes_done = max_nonce - start_nonce + 1;
 			break;
 		}
 		pdata[19] += throughput;