From 74e94fa1ec471e2d6807492c985fc5c4c87d4862 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 10 May 2015 20:07:20 +0200 Subject: [PATCH] myriad/groestl: some more cleanup + tabs... --- groestlcoin.cpp | 145 ++++++++++++++++++++++------------------------ myriadgroestl.cpp | 37 ++++++------ 2 files changed, 87 insertions(+), 95 deletions(-) diff --git a/groestlcoin.cpp b/groestlcoin.cpp index b501284..400b8ef 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -9,89 +9,84 @@ #include "miner.h" -#define SWAP32(x) \ - ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ - (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) - -// CPU-groestl -extern "C" void groestlhash(void *state, const void *input) +// CPU hash +void groestlhash(void *state, const void *input) { - sph_groestl512_context ctx_groestl; - - //these uint512 in the c++ source of the client are backed by an array of uint32 - uint32_t hashA[16], hashB[16]; + uint32_t _ALIGN(64) hash[16]; + sph_groestl512_context ctx_groestl; - sph_groestl512_init(&ctx_groestl); - sph_groestl512 (&ctx_groestl, input, 80); //6 - sph_groestl512_close(&ctx_groestl, hashA); //7 + sph_groestl512_init(&ctx_groestl); + sph_groestl512(&ctx_groestl, input, 80); + sph_groestl512_close(&ctx_groestl, hash); - sph_groestl512_init(&ctx_groestl); - sph_groestl512 (&ctx_groestl, hashA, 64); //6 - sph_groestl512_close(&ctx_groestl, hashB); //7 + sph_groestl512_init(&ctx_groestl); + sph_groestl512(&ctx_groestl, hash, 64); + sph_groestl512_close(&ctx_groestl, hash); - memcpy(state, hashB, 32); + memcpy(state, hash, 32); } static bool init[MAX_GPUS] = { 0 }; -extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, - uint32_t max_nonce, unsigned long *hashes_done) +int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) { - uint32_t start_nonce = pdata[19]; - uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8 - throughput = min(throughput, max_nonce - start_nonce); - - uint32_t *outputHash = (uint32_t*)malloc(throughput * 64); - - if (opt_benchmark) - ((uint32_t*)ptarget)[7] = 0x000000ff; - - // init - if(!init[thr_id]) - { - cudaSetDevice(device_map[thr_id]); - groestlcoin_cpu_init(thr_id, throughput); - init[thr_id] = true; - } - - // Endian Drehung ist notwendig - uint32_t endiandata[32]; - for (int kk=0; kk < 32; kk++) - be32enc(&endiandata[kk], pdata[kk]); - - // Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt) - groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget); - - do { - // GPU - uint32_t foundNounce = UINT32_MAX; - - *hashes_done = pdata[19] - start_nonce + throughput; - - groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce); - - if(foundNounce < UINT32_MAX) - { - uint32_t _ALIGN(64) tmpHash[8]; - endiandata[19] = SWAP32(foundNounce); - groestlhash(tmpHash, endiandata); - - if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) { - pdata[19] = foundNounce; - free(outputHash); - return true; - } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce); - } - } - - if (pdata[19] + throughput < pdata[19]) - pdata[19] = max_nonce; - else pdata[19] += throughput; - - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); - - free(outputHash); - return 0; + uint32_t _ALIGN(64) endiandata[20]; + uint32_t start_nonce = pdata[19]; + uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8 + throughput = min(throughput, max_nonce - start_nonce); + + uint32_t *outputHash = (uint32_t*)malloc(throughput * 64); + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x000000ff; + + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + groestlcoin_cpu_init(thr_id, throughput); + init[thr_id] = true; + } + + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], pdata[k]); + + groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget); + + do { + uint32_t foundNounce = UINT32_MAX; + + *hashes_done = pdata[19] - start_nonce + throughput; + + // GPU hash + groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce); + + if (foundNounce < UINT32_MAX) + { + uint32_t _ALIGN(64) tmpHash[8]; + endiandata[19] = swab32(foundNounce); + groestlhash(tmpHash, endiandata); + + if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) { + pdata[19] = foundNounce; + free(outputHash); + return true; + } else { + applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", + device_map[thr_id], foundNounce); + } + } + + if ((uint64_t) pdata[19] + throughput > max_nonce) { + pdata[19] = max_nonce; + *hashes_done = max_nonce - start_nonce + 1; + break; + } + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + free(outputHash); + return 0; } diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index a3e0340..5464819 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -12,32 +12,29 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads); void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn); void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce); -#define SWAP32(x) \ - ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ - (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) - -extern "C" void myriadhash(void *state, const void *input) +void myriadhash(void *state, const void *input) { - uint32_t hashA[16], hashB[16]; + uint32_t _ALIGN(64) hash[16]; sph_groestl512_context ctx_groestl; SHA256_CTX sha256; sph_groestl512_init(&ctx_groestl); - sph_groestl512 (&ctx_groestl, input, 80); - sph_groestl512_close(&ctx_groestl, hashA); + sph_groestl512(&ctx_groestl, input, 80); + sph_groestl512_close(&ctx_groestl, hash); SHA256_Init(&sha256); - SHA256_Update(&sha256,(unsigned char *)hashA, 64); - SHA256_Final((unsigned char *)hashB, &sha256); + SHA256_Update(&sha256,(unsigned char *)hash, 64); + SHA256_Final((unsigned char *)hash, &sha256); - memcpy(state, hashB, 32); + memcpy(state, hash, 32); } static bool init[MAX_GPUS] = { 0 }; -extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, +int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) { + uint32_t _ALIGN(64) endiandata[32]; uint32_t start_nonce = pdata[19]++; uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17); throughput = min(throughput, max_nonce - start_nonce); @@ -51,14 +48,12 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar if(!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - myriadgroestl_cpu_init(thr_id, throughput); init[thr_id] = true; } - uint32_t _ALIGN(64) endiandata[32]; - for (int kk=0; kk < 32; kk++) - be32enc(&endiandata[kk], pdata[kk]); + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], pdata[k]); // Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt) myriadgroestl_cpu_setBlock(thr_id, endiandata, (void*)ptarget); @@ -74,19 +69,21 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar if (foundNounce < UINT32_MAX) { uint32_t _ALIGN(64) tmpHash[8]; - endiandata[19] = SWAP32(foundNounce); + endiandata[19] = swab32(foundNounce); myriadhash(tmpHash, endiandata); if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) { pdata[19] = foundNounce; free(outputHash); - return true; + return 1; } else { - applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce); + applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", + device_map[thr_id], foundNounce); } } - if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) { + if ((uint64_t) pdata[19] + throughput > max_nonce) { pdata[19] = max_nonce; + *hashes_done = max_nonce - start_nonce + 1; break; } pdata[19] += throughput;