From 23f0cee61fd976285524e6d20a096cdd4d3b8240 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 11 Sep 2014 01:59:43 +0200 Subject: [PATCH] Add cuda error checks on qubit algos And rename doom to luffa, like djm34 --- README.txt | 2 +- cpu-miner.c | 6 ++- qubit/doom.cu | 9 +++- qubit/qubit_luffa512.cu | 96 +++++++++++++++++++++++++---------------- util.c | 8 ++-- 5 files changed, 76 insertions(+), 45 deletions(-) diff --git a/README.txt b/README.txt index 61ae2c6..cee450a 100644 --- a/README.txt +++ b/README.txt @@ -58,12 +58,12 @@ its command line interface and options. heavy use to mine Heavycoin mjollnir use to mine Mjollnircoin deep use to mine Deepcoin - doom use to mine Doomcoin fugue256 use to mine Fuguecoin groestl use to mine Groestlcoin dmd-gr use to mine Diamond-Groestl myr-gr use to mine Myriad-Groest jackpot use to mine Jackpotcoin + luffa use to mine Doomcoin quark use to mine Quarkcoin qubit use to mine Qubit Algo anime use to mine Animecoin diff --git a/cpu-miner.c b/cpu-miner.c index 287fdc9..830b68a 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -136,6 +136,7 @@ typedef enum { ALGO_GROESTL, ALGO_HEAVY, /* Heavycoin hash */ ALGO_JACKPOT, + ALGO_LUFFA_DOOM, ALGO_MJOLLNIR, /* Mjollnir hash */ ALGO_MYR_GR, ALGO_NIST5, @@ -156,12 +157,13 @@ static const char *algo_names[] = { "blake", "blakecoin", "deep", - "doom", + "doom", /* is luffa */ "fresh", "fugue256", "groestl", "heavy", "jackpot", + "luffa", "mjollnir", "myr-gr", "nist5", @@ -242,12 +244,12 @@ Options:\n\ blake Blake 256 (like NEOS blake)\n\ blakecoin Old Blake 256 (8 rounds)\n\ deep Deepcoin hash\n\ - doom Doomcoin hash\n\ fresh Freshcoin hash (shavite 80)\n\ fugue256 Fuguecoin hash\n\ groestl Groestlcoin hash\n\ heavy Heavycoin hash\n\ jackpot Jackpot hash\n\ + luffa Doomcoin hash\n\ mjollnir Mjollnircoin hash\n\ myr-gr Myriad-Groestl hash\n\ nist5 NIST5 (TalkCoin) hash\n\ diff --git a/qubit/doom.cu b/qubit/doom.cu index 30719c7..8f93ac4 100644 --- a/qubit/doom.cu +++ b/qubit/doom.cu @@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, { cudaSetDevice(device_map[thr_id]); - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); qubit_luffa512_cpu_init(thr_id, throughput); @@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, pdata[19] += throughput; - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + if ((uint64_t) pdata[19] + throughput > max_nonce) { + pdata[19] = max_nonce; + break; + } + + } while (!work_restart[thr_id].restart); *hashes_done = pdata[19] - first_nonce + 1; return 0; diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu index 1d827d5..23d95bb 100644 --- a/qubit/qubit_luffa512.cu +++ b/qubit/qubit_luffa512.cu @@ -23,13 +23,20 @@ #include "cuda_helper.h" +#define MAXU 0xffffffffU + typedef unsigned char BitSequence; __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) -__constant__ uint32_t pTarget[8]; +__constant__ uint32_t c_Target[8]; + +static uint32_t *h_resNounce[8]; +static uint32_t *d_resNounce[8]; -uint32_t *d_lnounce[8]; -uint32_t *d_LNonce[8]; +#define NBN 1 /* max results, could be 2, see blake32.cu */ +#if NBN > 1 +static uint32_t extra_results[2] = { MAXU, MAXU }; +#endif typedef struct { uint32_t buffer[8]; /* Buffer to be hashed */ @@ -380,64 +387,75 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou { uint32_t nounce = startNounce + thread; union { - uint64_t buf64[16]; - uint32_t buf32[32]; + uint64_t buf64[16]; + uint32_t buf32[32]; } buff; uint32_t Hash[16]; -#pragma unroll 16 + #pragma unroll 16 for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; - // die Nounce durch die thread-spezifische ersetzen + // Tested nonce buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); - hashState state; -#pragma unroll 40 + #pragma unroll 40 for(int i=0;i<40;i++) state.chainv[i] = c_IV[i]; -#pragma unroll 8 + + #pragma unroll 8 for(int i=0;i<8;i++) state.buffer[i] = 0; + Update512(&state, (BitSequence*)buff.buf32); finalization512(&state, Hash); - bool rc = true; + /* dont ask me why not a simple if (Hash[i] > c_Target[i]) return; + * we lose 20% in perfs without the position test */ int position = -1; -#pragma unroll 8 + #pragma unroll 8 for (int i = 7; i >= 0; i--) { - if (Hash[i] > pTarget[i]) { - if(position < i) { - position = i; - rc = false; + if (Hash[i] > c_Target[i]) { + if (position < i) { + return; } - } - if (Hash[i] < pTarget[i]) { - if(position < i) { + if (Hash[i] < c_Target[i]) { + if (position < i) { position = i; - rc = true; + //break; /* impact perfs, unroll ? */ } } } - if(rc && resNounce[0] > nounce) +#if NBN == 1 + if (resNounce[0] > nounce) { + resNounce[0] = nounce; + } +#else + /* keep the smallest nounce, + extra one if found */ + if (resNounce[0] > nounce) { + resNounce[1] = resNounce[0]; resNounce[0] = nounce; + } else { + resNounce[1] = nounce; + } +#endif } } __host__ void qubit_luffa512_cpu_init(int thr_id, int threads) { - cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice ); - cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice ); - cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t)); - cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t)); + CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice)); + CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice)); + CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t))); + CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t))); } __host__ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) { - uint32_t result = 0xffffffff; - cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t)); + uint32_t result = MAXU; + cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t)); const int threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); @@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start size_t shared_size = 0; - qubit_luffa512_gpu_finalhash_80<<>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]); - MyStreamSynchronize(NULL, order, thr_id); - cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); - //cudaThreadSynchronize(); - result = *d_lnounce[thr_id]; + qubit_luffa512_gpu_finalhash_80 <<>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]); + cudaDeviceSynchronize(); + if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) { + //cudaThreadSynchronize(); + result = h_resNounce[thr_id][0]; +#if NBN > 1 + extra_results[0] = h_resNounce[thr_id][1]; +#endif + } return result; } @@ -462,7 +484,7 @@ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u dim3 block(threadsperblock); size_t shared_size = 0; - qubit_luffa512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); + qubit_luffa512_gpu_hash_80 <<>> (threads, startNounce, d_outputHash); MyStreamSynchronize(NULL, order, thr_id); } @@ -470,6 +492,7 @@ __host__ void qubit_luffa512_cpu_setBlock_80(void *pdata) { unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); memset(PaddedMessage+80, 0, 48); PaddedMessage[80] = 0x80; @@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata) PaddedMessage[126] = 0x02; PaddedMessage[127] = 0x80; - cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice)); } __host__ void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget) { unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); memset(PaddedMessage+80, 0, 48); PaddedMessage[80] = 0x80; PaddedMessage[111] = 1; PaddedMessage[126] = 0x02; PaddedMessage[127] = 0x80; - cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); - cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice)); + CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice)); } \ No newline at end of file diff --git a/util.c b/util.c index 374afca..1c8cdb8 100644 --- a/util.c +++ b/util.c @@ -1474,10 +1474,6 @@ void print_hash_tests(void) deephash(&hash[0], &buf[0]); printpfx("deep", hash); - memset(hash, 0, sizeof hash); - doomhash(&hash[0], &buf[0]); - printpfx("doom", hash); - memset(hash, 0, sizeof hash); fresh_hash(&hash[0], &buf[0]); printpfx("fresh", hash); @@ -1498,6 +1494,10 @@ void print_hash_tests(void) jackpothash(&hash[0], &buf[0]); printpfx("jackpot", hash); + memset(hash, 0, sizeof hash); + doomhash(&hash[0], &buf[0]); + printpfx("luffa", hash); + memset(hash, 0, sizeof hash); myriadhash(&hash[0], &buf[0]); printpfx("myriad", hash);