
Add CUDA error checks on qubit algos

And rename doom to luffa, matching djm34
Tanguy Pruvot, 10 years ago · parent commit 23f0cee61f
  1. README.txt (2 changed lines)
  2. cpu-miner.c (6 changed lines)
  3. qubit/doom.cu (9 changed lines)
  4. qubit/qubit_luffa512.cu (96 changed lines)
  5. util.c (8 changed lines)
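The "CUDA error checks" in the commit message are the CUDA_SAFE_CALL() wrappers added around cudaMalloc, cudaMallocHost and cudaMemcpyToSymbol in the hunks below. The macro itself is defined in cuda_helper.h and is not shown in this diff; a minimal sketch of the pattern such a macro usually expands to (an assumption, not the exact definition used here) is:

    // Hypothetical CUDA_SAFE_CALL-style macro: run the call, check the
    // returned cudaError_t, and abort with a readable message on failure.
    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>

    #define CUDA_SAFE_CALL(call) do {                                     \
            cudaError_t err = (call);                                     \
            if (err != cudaSuccess) {                                     \
                    fprintf(stderr, "CUDA error at %s:%d: %s\n",          \
                            __FILE__, __LINE__, cudaGetErrorString(err)); \
                    exit(EXIT_FAILURE);                                   \
            }                                                             \
    } while (0)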

README.txt (2 changed lines)

@@ -58,12 +58,12 @@ its command line interface and options.
heavy use to mine Heavycoin
mjollnir use to mine Mjollnircoin
deep use to mine Deepcoin
doom use to mine Doomcoin
fugue256 use to mine Fuguecoin
groestl use to mine Groestlcoin
dmd-gr use to mine Diamond-Groestl
myr-gr use to mine Myriad-Groestl
jackpot use to mine Jackpotcoin
luffa use to mine Doomcoin
quark use to mine Quarkcoin
qubit use to mine Qubit Algo
anime use to mine Animecoin

cpu-miner.c (6 changed lines)

@@ -136,6 +136,7 @@ typedef enum {
ALGO_GROESTL,
ALGO_HEAVY, /* Heavycoin hash */
ALGO_JACKPOT,
ALGO_LUFFA_DOOM,
ALGO_MJOLLNIR, /* Mjollnir hash */
ALGO_MYR_GR,
ALGO_NIST5,
@@ -156,12 +157,13 @@ static const char *algo_names[] = {
"blake",
"blakecoin",
"deep",
"doom",
"doom", /* is luffa */
"fresh",
"fugue256",
"groestl",
"heavy",
"jackpot",
"luffa",
"mjollnir",
"myr-gr",
"nist5",
@@ -242,12 +244,12 @@ Options:\n\
blake Blake 256 (like NEOS blake)\n\
blakecoin Old Blake 256 (8 rounds)\n\
deep Deepcoin hash\n\
doom Doomcoin hash\n\
fresh Freshcoin hash (shavite 80)\n\
fugue256 Fuguecoin hash\n\
groestl Groestlcoin hash\n\
heavy Heavycoin hash\n\
jackpot Jackpot hash\n\
luffa Doomcoin hash\n\
mjollnir Mjollnircoin hash\n\
myr-gr Myriad-Groestl hash\n\
nist5 NIST5 (TalkCoin) hash\n\
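Both "doom" and "luffa" now appear in algo_names[], with "doom" kept as a backward-compatible alias for the same Luffa-based hash. The --algo option is resolved by matching the given string against this table; the actual parsing code is outside this hunk, so the lookup below is only a hypothetical sketch of that mapping (find_algo and its parameters are illustrative, and it assumes the name table and the ALGO_* enum share the same order):

    #include <strings.h>

    // Hypothetical lookup: scan the parallel name table and return the
    // matching index, which is assumed to double as the ALGO_* value.
    static int find_algo(const char *arg, const char **names, int count)
    {
            for (int i = 0; i < count; i++) {
                    if (!strcasecmp(arg, names[i]))
                            return i;
            }
            return -1;      /* unknown algorithm name */
    }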

qubit/doom.cu (9 changed lines)

@@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
{
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
qubit_luffa512_cpu_init(thr_id, throughput);
@@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
if ((uint64_t) pdata[19] + throughput > max_nonce) {
pdata[19] = max_nonce;
break;
}
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
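The reworked loop exit in scanhash_doom avoids a 32-bit wrap-around: with the old test (pdata[19] < max_nonce), adding throughput to a nonce near 0xffffffff can overflow the 32-bit counter and leave the loop scanning from a wrapped value. The new check promotes the sum to 64 bits and clamps pdata[19] to max_nonce before breaking out of the loop. A small standalone illustration of the difference, using hypothetical values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t nonce = 0xfffff000u;   /* close to the 32-bit limit */
            uint32_t throughput = 0x2000u;
            uint32_t max_nonce = 0xffffffffu;

            /* 32-bit addition wraps: 0xfffff000 + 0x2000 becomes 0x00001000 */
            printf("wrapped sum:   0x%08x\n", nonce + throughput);

            /* promoting to 64 bits keeps the true value, so the clamp works */
            if ((uint64_t) nonce + throughput > max_nonce)
                    nonce = max_nonce;      /* stop at the end of the range */
            printf("clamped nonce: 0x%08x\n", nonce);
            return 0;
    }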

qubit/qubit_luffa512.cu (96 changed lines)

@@ -23,13 +23,20 @@
#include "cuda_helper.h"
#define MAXU 0xffffffffU
typedef unsigned char BitSequence;
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t pTarget[8];
__constant__ uint32_t c_Target[8];
static uint32_t *h_resNounce[8];
static uint32_t *d_resNounce[8];
uint32_t *d_lnounce[8];
uint32_t *d_LNonce[8];
#define NBN 1 /* max results, could be 2, see blake32.cu */
#if NBN > 1
static uint32_t extra_results[2] = { MAXU, MAXU };
#endif
typedef struct {
uint32_t buffer[8]; /* Buffer to be hashed */
@@ -380,64 +387,75 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou
{
uint32_t nounce = startNounce + thread;
union {
uint64_t buf64[16];
uint32_t buf32[32];
} buff;
uint32_t Hash[16];
#pragma unroll 16
for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
// replace the nonce with the thread-specific one
// Tested nonce
buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
hashState state;
#pragma unroll 40
for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
#pragma unroll 8
for(int i=0;i<8;i++) state.buffer[i] = 0;
Update512(&state, (BitSequence*)buff.buf32);
finalization512(&state, Hash);
bool rc = true;
/* don't ask why this isn't a simple if (Hash[i] > c_Target[i]) return;
 * we lose 20% performance without the position test */
int position = -1;
#pragma unroll 8
for (int i = 7; i >= 0; i--) {
if (Hash[i] > pTarget[i]) {
if(position < i) {
position = i;
rc = false;
if (Hash[i] > c_Target[i]) {
if (position < i) {
return;
}
}
if (Hash[i] < pTarget[i]) {
if(position < i) {
if (Hash[i] < c_Target[i]) {
if (position < i) {
position = i;
rc = true;
//break; /* impact perfs, unroll ? */
}
}
}
if(rc && resNounce[0] > nounce)
#if NBN == 1
if (resNounce[0] > nounce) {
resNounce[0] = nounce;
}
#else
/* keep the smallest nounce, + extra one if found */
if (resNounce[0] > nounce) {
resNounce[1] = resNounce[0];
resNounce[0] = nounce;
} else {
resNounce[1] = nounce;
}
#endif
}
}
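The rewritten success test keeps the kernel's word-by-word comparison against the target: the 256-bit hash and c_Target are treated as eight 32-bit words and compared from the most significant word (index 7) downwards, and a nonce is only recorded when the hash does not exceed the target. A hypothetical host-side helper expressing the same ordering, for reference only (not part of the miner):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical host-side version of the kernel's test: compare hash
     * and target from the most significant 32-bit word down and accept
     * when hash <= target. */
    static bool hash_meets_target(const uint32_t hash[8], const uint32_t target[8])
    {
            for (int i = 7; i >= 0; i--) {
                    if (hash[i] > target[i])
                            return false;
                    if (hash[i] < target[i])
                            return true;
            }
            return true;    /* hash equals the target exactly */
    }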
__host__
void qubit_luffa512_cpu_init(int thr_id, int threads)
{
cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t)));
}
__host__
uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{
uint32_t result = 0xffffffff;
cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
uint32_t result = MAXU;
cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t));
const int threadsperblock = 256;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
@@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
size_t shared_size = 0;
qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
MyStreamSynchronize(NULL, order, thr_id);
cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
//cudaThreadSynchronize();
result = *d_lnounce[thr_id];
qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
cudaDeviceSynchronize();
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
//cudaThreadSynchronize();
result = h_resNounce[thr_id][0];
#if NBN > 1
extra_results[0] = h_resNounce[thr_id][1];
#endif
}
return result;
}
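The host wrapper now reads NBN result slots back from d_resNounce into page-locked h_resNounce (and only uses them when cudaMemcpy reports success), returning MAXU when no nonce met the target. A hedged sketch of how a scanhash loop such as scanhash_doom above typically consumes that return value; the surrounding variables are assumed to exist as in that function:

    /* Hypothetical fragment: thr_id, throughput, pdata, first_nonce,
     * d_hash, order and hashes_done are assumed to be defined as in
     * scanhash_doom above. */
    uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput,
                              pdata[19], d_hash[thr_id], order);
    if (foundNonce != MAXU) {
            pdata[19] = foundNonce;         /* report the winning nonce */
            *hashes_done = foundNonce - first_nonce + 1;
            return 1;                       /* one result found */
    }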
@@ -462,7 +484,7 @@ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u
dim3 block(threadsperblock);
size_t shared_size = 0;
qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
qubit_luffa512_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
MyStreamSynchronize(NULL, order, thr_id);
}
@@ -470,6 +492,7 @@ __host__
void qubit_luffa512_cpu_setBlock_80(void *pdata)
{
unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 80);
memset(PaddedMessage+80, 0, 48);
PaddedMessage[80] = 0x80;
@@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata)
PaddedMessage[126] = 0x02;
PaddedMessage[127] = 0x80;
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
}
__host__
void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
{
unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 80);
memset(PaddedMessage+80, 0, 48);
PaddedMessage[80] = 0x80;
PaddedMessage[111] = 1;
PaddedMessage[126] = 0x02;
PaddedMessage[127] = 0x80;
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
}

util.c (8 changed lines)

@@ -1474,10 +1474,6 @@ void print_hash_tests(void)
deephash(&hash[0], &buf[0]);
printpfx("deep", hash);
memset(hash, 0, sizeof hash);
doomhash(&hash[0], &buf[0]);
printpfx("doom", hash);
memset(hash, 0, sizeof hash);
fresh_hash(&hash[0], &buf[0]);
printpfx("fresh", hash);
@@ -1498,6 +1494,10 @@ void print_hash_tests(void)
jackpothash(&hash[0], &buf[0]);
printpfx("jackpot", hash);
memset(hash, 0, sizeof hash);
doomhash(&hash[0], &buf[0]);
printpfx("luffa", hash);
memset(hash, 0, sizeof hash);
myriadhash(&hash[0], &buf[0]);
printpfx("myriad", hash);
