1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-01-10 14:57:53 +00:00

Add cuda error checks on qubit algos

And rename doom to luffa, like djm34
This commit is contained in:
Tanguy Pruvot 2014-09-11 01:59:43 +02:00
parent 1aec4555cc
commit 23f0cee61f
5 changed files with 76 additions and 45 deletions

View File

@ -58,12 +58,12 @@ its command line interface and options.
heavy use to mine Heavycoin heavy use to mine Heavycoin
mjollnir use to mine Mjollnircoin mjollnir use to mine Mjollnircoin
deep use to mine Deepcoin deep use to mine Deepcoin
doom use to mine Doomcoin
fugue256 use to mine Fuguecoin fugue256 use to mine Fuguecoin
groestl use to mine Groestlcoin groestl use to mine Groestlcoin
dmd-gr use to mine Diamond-Groestl dmd-gr use to mine Diamond-Groestl
myr-gr use to mine Myriad-Groest myr-gr use to mine Myriad-Groest
jackpot use to mine Jackpotcoin jackpot use to mine Jackpotcoin
luffa use to mine Doomcoin
quark use to mine Quarkcoin quark use to mine Quarkcoin
qubit use to mine Qubit Algo qubit use to mine Qubit Algo
anime use to mine Animecoin anime use to mine Animecoin

View File

@ -136,6 +136,7 @@ typedef enum {
ALGO_GROESTL, ALGO_GROESTL,
ALGO_HEAVY, /* Heavycoin hash */ ALGO_HEAVY, /* Heavycoin hash */
ALGO_JACKPOT, ALGO_JACKPOT,
ALGO_LUFFA_DOOM,
ALGO_MJOLLNIR, /* Mjollnir hash */ ALGO_MJOLLNIR, /* Mjollnir hash */
ALGO_MYR_GR, ALGO_MYR_GR,
ALGO_NIST5, ALGO_NIST5,
@ -156,12 +157,13 @@ static const char *algo_names[] = {
"blake", "blake",
"blakecoin", "blakecoin",
"deep", "deep",
"doom", "doom", /* is luffa */
"fresh", "fresh",
"fugue256", "fugue256",
"groestl", "groestl",
"heavy", "heavy",
"jackpot", "jackpot",
"luffa",
"mjollnir", "mjollnir",
"myr-gr", "myr-gr",
"nist5", "nist5",
@ -242,12 +244,12 @@ Options:\n\
blake Blake 256 (like NEOS blake)\n\ blake Blake 256 (like NEOS blake)\n\
blakecoin Old Blake 256 (8 rounds)\n\ blakecoin Old Blake 256 (8 rounds)\n\
deep Deepcoin hash\n\ deep Deepcoin hash\n\
doom Doomcoin hash\n\
fresh Freshcoin hash (shavite 80)\n\ fresh Freshcoin hash (shavite 80)\n\
fugue256 Fuguecoin hash\n\ fugue256 Fuguecoin hash\n\
groestl Groestlcoin hash\n\ groestl Groestlcoin hash\n\
heavy Heavycoin hash\n\ heavy Heavycoin hash\n\
jackpot Jackpot hash\n\ jackpot Jackpot hash\n\
luffa Doomcoin hash\n\
mjollnir Mjollnircoin hash\n\ mjollnir Mjollnircoin hash\n\
myr-gr Myriad-Groestl hash\n\ myr-gr Myriad-Groestl hash\n\
nist5 NIST5 (TalkCoin) hash\n\ nist5 NIST5 (TalkCoin) hash\n\

View File

@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
qubit_luffa512_cpu_init(thr_id, throughput); qubit_luffa512_cpu_init(thr_id, throughput);
@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
pdata[19] += throughput; pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); if ((uint64_t) pdata[19] + throughput > max_nonce) {
pdata[19] = max_nonce;
break;
}
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1; *hashes_done = pdata[19] - first_nonce + 1;
return 0; return 0;

View File

@ -23,13 +23,20 @@
#include "cuda_helper.h" #include "cuda_helper.h"
#define MAXU 0xffffffffU
typedef unsigned char BitSequence; typedef unsigned char BitSequence;
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t pTarget[8]; __constant__ uint32_t c_Target[8];
uint32_t *d_lnounce[8]; static uint32_t *h_resNounce[8];
uint32_t *d_LNonce[8]; static uint32_t *d_resNounce[8];
#define NBN 1 /* max results, could be 2, see blake32.cu */
#if NBN > 1
static uint32_t extra_results[2] = { MAXU, MAXU };
#endif
typedef struct { typedef struct {
uint32_t buffer[8]; /* Buffer to be hashed */ uint32_t buffer[8]; /* Buffer to be hashed */
@ -380,64 +387,75 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou
{ {
uint32_t nounce = startNounce + thread; uint32_t nounce = startNounce + thread;
union { union {
uint64_t buf64[16]; uint64_t buf64[16];
uint32_t buf32[32]; uint32_t buf32[32];
} buff; } buff;
uint32_t Hash[16]; uint32_t Hash[16];
#pragma unroll 16 #pragma unroll 16
for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
// die Nounce durch die thread-spezifische ersetzen // Tested nonce
buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
hashState state; hashState state;
#pragma unroll 40 #pragma unroll 40
for(int i=0;i<40;i++) state.chainv[i] = c_IV[i]; for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
#pragma unroll 8
#pragma unroll 8
for(int i=0;i<8;i++) state.buffer[i] = 0; for(int i=0;i<8;i++) state.buffer[i] = 0;
Update512(&state, (BitSequence*)buff.buf32); Update512(&state, (BitSequence*)buff.buf32);
finalization512(&state, Hash); finalization512(&state, Hash);
bool rc = true; /* dont ask me why not a simple if (Hash[i] > c_Target[i]) return;
* we lose 20% in perfs without the position test */
int position = -1; int position = -1;
#pragma unroll 8 #pragma unroll 8
for (int i = 7; i >= 0; i--) { for (int i = 7; i >= 0; i--) {
if (Hash[i] > pTarget[i]) { if (Hash[i] > c_Target[i]) {
if(position < i) { if (position < i) {
position = i; return;
rc = false;
} }
} }
if (Hash[i] < pTarget[i]) { if (Hash[i] < c_Target[i]) {
if(position < i) { if (position < i) {
position = i; position = i;
rc = true; //break; /* impact perfs, unroll ? */
} }
} }
} }
if(rc && resNounce[0] > nounce) #if NBN == 1
if (resNounce[0] > nounce) {
resNounce[0] = nounce; resNounce[0] = nounce;
}
#else
/* keep the smallest nounce, + extra one if found */
if (resNounce[0] > nounce) {
resNounce[1] = resNounce[0];
resNounce[0] = nounce;
} else {
resNounce[1] = nounce;
}
#endif
} }
} }
__host__ __host__
void qubit_luffa512_cpu_init(int thr_id, int threads) void qubit_luffa512_cpu_init(int thr_id, int threads)
{ {
cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice ); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice ); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice));
cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t)); CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t)));
cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t)); CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t)));
} }
__host__ __host__
uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
{ {
uint32_t result = 0xffffffff; uint32_t result = MAXU;
cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t)); cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t));
const int threadsperblock = 256; const int threadsperblock = 256;
dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 grid((threads + threadsperblock-1)/threadsperblock);
@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
size_t shared_size = 0; size_t shared_size = 0;
qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]); qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
MyStreamSynchronize(NULL, order, thr_id); cudaDeviceSynchronize();
cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
//cudaThreadSynchronize(); //cudaThreadSynchronize();
result = *d_lnounce[thr_id]; result = h_resNounce[thr_id][0];
#if NBN > 1
extra_results[0] = h_resNounce[thr_id][1];
#endif
}
return result; return result;
} }
@ -462,7 +484,7 @@ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u
dim3 block(threadsperblock); dim3 block(threadsperblock);
size_t shared_size = 0; size_t shared_size = 0;
qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash); qubit_luffa512_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
MyStreamSynchronize(NULL, order, thr_id); MyStreamSynchronize(NULL, order, thr_id);
} }
@ -470,6 +492,7 @@ __host__
void qubit_luffa512_cpu_setBlock_80(void *pdata) void qubit_luffa512_cpu_setBlock_80(void *pdata)
{ {
unsigned char PaddedMessage[128]; unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 80); memcpy(PaddedMessage, pdata, 80);
memset(PaddedMessage+80, 0, 48); memset(PaddedMessage+80, 0, 48);
PaddedMessage[80] = 0x80; PaddedMessage[80] = 0x80;
@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata)
PaddedMessage[126] = 0x02; PaddedMessage[126] = 0x02;
PaddedMessage[127] = 0x80; PaddedMessage[127] = 0x80;
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
} }
__host__ __host__
void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget) void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
{ {
unsigned char PaddedMessage[128]; unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 80); memcpy(PaddedMessage, pdata, 80);
memset(PaddedMessage+80, 0, 48); memset(PaddedMessage+80, 0, 48);
PaddedMessage[80] = 0x80; PaddedMessage[80] = 0x80;
PaddedMessage[111] = 1; PaddedMessage[111] = 1;
PaddedMessage[126] = 0x02; PaddedMessage[126] = 0x02;
PaddedMessage[127] = 0x80; PaddedMessage[127] = 0x80;
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
} }

8
util.c
View File

@ -1474,10 +1474,6 @@ void print_hash_tests(void)
deephash(&hash[0], &buf[0]); deephash(&hash[0], &buf[0]);
printpfx("deep", hash); printpfx("deep", hash);
memset(hash, 0, sizeof hash);
doomhash(&hash[0], &buf[0]);
printpfx("doom", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
fresh_hash(&hash[0], &buf[0]); fresh_hash(&hash[0], &buf[0]);
printpfx("fresh", hash); printpfx("fresh", hash);
@ -1498,6 +1494,10 @@ void print_hash_tests(void)
jackpothash(&hash[0], &buf[0]); jackpothash(&hash[0], &buf[0]);
printpfx("jackpot", hash); printpfx("jackpot", hash);
memset(hash, 0, sizeof hash);
doomhash(&hash[0], &buf[0]);
printpfx("luffa", hash);
memset(hash, 0, sizeof hash); memset(hash, 0, sizeof hash);
myriadhash(&hash[0], &buf[0]); myriadhash(&hash[0], &buf[0]);
printpfx("myriad", hash); printpfx("myriad", hash);