mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-08 22:07:56 +00:00
Add cuda error checks on qubit algos
And rename doom to luffa, like djm34
This commit is contained in:
parent
1aec4555cc
commit
23f0cee61f
@ -58,12 +58,12 @@ its command line interface and options.
|
||||
heavy use to mine Heavycoin
|
||||
mjollnir use to mine Mjollnircoin
|
||||
deep use to mine Deepcoin
|
||||
doom use to mine Doomcoin
|
||||
fugue256 use to mine Fuguecoin
|
||||
groestl use to mine Groestlcoin
|
||||
dmd-gr use to mine Diamond-Groestl
|
||||
myr-gr use to mine Myriad-Groestl
|
||||
jackpot use to mine Jackpotcoin
|
||||
luffa use to mine Doomcoin
|
||||
quark use to mine Quarkcoin
|
||||
qubit use to mine Qubit Algo
|
||||
anime use to mine Animecoin
|
||||
|
@ -136,6 +136,7 @@ typedef enum {
|
||||
ALGO_GROESTL,
|
||||
ALGO_HEAVY, /* Heavycoin hash */
|
||||
ALGO_JACKPOT,
|
||||
ALGO_LUFFA_DOOM,
|
||||
ALGO_MJOLLNIR, /* Mjollnir hash */
|
||||
ALGO_MYR_GR,
|
||||
ALGO_NIST5,
|
||||
@ -156,12 +157,13 @@ static const char *algo_names[] = {
|
||||
"blake",
|
||||
"blakecoin",
|
||||
"deep",
|
||||
"doom",
|
||||
"doom", /* is luffa */
|
||||
"fresh",
|
||||
"fugue256",
|
||||
"groestl",
|
||||
"heavy",
|
||||
"jackpot",
|
||||
"luffa",
|
||||
"mjollnir",
|
||||
"myr-gr",
|
||||
"nist5",
|
||||
@ -242,12 +244,12 @@ Options:\n\
|
||||
blake Blake 256 (like NEOS blake)\n\
|
||||
blakecoin Old Blake 256 (8 rounds)\n\
|
||||
deep Deepcoin hash\n\
|
||||
doom Doomcoin hash\n\
|
||||
fresh Freshcoin hash (shavite 80)\n\
|
||||
fugue256 Fuguecoin hash\n\
|
||||
groestl Groestlcoin hash\n\
|
||||
heavy Heavycoin hash\n\
|
||||
jackpot Jackpot hash\n\
|
||||
luffa Doomcoin hash\n\
|
||||
mjollnir Mjollnircoin hash\n\
|
||||
myr-gr Myriad-Groestl hash\n\
|
||||
nist5 NIST5 (TalkCoin) hash\n\
|
||||
|
@ -51,7 +51,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
|
||||
{
|
||||
cudaSetDevice(device_map[thr_id]);
|
||||
|
||||
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
|
||||
|
||||
qubit_luffa512_cpu_init(thr_id, throughput);
|
||||
|
||||
@ -86,7 +86,12 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
|
||||
|
||||
pdata[19] += throughput;
|
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||
if ((uint64_t) pdata[19] + throughput > max_nonce) {
|
||||
pdata[19] = max_nonce;
|
||||
break;
|
||||
}
|
||||
|
||||
} while (!work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@ -23,13 +23,20 @@
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#define MAXU 0xffffffffU
|
||||
|
||||
typedef unsigned char BitSequence;
|
||||
|
||||
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
||||
__constant__ uint32_t pTarget[8];
|
||||
__constant__ uint32_t c_Target[8];
|
||||
|
||||
uint32_t *d_lnounce[8];
|
||||
uint32_t *d_LNonce[8];
|
||||
static uint32_t *h_resNounce[8];
|
||||
static uint32_t *d_resNounce[8];
|
||||
|
||||
#define NBN 1 /* max results, could be 2, see blake32.cu */
|
||||
#if NBN > 1
|
||||
static uint32_t extra_results[2] = { MAXU, MAXU };
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint32_t buffer[8]; /* Buffer to be hashed */
|
||||
@ -385,59 +392,70 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou
|
||||
} buff;
|
||||
uint32_t Hash[16];
|
||||
|
||||
#pragma unroll 16
|
||||
#pragma unroll 16
|
||||
for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
|
||||
|
||||
// die Nounce durch die thread-spezifische ersetzen
|
||||
// Tested nonce
|
||||
buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
|
||||
|
||||
|
||||
hashState state;
|
||||
#pragma unroll 40
|
||||
#pragma unroll 40
|
||||
for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
|
||||
#pragma unroll 8
|
||||
|
||||
#pragma unroll 8
|
||||
for(int i=0;i<8;i++) state.buffer[i] = 0;
|
||||
|
||||
Update512(&state, (BitSequence*)buff.buf32);
|
||||
finalization512(&state, Hash);
|
||||
|
||||
bool rc = true;
|
||||
/* don't ask me why not a simple if (Hash[i] > c_Target[i]) return;
|
||||
* we lose 20% in perfs without the position test */
|
||||
int position = -1;
|
||||
#pragma unroll 8
|
||||
#pragma unroll 8
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (Hash[i] > pTarget[i]) {
|
||||
if(position < i) {
|
||||
position = i;
|
||||
rc = false;
|
||||
if (Hash[i] > c_Target[i]) {
|
||||
if (position < i) {
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
if (Hash[i] < pTarget[i]) {
|
||||
if(position < i) {
|
||||
if (Hash[i] < c_Target[i]) {
|
||||
if (position < i) {
|
||||
position = i;
|
||||
rc = true;
|
||||
//break; /* impact perfs, unroll ? */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(rc && resNounce[0] > nounce)
|
||||
#if NBN == 1
|
||||
if (resNounce[0] > nounce) {
|
||||
resNounce[0] = nounce;
|
||||
}
|
||||
#else
|
||||
/* keep the smallest nounce, + extra one if found */
|
||||
if (resNounce[0] > nounce) {
|
||||
resNounce[1] = resNounce[0];
|
||||
resNounce[0] = nounce;
|
||||
} else {
|
||||
resNounce[1] = nounce;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
__host__
|
||||
void qubit_luffa512_cpu_init(int thr_id, int threads)
|
||||
{
|
||||
cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
|
||||
cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
|
||||
cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
|
||||
cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice));
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice));
|
||||
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], NBN * sizeof(uint32_t)));
|
||||
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], NBN * sizeof(uint32_t)));
|
||||
}
|
||||
|
||||
__host__
|
||||
uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
|
||||
{
|
||||
uint32_t result = 0xffffffff;
|
||||
cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t));
|
||||
uint32_t result = MAXU;
|
||||
cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t));
|
||||
const int threadsperblock = 256;
|
||||
|
||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||
@ -445,11 +463,15 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start
|
||||
|
||||
size_t shared_size = 0;
|
||||
|
||||
qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
|
||||
MyStreamSynchronize(NULL, order, thr_id);
|
||||
cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
qubit_luffa512_gpu_finalhash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash, d_resNounce[thr_id]);
|
||||
cudaDeviceSynchronize();
|
||||
if (cudaSuccess == cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], NBN * sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
|
||||
//cudaThreadSynchronize();
|
||||
result = *d_lnounce[thr_id];
|
||||
result = h_resNounce[thr_id][0];
|
||||
#if NBN > 1
|
||||
extra_results[0] = h_resNounce[thr_id][1];
|
||||
#endif
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -462,7 +484,7 @@ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u
|
||||
dim3 block(threadsperblock);
|
||||
size_t shared_size = 0;
|
||||
|
||||
qubit_luffa512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
|
||||
qubit_luffa512_gpu_hash_80 <<<grid, block, shared_size>>> (threads, startNounce, d_outputHash);
|
||||
MyStreamSynchronize(NULL, order, thr_id);
|
||||
}
|
||||
|
||||
@ -470,6 +492,7 @@ __host__
|
||||
void qubit_luffa512_cpu_setBlock_80(void *pdata)
|
||||
{
|
||||
unsigned char PaddedMessage[128];
|
||||
|
||||
memcpy(PaddedMessage, pdata, 80);
|
||||
memset(PaddedMessage+80, 0, 48);
|
||||
PaddedMessage[80] = 0x80;
|
||||
@ -477,20 +500,21 @@ void qubit_luffa512_cpu_setBlock_80(void *pdata)
|
||||
PaddedMessage[126] = 0x02;
|
||||
PaddedMessage[127] = 0x80;
|
||||
|
||||
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
__host__
|
||||
void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
|
||||
{
|
||||
unsigned char PaddedMessage[128];
|
||||
|
||||
memcpy(PaddedMessage, pdata, 80);
|
||||
memset(PaddedMessage+80, 0, 48);
|
||||
PaddedMessage[80] = 0x80;
|
||||
PaddedMessage[111] = 1;
|
||||
PaddedMessage[126] = 0x02;
|
||||
PaddedMessage[127] = 0x80;
|
||||
cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
|
||||
|
||||
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
|
||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
|
||||
}
|
8
util.c
8
util.c
@ -1474,10 +1474,6 @@ void print_hash_tests(void)
|
||||
deephash(&hash[0], &buf[0]);
|
||||
printpfx("deep", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
doomhash(&hash[0], &buf[0]);
|
||||
printpfx("doom", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
fresh_hash(&hash[0], &buf[0]);
|
||||
printpfx("fresh", hash);
|
||||
@ -1498,6 +1494,10 @@ void print_hash_tests(void)
|
||||
jackpothash(&hash[0], &buf[0]);
|
||||
printpfx("jackpot", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
doomhash(&hash[0], &buf[0]);
|
||||
printpfx("luffa", hash);
|
||||
|
||||
memset(hash, 0, sizeof hash);
|
||||
myriadhash(&hash[0], &buf[0]);
|
||||
printpfx("myriad", hash);
|
||||
|
Loading…
Reference in New Issue
Block a user