Browse Source

migrate 2nd nonce storage of most algos

This allow to keep pdata[19] as cursor between scans, and later, to sort them..

remains... heavy, scrypt, sia...
2upstream
Tanguy Pruvot 7 years ago
parent
commit
0ff75791e5
  1. 41
      Algo256/blake256.cu
  2. 40
      Algo256/bmw.cu
  3. 10
      Algo256/decred.cu
  4. 31
      Algo256/keccak256.cu
  5. 29
      Algo256/vanilla.cu
  6. 46
      JHA/jackpotcoin.cu
  7. 22
      ccminer.cpp
  8. 54
      lyra2/lyra2RE.cu
  9. 50
      lyra2/lyra2REv2.cu
  10. 34
      myriadgroestl.cpp
  11. 23
      neoscrypt/neoscrypt.cpp
  12. 33
      pentablake.cu
  13. 47
      quark/nist5.cu
  14. 43
      quark/quarkcoin.cu
  15. 46
      qubit/deep.cu
  16. 41
      qubit/luffa.cu
  17. 45
      qubit/qubit.cu
  18. 61
      skein.cu
  19. 39
      skein2.cpp
  20. 47
      x11/c11.cu
  21. 51
      x11/fresh.cu
  22. 50
      x11/s3.cu
  23. 47
      x11/sib.cu
  24. 23
      x11/veltor.cu
  25. 47
      x11/x11.cu
  26. 53
      x11/x11evo.cu
  27. 39
      x13/x13.cu
  28. 29
      x15/whirlpool.cu
  29. 45
      x15/x14.cu
  30. 45
      x15/x15.cu
  31. 47
      x17/x17.cu
  32. 49
      zr5.cu

41
Algo256/blake256.cu

@ -45,7 +45,7 @@ static uint32_t *h_resNonce[MAX_GPUS]; @@ -45,7 +45,7 @@ static uint32_t *h_resNonce[MAX_GPUS];
/* max count of found nonces in one call */
#define NBN 2
static uint32_t extra_results[NBN] = { UINT32_MAX };
static __thread uint32_t extra_results[NBN] = { UINT32_MAX };
#define GSPREC(a,b,c,d,x,y) { \
v[a] += (m[x] ^ c_u256[y]) + v[b]; \
@ -519,46 +519,51 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non @@ -519,46 +519,51 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
do {
// GPU HASH (second block only, first is midstate)
uint32_t foundNonce = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds);
work->nonces[0] = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds);
if (foundNonce != UINT32_MAX)
*hashes_done = pdata[19] - first_nonce + throughput;
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhashcpu[8];
uint32_t Htarg = ptarget[6];
uint32_t _ALIGN(64) vhashcpu[8];
const uint32_t Htarg = ptarget[6];
for (int k=16; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
be32enc(&endiandata[19], foundNonce);
be32enc(&endiandata[19], work->nonces[0]);
blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
{
rc = 1;
work->valid_nonces = 1;
work_set_target_ratio(work, vhashcpu);
*hashes_done = pdata[19] - first_nonce + throughput;
pdata[19] = foundNonce;
#if NBN > 1
if (extra_results[0] != UINT32_MAX) {
be32enc(&endiandata[19], extra_results[0]);
work->nonces[1] = extra_results[0];
be32enc(&endiandata[19], work->nonces[1]);
blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
pdata[21] = extra_results[0];
if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhashcpu);
xchg(pdata[21], pdata[19]);
xchg(work->nonces[0], work->nonces[1]);
} else {
bn_set_target_ratio(work, vhashcpu, 1);
}
rc = 2;
work->valid_nonces = 2;
}
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
extra_results[0] = UINT32_MAX;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
#endif
return rc;
return work->valid_nonces;
}
else if (opt_debug) {
applog_hash((uchar*)ptarget);
applog_compare_hash((uchar*)vhashcpu, (uchar*)ptarget);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhashcpu[6] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

40
Algo256/bmw.cu

@ -43,7 +43,6 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u @@ -43,7 +43,6 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
bool swapnonce = true;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -77,24 +76,37 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u @@ -77,24 +76,37 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
cuda_check_cpu_setTarget(ptarget);
do {
bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], (int) swapnonce);
uint32_t foundNonce = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
endiandata[19] = swab32_if(foundNonce, swapnonce);
bmw_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
pdata[19] = swab32_if(foundNonce,!swapnonce);
work_set_target_ratio(work, vhash64);
return 1;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
bmw_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
bmw_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

10
Algo256/decred.cu

@ -391,6 +391,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce @@ -391,6 +391,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
// GPU HASH
decred_gpu_hash_nonce <<<grid, block>>> (throughput, (*pnonce), d_resNonce[thr_id], targetHigh);
*hashes_done = (*pnonce) - first_nonce + throughput;
// first cell contains the valid nonces count
cudaMemcpy(resNonces, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
@ -404,9 +406,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce @@ -404,9 +406,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget))
{
int rc = work->valid_nonces = 1;
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
*hashes_done = (*pnonce) - first_nonce + throughput;
work->nonces[0] = swab32(resNonces[1]);
*pnonce = work->nonces[0];
@ -417,7 +418,6 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce @@ -417,7 +418,6 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) {
work->nonces[1] = swab32(resNonces[n]);
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
// we really want the best first ? depends...
work->shareratio[1] = work->shareratio[0];
@ -429,7 +429,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce @@ -429,7 +429,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
}
rc = 2; // MAX_NONCES submit limited to 2
work->valid_nonces = 2; // MAX_NONCES submit limited to 2
gpulog(LOG_DEBUG, thr_id, "multiple nonces 1:%08x (%g) %u:%08x (%g)",
work->nonces[0], work->sharediff[0], n, work->nonces[1], work->sharediff[1]);
@ -438,7 +438,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce @@ -438,7 +438,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, resNonces[n]);
}
}
return rc;
return work->valid_nonces;
} else if (vhash[6] > ptarget[6]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[1]);

31
Algo256/keccak256.cu

@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no @@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
init[thr_id] = true;
}
for (int k=0; k < 20; k++) {
for (int k=0; k < 19; k++) {
be32enc(&endiandata[k], pdata[k]);
}
@ -75,20 +75,25 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no @@ -75,20 +75,25 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX && bench_algo < 0)
work->nonces[0] = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (work->nonces[0] != UINT32_MAX && bench_algo < 0)
{
uint32_t _ALIGN(64) vhash64[8];
be32enc(&endiandata[19], foundNonce);
keccak256_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
work_set_target_ratio(work, vhash64);
pdata[19] = foundNonce;
return 1;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
keccak256_hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
pdata[19] = work->nonces[0] + 1;
return work->valid_nonces;
}
else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

29
Algo256/vanilla.cu

@ -416,6 +416,7 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc @@ -416,6 +416,7 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
do {
vanilla_gpu_hash_16_8<<<grid,block, 0, streams[thr_id]>>>(throughput, pdata[19], d_resNonce[thr_id], targetHigh);
cudaMemcpyAsync(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost,streams[thr_id]);
*hashes_done = pdata[19] - first_nonce + throughput;
cudaStreamSynchronize(streams[thr_id]);
if (h_resNonce[thr_id][0] != UINT32_MAX){
@ -429,31 +430,41 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc @@ -429,31 +430,41 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
vanillahash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
rc = 1;
work->valid_nonces = 1;
work->nonces[0] = h_resNonce[thr_id][0];
work_set_target_ratio(work, vhashcpu);
*hashes_done = pdata[19] - first_nonce + throughput;
pdata[19] = h_resNonce[thr_id][0];
#if NBN > 1
if (h_resNonce[thr_id][1] != UINT32_MAX) {
work->nonces[1] = h_resNonce[thr_id][1];
be32enc(&endiandata[19], h_resNonce[thr_id][1]);
vanillahash(vhashcpu, endiandata, blakerounds);
pdata[21] = h_resNonce[thr_id][1];
if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhashcpu);
xchg(pdata[19], pdata[21]);
xchg(work->nonces[0], work->nonces[1]);
}
rc = 2;
work->valid_nonces = 2;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
#endif
return rc;
return work->valid_nonces;
}
else {
else if (vhashcpu[6] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[thr_id][0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce;
MyStreamSynchronize(NULL, 0, dev_id);

46
JHA/jackpotcoin.cu

@ -214,34 +214,40 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc @@ -214,34 +214,40 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
CUDA_LOG_ERROR();
uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX)
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
// jackpothash function gibt die Zahl der Runden zurück
jackpothash(vhash64, endiandata);
jackpothash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
work_set_target_ratio(work, vhash64);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
#if 0
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
nist5hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
jackpothash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
#else
pdata[19] = work->nonces[0] + 1; // cursor
#endif
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

22
ccminer.cpp

@ -2343,24 +2343,13 @@ static void *miner_thread(void *userdata) @@ -2343,24 +2343,13 @@ static void *miner_thread(void *userdata)
/* record scanhash elapsed time */
gettimeofday(&tv_end, NULL);
// todo: update all algos to use work->nonces and pdata[19] as counter
switch (opt_algo) {
case ALGO_BLAKE2S:
case ALGO_CRYPTOLIGHT:
case ALGO_CRYPTONIGHT:
case ALGO_DECRED:
case ALGO_LBRY:
// algos to migrate to replace pdata[21] by work.nonces[]
case ALGO_HEAVY:
case ALGO_SCRYPT:
case ALGO_SCRYPT_JANE:
case ALGO_SIA:
case ALGO_VELTOR:
case ALGO_WILDKECCAK:
// migrated algos
break;
case ALGO_ZR5:
// algos with only work.nonces[1] set
work.nonces[0] = nonceptr[0];
break;
default:
// algos with 2 results in pdata and work.nonces unset
//case ALGO_WHIRLPOOLX:
work.nonces[0] = nonceptr[0];
work.nonces[1] = nonceptr[2];
}
@ -2483,7 +2472,6 @@ static void *miner_thread(void *userdata) @@ -2483,7 +2472,6 @@ static void *miner_thread(void *userdata)
work.submit_nonce_id = 1;
nonceptr[0] = work.nonces[1];
if (opt_algo == ALGO_ZR5) {
// todo: use + 4..6 index for pok to allow multiple nonces
work.data[0] = work.data[22]; // pok
work.data[22] = 0;
}

54
lyra2/lyra2RE.cu

@ -136,7 +136,6 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, @@ -136,7 +136,6 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
do {
int order = 0;
uint32_t foundNonce;
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
@ -146,35 +145,34 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce, @@ -146,35 +145,34 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX)
work->nonces[0] = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
be32enc(&endiandata[19], foundNonce);
lyra2re_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = groestl256_getSecNonce(thr_id, 1);
work_set_target_ratio(work, vhash64);
if (secNonce != UINT32_MAX)
{
be32enc(&endiandata[19], secNonce);
lyra2re_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
if (opt_debug)
gpulog(LOG_BLUE, thr_id, "found second nonce %08x", secNonce);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
}
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
lyra2re_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = groestl256_getSecNonce(thr_id, 1);
if (work->nonces[1] != UINT32_MAX) {
be32enc(&endiandata[19], work->nonces[1]);
lyra2re_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

50
lyra2/lyra2REv2.cu

@ -142,7 +142,6 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc @@ -142,7 +142,6 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
do {
int order = 0;
uint32_t foundNonces[2] = { 0, 0 };
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("blake :");
@ -157,37 +156,36 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc @@ -157,37 +156,36 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
cubehash256_cpu_hash_32(thr_id, throughput,pdata[19], d_hash[thr_id], order++);
TRACE("cube :");
bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], foundNonces);
memset(work->nonces, 0, sizeof(work->nonces));
bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], work->nonces);
*hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonces[0] != 0)
if (work->nonces[0] != 0)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonces[0]);
lyra2v2_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget))
{
int res = 1;
work_set_target_ratio(work, vhash64);
pdata[19] = foundNonces[0];
// check if there was another one...
if (foundNonces[1] != 0)
{
be32enc(&endiandata[19], foundNonces[1]);
lyra2v2_hash(vhash64, endiandata);
pdata[21] = foundNonces[1];
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhash64);
xchg(pdata[19], pdata[21]);
}
res++;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
lyra2v2_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
lyra2v2_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return res;
return work->valid_nonces;
}
else
{
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

34
myriadgroestl.cpp

@ -67,34 +67,36 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned @@ -67,34 +67,36 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
myriadgroestl_cpu_setBlock(thr_id, endiandata, ptarget);
do {
// GPU
uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX };
memset(work->nonces, 0xff, sizeof(work->nonces));
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], foundNonces);
// GPU
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], work->nonces);
*hashes_done = pdata[19] - start_nonce + throughput;
if (foundNonces[0] < UINT32_MAX && bench_algo < 0)
if (work->nonces[0] < UINT32_MAX && bench_algo < 0)
{
uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32(foundNonces[0]);
endiandata[19] = swab32(work->nonces[0]);
myriadhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[0] = foundNonces[0];
pdata[19] = foundNonces[0];
// search for another nonce
if (foundNonces[1] != UINT32_MAX) {
endiandata[19] = swab32(foundNonces[1]);
if (work->nonces[1] != UINT32_MAX) {
endiandata[19] = swab32(work->nonces[1]);
myriadhash(vhash, endiandata);
pdata[21] = foundNonces[1]; // to drop
work->nonces[1] = foundNonces[1];
bn_set_target_ratio(work, vhash, 1);
return 2;
work->valid_nonces = 2;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return 1;
} else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]);
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

23
neoscrypt/neoscrypt.cpp

@ -62,28 +62,31 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign @@ -62,28 +62,31 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
neoscrypt_setBlockTarget(endiandata,ptarget);
do {
uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX };
neoscrypt_hash_k4(thr_id, throughput, pdata[19], foundNonces, have_stratum);
memset(work->nonces, 0xff, sizeof(work->nonces));
neoscrypt_hash_k4(thr_id, throughput, pdata[19], work->nonces, have_stratum);
*hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonces[0] != UINT32_MAX)
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
if (have_stratum) {
be32enc(&endiandata[19], foundNonces[0]);
be32enc(&endiandata[19], work->nonces[0]);
} else {
endiandata[19] = foundNonces[0];
endiandata[19] = work->nonces[0];
}
neoscrypt((uchar*)vhash, (uchar*) endiandata, 0x80000620U);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
pdata[19] = foundNonces[0];
return 1;
} else {
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", foundNonces[0]);
pdata[19] = work->nonces[0] + 1; // cursor
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", work->nonces[0]);
}
}

33
pentablake.cu

@ -98,21 +98,34 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n @@ -98,21 +98,34 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash[8];
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce);
be32enc(&endiandata[19], work->nonces[0]);
pentablakehash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
rc = 1;
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
pdata[19] = foundNonce;
return rc;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
pentablakehash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

47
quark/nist5.cu

@ -56,7 +56,6 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, @@ -56,7 +56,6 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
int res = 0;
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*256*16
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -101,6 +100,8 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, @@ -101,6 +100,8 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
quark_blake512_cpu_setBlock_80(thr_id, endiandata);
cuda_check_cpu_setTarget(ptarget);
work->valid_nonces = 0;
do {
int order = 0;
@ -113,31 +114,33 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce, @@ -113,31 +114,33 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
nist5hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
nist5hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
nist5hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
nist5hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
goto out;
}
else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
@ -157,7 +160,7 @@ out: @@ -157,7 +160,7 @@ out:
cudaStreamDestroy(stream[i]);
#endif
return res;
return work->valid_nonces;
}
// ressources cleanup

43
quark/quarkcoin.cu

@ -180,7 +180,6 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, @@ -180,7 +180,6 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
do {
int order = 0;
uint32_t foundNonce;
uint32_t nrm1=0, nrm2=0, nrm3=0;
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -229,8 +228,8 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, @@ -229,8 +228,8 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
work->nonces[1] = 0;
} else {
/* algo permutations are made with 2 different buffers */
@ -263,31 +262,47 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce, @@ -263,31 +262,47 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
TRACE("perm3 :");
CUDA_LOG_ERROR();
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
}
*hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX)
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash[8];
be32enc(&endiandata[19], foundNonce);
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
quarkhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
pdata[19] = foundNonce;
return 1;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
applog_hash((uchar*) vhash);
applog_hash((uchar*) ptarget);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
quarkhash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
} while (!work_restart[thr_id].restart);
return 0;
}

46
qubit/deep.cu

@ -96,30 +96,33 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, @@ -96,30 +96,33 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
be32enc(&endiandata[19], foundNonce);
deephash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
deephash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
deephash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
deephash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
return work->valid_nonces;
}
else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
@ -127,7 +130,6 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, @@ -127,7 +130,6 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);

41
qubit/luffa.cu

@ -72,27 +72,40 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce, @@ -72,27 +72,40 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
be32enc(&endiandata[19], foundNonce);
luffa_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
work_set_target_ratio(work, vhash64);
pdata[19] = foundNonce;
return 1;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
luffa_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
luffa_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
if ((uint64_t)throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);

45
qubit/qubit.cu

@ -112,31 +112,33 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, @@ -112,31 +112,33 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
qubithash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
qubithash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
qubithash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
qubithash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
return work->valid_nonces;
}
else {
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
@ -144,7 +146,6 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, @@ -144,7 +146,6 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);

61
skein.cu

@ -342,10 +342,6 @@ extern "C" void skeincoinhash(void *output, const void *input) @@ -342,10 +342,6 @@ extern "C" void skeincoinhash(void *output, const void *input)
memcpy(output, hash, 32);
}
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
return iftrue ? swab32(val) : val;
}
static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
@ -355,7 +351,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no @@ -355,7 +351,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int swap = 1;
sm5 = (device_sm[device_map[thr_id]] >= 500);
bool checkSecnonce = (have_stratum || have_longpoll) && !sm5;
@ -363,7 +358,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no @@ -363,7 +358,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 20);
if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce));
uint32_t foundNonce, secNonce = 0;
uint64_t target64 = 0;
if (opt_benchmark)
@ -409,54 +403,45 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no @@ -409,54 +403,45 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
if (sm5) {
/* cuda_skeincoin.cu */
foundNonce = skeincoin_hash_sm5(thr_id, throughput, pdata[19], swap, target64, &secNonce);
work->nonces[0] = skeincoin_hash_sm5(thr_id, throughput, pdata[19], 1, target64, &work->nonces[1]);
} else {
/* quark/cuda_skein512.cu */
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap);
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
sha2_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
}
if (foundNonce != UINT32_MAX)
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32_if(foundNonce, swap);
endiandata[19] = swab32(work->nonces[0]);
skeincoinhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1;
uint8_t num = res;
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
if (checkSecnonce) {
secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
}
while (secNonce != 0 && res < 2) /* todo: up to 6 */
{
endiandata[19] = swab32_if(secNonce, swap);
skeincoinhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
// todo: use 19 20 21... zr5 pok to adapt...
endiandata[19] = swab32_if(secNonce, swap);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], work->valid_nonces);
if (work->nonces[1] != 0) {
endiandata[19] = swab32(work->nonces[1]);
skeincoinhash(vhash, endiandata);
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash);
pdata[19+res*2] = swab32_if(secNonce, !swap);
res++;
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces++;
bn_set_target_ratio(work, vhash, 1);
}
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1;
}
num++;
//if (checkSecnonce)
// secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num);
//else
break; // only one secNonce...
} else {
pdata[19] = work->nonces[0] + 1; // cursor for next scan
}
if (res > 1 && opt_debug)
applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num);
pdata[19] = swab32_if(foundNonce, !swap);
return res;
return work->valid_nonces;
}
else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

39
skein2.cpp

@ -35,17 +35,12 @@ void skein2hash(void *output, const void *input) @@ -35,17 +35,12 @@ void skein2hash(void *output, const void *input)
static bool init[MAX_GPUS] = { 0 };
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
return iftrue ? swab32(val) : val;
}
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
int dev_id = device_map[thr_id];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
const int swap = 1; // to toggle nonce endian
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -85,35 +80,39 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned @@ -85,35 +80,39 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
int order = 0;
// Hash with CUDA
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap);
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32_if(foundNonce, swap);
endiandata[19] = swab32(work->nonces[0]);
skein2hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (secNonce != 0) {
endiandata[19] = swab32_if(secNonce, swap);
if (work->nonces[1] != 0) {
endiandata[19] = swab32(work->nonces[1]);
skein2hash(vhash, endiandata);
work->valid_nonces++;
bn_set_target_ratio(work, vhash, 1);
pdata[21] = work->nonces[1] = swab32_if(secNonce, !swap);
gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", swab32(secNonce));
res++;
gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", endiandata[19]);
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor for next scan
}
pdata[19] = work->nonces[0] = swab32_if(foundNonce, !swap);
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

47
x11/c11.cu

@ -155,7 +155,6 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u @@ -155,7 +155,6 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
do {
int order = 0;
uint32_t foundNonce;
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -181,32 +180,32 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u @@ -181,32 +180,32 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
*hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
c11hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
// check if there was some other ones...
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
c11hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
c11hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
c11hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = foundNonce + 1;
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

51
x11/fresh.cu

@ -108,7 +108,6 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, @@ -108,7 +108,6 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
x11_shavite512_setBlock_80((void*)endiandata);
cuda_check_cpu_setTarget(ptarget);
do {
uint32_t foundNonce;
int order = 0;
// GPU Hash
@ -126,35 +125,41 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce, @@ -126,35 +125,41 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
#endif
*hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
fresh_hash(vhash64, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
fresh_hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
fresh_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
fresh_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
return 0;

50
x11/s3.cu

@ -107,8 +107,6 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un @@ -107,8 +107,6 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
cuda_check_cpu_setTarget(ptarget);
do {
const uint32_t Htarg = ptarget[7];
uint32_t foundNonce;
int order = 0;
x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
@ -120,31 +118,33 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un @@ -120,31 +118,33 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
*hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
s3hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
s3hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
s3hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
s3hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

47
x11/sib.cu

@ -152,7 +152,6 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u @@ -152,7 +152,6 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
do {
int order = 0;
uint32_t foundNonce;
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -178,32 +177,34 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u @@ -178,32 +177,34 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => ");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
sibhash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
sibhash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] =cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
sibhash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
sibhash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else if (vhash64[7] > Htarg && !opt_quiet) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = foundNonce + 1;
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
if (!opt_quiet)
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

23
x11/veltor.cu

@ -120,31 +120,34 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce @@ -120,31 +120,34 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
veltorhash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
{
int res = 1;
work_set_target_ratio(work, vhash);
work->nonces[0] = startNounce + h_resNonce[0];
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
if (h_resNonce[1] != UINT32_MAX)
{
uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
gpulog(LOG_DEBUG, thr_id, "Found 2nd nonce: %08x", secNonce);
be32enc(&endiandata[19], secNonce);
veltorhash(vhash, endiandata);
work->nonces[1] = secNonce;
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhash);
xchg(work->nonces[1], work->nonces[0]);
} else {
bn_set_target_ratio(work, vhash, res);
bn_set_target_ratio(work, vhash, work->valid_nonces);
}
res++;
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; // next scan
return res;
return work->valid_nonces;
}
else if (vhash[7] > Htarg && !opt_quiet) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]);
else if (vhash[7] > Htarg) {
if (!opt_quiet)
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]);
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
pdata[19] = h_resNonce[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {

47
x11/x11.cu

@ -144,7 +144,6 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u @@ -144,7 +144,6 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
do {
int order = 0;
uint32_t foundNonce;
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -168,33 +167,33 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u @@ -168,33 +167,33 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => ");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
*hashes_done = pdata[19] - first_nonce + throughput;
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
x11hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
// check if there was some other ones...
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
*hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x11hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
x11hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x11hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
return work->valid_nonces;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = foundNonce + 1;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

53
x11/x11evo.cu

@ -290,7 +290,6 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce @@ -290,7 +290,6 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
do {
int order = 1;
uint32_t foundNonce;
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
@ -344,38 +343,38 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce @@ -344,38 +343,38 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
break;
case ECHO:
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => ");
TRACE("echo :");
break;
}
}
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
*hashes_done = pdata[19] - first_nonce + throughput;
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash64[8];
const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce);
x11evo_hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
// check if there was some other ones...
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
*hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x11evo_hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
x11evo_hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
pdata[19] = work->nonces[0] + 1; // cursor
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x11evo_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
gpulog(LOG_DEBUG, thr_id, "second nonce %08x! cursor %08x", work->nonces[1], pdata[19]);
work->valid_nonces++;
}
pdata[19] = foundNonce;
return res;
} else if (vhash64[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = foundNonce + 1;
return work->valid_nonces;
} else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
@ -388,7 +387,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce @@ -388,7 +387,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

39
x13/x13.cu

@ -165,7 +165,6 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u @@ -165,7 +165,6 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
cuda_check_cpu_setTarget(ptarget);
do {
uint32_t foundNonce;
int order = 0;
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -185,31 +184,33 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u @@ -185,31 +184,33 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
CUDA_LOG_ERROR();
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash[8];
be32enc(&endiandata[19], foundNonce);
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
x13hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
pdata[19] = foundNonce;
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x13hash(vhash, endiandata);
pdata[21] = secNonce;
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhash);
xchg(pdata[19], pdata[21]);
}
res++;
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

29
x15/whirlpool.cu

@ -101,7 +101,6 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, @@ -101,7 +101,6 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
whirlpool512_setBlock_80((void*)endiandata, ptarget);
do {
uint32_t foundNonce;
int order = 0;
*hashes_done = pdata[19] - first_nonce + throughput;
@ -113,28 +112,24 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, @@ -113,28 +112,24 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE64(" 64 :", d_hash);
foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX && bench_algo < 0)
work->nonces[0] = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (work->nonces[0] != UINT32_MAX && bench_algo < 0)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash[8];
be32enc(&endiandata[19], foundNonce);
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
wcoinhash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1;
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
#if 0
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) {
pdata[21] = secNonce;
res++;
}
#endif
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19] = work->nonces[0] + 1; // cursor
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {

45
x15/x14.cu

@ -198,32 +198,35 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, @@ -198,32 +198,35 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
uint32_t _ALIGN(64) vhash[8];
/* check now with the CPU to confirm */
be32enc(&endiandata[19], foundNonce);
x14hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x14hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
be32enc(&endiandata[19], work->nonces[0]);
x14hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x14hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

45
x15/x15.cu

@ -205,31 +205,34 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce, @@ -205,31 +205,34 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8];
uint32_t _ALIGN(64) vhash[8];
/* check now with the CPU to confirm */
be32enc(&endiandata[19], foundNonce);
x15hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x15hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
be32enc(&endiandata[19], work->nonces[0]);
x15hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x15hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}

47
x17/x17.cu

@ -230,34 +230,37 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u @@ -230,34 +230,37 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
*hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash64[8];
be32enc(&endiandata[19], foundNonce);
x17hash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
be32enc(&endiandata[19], secNonce);
x17hash(vhash64, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
res++;
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
x17hash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
x17hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
pdata[19] = foundNonce;
return res;
} else {
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
// x11+ coins could do some random error, but not on retry
if (!warn) {
warn++; continue;
warn++;
pdata[19] = work->nonces[0] + 1;
continue;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
warn = 0;
}
}

49
zr5.cu

@ -431,12 +431,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, @@ -431,12 +431,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
if (work_restart[thr_id].restart)
return -1;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t vhash64[8];
uint32_t _ALIGN(64) vhash[8];
uint32_t oldp19 = pdata[19];
uint32_t offset = foundNonce - pdata[19];
uint32_t offset = work->nonces[0] - pdata[19];
uint32_t pok = 0;
uint16_t h_pok;
@ -444,32 +444,31 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work, @@ -444,32 +444,31 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost);
pok = version | (0x10000UL * h_pok);
pdata[0] = pok; pdata[19] = foundNonce;
zr5hash(vhash64, pdata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1;
work_set_target_ratio(work, vhash64);
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1);
if (secNonce != 0) {
offset = secNonce - oldp19;
pdata[0] = pok; pdata[19] = work->nonces[0];
zr5hash(vhash, pdata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
offset = work->nonces[1] - oldp19;
cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost);
pok = version | (0x10000UL * h_pok);
memcpy(tmpdata, pdata, 80);
tmpdata[0] = pok; tmpdata[19] = secNonce;
zr5hash(vhash64, tmpdata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash64);
pdata[21] = secNonce;
pdata[22] = pok;
res++;
tmpdata[0] = pok; tmpdata[19] = work->nonces[1];
zr5hash(vhash, tmpdata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
bn_set_target_ratio(work, vhash, 1);
pdata[19] = max(pdata[19], work->nonces[1]); // cursor
pdata[20] = pok; // second nonce "pok"
work->valid_nonces++;
}
pdata[19]++;
}
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19]++;
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[0] = oldp0;
}
} else

Loading…
Cancel
Save