Browse Source

migrate 2nd nonce storage of most algos

This allows pdata[19] to be kept as a cursor between scans and, later, the found nonces to be sorted.

Still to migrate: heavy, scrypt, sia...
2upstream
Tanguy Pruvot 8 years ago
parent
commit
0ff75791e5
  1. 41
      Algo256/blake256.cu
  2. 40
      Algo256/bmw.cu
  3. 10
      Algo256/decred.cu
  4. 31
      Algo256/keccak256.cu
  5. 29
      Algo256/vanilla.cu
  6. 46
      JHA/jackpotcoin.cu
  7. 22
      ccminer.cpp
  8. 52
      lyra2/lyra2RE.cu
  9. 50
      lyra2/lyra2REv2.cu
  10. 34
      myriadgroestl.cpp
  11. 23
      neoscrypt/neoscrypt.cpp
  12. 31
      pentablake.cu
  13. 47
      quark/nist5.cu
  14. 41
      quark/quarkcoin.cu
  15. 46
      qubit/deep.cu
  16. 37
      qubit/luffa.cu
  17. 45
      qubit/qubit.cu
  18. 59
      skein.cu
  19. 39
      skein2.cpp
  20. 47
      x11/c11.cu
  21. 51
      x11/fresh.cu
  22. 50
      x11/s3.cu
  23. 47
      x11/sib.cu
  24. 21
      x11/veltor.cu
  25. 47
      x11/x11.cu
  26. 53
      x11/x11evo.cu
  27. 37
      x13/x13.cu
  28. 27
      x15/whirlpool.cu
  29. 45
      x15/x14.cu
  30. 45
      x15/x15.cu
  31. 47
      x17/x17.cu
  32. 49
      zr5.cu

41
Algo256/blake256.cu

@ -45,7 +45,7 @@ static uint32_t *h_resNonce[MAX_GPUS];
/* max count of found nonces in one call */ /* max count of found nonces in one call */
#define NBN 2 #define NBN 2
static uint32_t extra_results[NBN] = { UINT32_MAX }; static __thread uint32_t extra_results[NBN] = { UINT32_MAX };
#define GSPREC(a,b,c,d,x,y) { \ #define GSPREC(a,b,c,d,x,y) { \
v[a] += (m[x] ^ c_u256[y]) + v[b]; \ v[a] += (m[x] ^ c_u256[y]) + v[b]; \
@ -519,46 +519,51 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
do { do {
// GPU HASH (second block only, first is midstate) // GPU HASH (second block only, first is midstate)
uint32_t foundNonce = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds); work->nonces[0] = blake256_cpu_hash_16(thr_id, throughput, pdata[19], targetHigh, blakerounds);
if (foundNonce != UINT32_MAX) *hashes_done = pdata[19] - first_nonce + throughput;
if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhashcpu[8]; uint32_t _ALIGN(64) vhashcpu[8];
uint32_t Htarg = ptarget[6]; const uint32_t Htarg = ptarget[6];
for (int k=16; k < 19; k++) for (int k=16; k < 19; k++)
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
blake256hash(vhashcpu, endiandata, blakerounds); blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
{ {
rc = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhashcpu); work_set_target_ratio(work, vhashcpu);
*hashes_done = pdata[19] - first_nonce + throughput;
pdata[19] = foundNonce;
#if NBN > 1 #if NBN > 1
if (extra_results[0] != UINT32_MAX) { if (extra_results[0] != UINT32_MAX) {
be32enc(&endiandata[19], extra_results[0]); work->nonces[1] = extra_results[0];
be32enc(&endiandata[19], work->nonces[1]);
blake256hash(vhashcpu, endiandata, blakerounds); blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
pdata[21] = extra_results[0];
if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) { if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhashcpu); work_set_target_ratio(work, vhashcpu);
xchg(pdata[21], pdata[19]); xchg(work->nonces[0], work->nonces[1]);
} else {
bn_set_target_ratio(work, vhashcpu, 1);
} }
rc = 2; work->valid_nonces = 2;
} }
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
extra_results[0] = UINT32_MAX; extra_results[0] = UINT32_MAX;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
#endif #endif
return rc; return work->valid_nonces;
} }
else if (opt_debug) { else if (vhashcpu[6] > Htarg) {
applog_hash((uchar*)ptarget); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
applog_compare_hash((uchar*)vhashcpu, (uchar*)ptarget); pdata[19] = work->nonces[0] + 1;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); continue;
} }
} }

40
Algo256/bmw.cu

@ -43,7 +43,6 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
bool swapnonce = true;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -77,24 +76,37 @@ extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, u
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], (int) swapnonce); bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], 1);
uint32_t foundNonce = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX) work->nonces[0] = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7];
endiandata[19] = swab32_if(foundNonce, swapnonce); uint32_t _ALIGN(64) vhash[8];
bmw_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
bmw_hash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
pdata[19] = swab32_if(foundNonce,!swapnonce); if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work_set_target_ratio(work, vhash64); work->valid_nonces = 1;
return 1; work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
bmw_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
else { return work->valid_nonces;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); }
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

10
Algo256/decred.cu

@ -391,6 +391,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
// GPU HASH // GPU HASH
decred_gpu_hash_nonce <<<grid, block>>> (throughput, (*pnonce), d_resNonce[thr_id], targetHigh); decred_gpu_hash_nonce <<<grid, block>>> (throughput, (*pnonce), d_resNonce[thr_id], targetHigh);
*hashes_done = (*pnonce) - first_nonce + throughput;
// first cell contains the valid nonces count // first cell contains the valid nonces count
cudaMemcpy(resNonces, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); cudaMemcpy(resNonces, d_resNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
@ -404,9 +406,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
decred_hash(vhash, endiandata); decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget))
{ {
int rc = work->valid_nonces = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
*hashes_done = (*pnonce) - first_nonce + throughput;
work->nonces[0] = swab32(resNonces[1]); work->nonces[0] = swab32(resNonces[1]);
*pnonce = work->nonces[0]; *pnonce = work->nonces[0];
@ -417,7 +418,6 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
decred_hash(vhash, endiandata); decred_hash(vhash, endiandata);
if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) { if (vhash[6] <= ptarget[6] && fulltest(vhash, ptarget)) {
work->nonces[1] = swab32(resNonces[n]); work->nonces[1] = swab32(resNonces[n]);
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
// we really want the best first ? depends... // we really want the best first ? depends...
work->shareratio[1] = work->shareratio[0]; work->shareratio[1] = work->shareratio[0];
@ -429,7 +429,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
bn_set_target_ratio(work, vhash, 1); bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++; work->valid_nonces++;
} }
rc = 2; // MAX_NONCES submit limited to 2 work->valid_nonces = 2; // MAX_NONCES submit limited to 2
gpulog(LOG_DEBUG, thr_id, "multiple nonces 1:%08x (%g) %u:%08x (%g)", gpulog(LOG_DEBUG, thr_id, "multiple nonces 1:%08x (%g) %u:%08x (%g)",
work->nonces[0], work->sharediff[0], n, work->nonces[1], work->sharediff[1]); work->nonces[0], work->sharediff[0], n, work->nonces[1], work->sharediff[1]);
@ -438,7 +438,7 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, resNonces[n]); gpulog(LOG_WARNING, thr_id, "result %u for %08x does not validate on CPU!", n, resNonces[n]);
} }
} }
return rc; return work->valid_nonces;
} else if (vhash[6] > ptarget[6]) { } else if (vhash[6] > ptarget[6]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[1]); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[1]);

31
Algo256/keccak256.cu

@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
init[thr_id] = true; init[thr_id] = true;
} }
for (int k=0; k < 20; k++) { for (int k=0; k < 19; k++) {
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
} }
@ -75,20 +75,25 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); work->nonces[0] = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX && bench_algo < 0) if (work->nonces[0] != UINT32_MAX && bench_algo < 0)
{ {
uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
keccak256_hash(vhash64, endiandata);
be32enc(&endiandata[19], work->nonces[0]);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { keccak256_hash(vhash, endiandata);
work_set_target_ratio(work, vhash64);
pdata[19] = foundNonce; if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
return 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
pdata[19] = work->nonces[0] + 1;
return work->valid_nonces;
} }
else { else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

29
Algo256/vanilla.cu

@ -416,6 +416,7 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
do { do {
vanilla_gpu_hash_16_8<<<grid,block, 0, streams[thr_id]>>>(throughput, pdata[19], d_resNonce[thr_id], targetHigh); vanilla_gpu_hash_16_8<<<grid,block, 0, streams[thr_id]>>>(throughput, pdata[19], d_resNonce[thr_id], targetHigh);
cudaMemcpyAsync(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost,streams[thr_id]); cudaMemcpyAsync(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost,streams[thr_id]);
*hashes_done = pdata[19] - first_nonce + throughput;
cudaStreamSynchronize(streams[thr_id]); cudaStreamSynchronize(streams[thr_id]);
if (h_resNonce[thr_id][0] != UINT32_MAX){ if (h_resNonce[thr_id][0] != UINT32_MAX){
@ -429,31 +430,41 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
vanillahash(vhashcpu, endiandata, blakerounds); vanillahash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) { if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget)) {
rc = 1; work->valid_nonces = 1;
work->nonces[0] = h_resNonce[thr_id][0];
work_set_target_ratio(work, vhashcpu); work_set_target_ratio(work, vhashcpu);
*hashes_done = pdata[19] - first_nonce + throughput;
pdata[19] = h_resNonce[thr_id][0];
#if NBN > 1 #if NBN > 1
if (h_resNonce[thr_id][1] != UINT32_MAX) { if (h_resNonce[thr_id][1] != UINT32_MAX) {
work->nonces[1] = h_resNonce[thr_id][1];
be32enc(&endiandata[19], h_resNonce[thr_id][1]); be32enc(&endiandata[19], h_resNonce[thr_id][1]);
vanillahash(vhashcpu, endiandata, blakerounds); vanillahash(vhashcpu, endiandata, blakerounds);
pdata[21] = h_resNonce[thr_id][1];
if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) { if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhashcpu); work_set_target_ratio(work, vhashcpu);
xchg(pdata[19], pdata[21]); xchg(work->nonces[0], work->nonces[1]);
} }
rc = 2; work->valid_nonces = 2;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
#endif #endif
return rc; return work->valid_nonces;
} }
else { else if (vhashcpu[6] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[thr_id][0]); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[thr_id][0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput; pdata[19] += throughput;
} while (!work_restart[thr_id].restart && ((uint64_t)max_nonce > ((uint64_t)(pdata[19]) + (uint64_t)throughput)));
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce; *hashes_done = pdata[19] - first_nonce;
MyStreamSynchronize(NULL, 0, dev_id); MyStreamSynchronize(NULL, 0, dev_id);

46
JHA/jackpotcoin.cu

@ -214,34 +214,40 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
// jackpothash function gibt die Zahl der Runden zurück // jackpothash function gibt die Zahl der Runden zurück
jackpothash(vhash64, endiandata); jackpothash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
#if 0 #if 0
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
nist5hash(vhash64, endiandata); jackpothash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++; } else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
#else
pdata[19] = work->nonces[0] + 1; // cursor
#endif #endif
pdata[19] = foundNonce; return work->valid_nonces;
return res; }
} else { else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

22
ccminer.cpp

@ -2343,24 +2343,13 @@ static void *miner_thread(void *userdata)
/* record scanhash elapsed time */ /* record scanhash elapsed time */
gettimeofday(&tv_end, NULL); gettimeofday(&tv_end, NULL);
// todo: update all algos to use work->nonces and pdata[19] as counter
switch (opt_algo) { switch (opt_algo) {
case ALGO_BLAKE2S: // algos to migrate to replace pdata[21] by work.nonces[]
case ALGO_CRYPTOLIGHT: case ALGO_HEAVY:
case ALGO_CRYPTONIGHT: case ALGO_SCRYPT:
case ALGO_DECRED: case ALGO_SCRYPT_JANE:
case ALGO_LBRY:
case ALGO_SIA: case ALGO_SIA:
case ALGO_VELTOR: //case ALGO_WHIRLPOOLX:
case ALGO_WILDKECCAK:
// migrated algos
break;
case ALGO_ZR5:
// algos with only work.nonces[1] set
work.nonces[0] = nonceptr[0];
break;
default:
// algos with 2 results in pdata and work.nonces unset
work.nonces[0] = nonceptr[0]; work.nonces[0] = nonceptr[0];
work.nonces[1] = nonceptr[2]; work.nonces[1] = nonceptr[2];
} }
@ -2483,7 +2472,6 @@ static void *miner_thread(void *userdata)
work.submit_nonce_id = 1; work.submit_nonce_id = 1;
nonceptr[0] = work.nonces[1]; nonceptr[0] = work.nonces[1];
if (opt_algo == ALGO_ZR5) { if (opt_algo == ALGO_ZR5) {
// todo: use + 4..6 index for pok to allow multiple nonces
work.data[0] = work.data[22]; // pok work.data[0] = work.data[22]; // pok
work.data[22] = 0; work.data[22] = 0;
} }

52
lyra2/lyra2RE.cu

@ -136,7 +136,6 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
do { do {
int order = 0; int order = 0;
uint32_t foundNonce;
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
@ -146,35 +145,34 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); work->nonces[0] = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce);
lyra2re_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
lyra2re_hash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
uint32_t secNonce = groestl256_getSecNonce(thr_id, 1); work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != UINT32_MAX) work->nonces[1] = groestl256_getSecNonce(thr_id, 1);
{ if (work->nonces[1] != UINT32_MAX) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
lyra2re_hash(vhash64, endiandata); lyra2re_hash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { bn_set_target_ratio(work, vhash, 1);
if (opt_debug) work->valid_nonces++;
gpulog(LOG_BLUE, thr_id, "found second nonce %08x", secNonce); pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) } else {
work_set_target_ratio(work, vhash64); pdata[19] = work->nonces[0] + 1; // cursor
pdata[21] = secNonce;
res++;
} }
return work->valid_nonces;
} }
pdata[19] = foundNonce; else if (vhash[7] > Htarg) {
return res; gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
} else { pdata[19] = work->nonces[0] + 1;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); continue;
} }
} }

50
lyra2/lyra2REv2.cu

@ -142,7 +142,6 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
do { do {
int order = 0; int order = 0;
uint32_t foundNonces[2] = { 0, 0 };
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
TRACE("blake :"); TRACE("blake :");
@ -157,37 +156,36 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
cubehash256_cpu_hash_32(thr_id, throughput,pdata[19], d_hash[thr_id], order++); cubehash256_cpu_hash_32(thr_id, throughput,pdata[19], d_hash[thr_id], order++);
TRACE("cube :"); TRACE("cube :");
bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], foundNonces); memset(work->nonces, 0, sizeof(work->nonces));
bmw256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], work->nonces);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonces[0] != 0) if (work->nonces[0] != 0)
{ {
uint32_t vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonces[0]); uint32_t _ALIGN(64) vhash[8];
lyra2v2_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) lyra2v2_hash(vhash, endiandata);
{
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work_set_target_ratio(work, vhash64); work->valid_nonces = 1;
pdata[19] = foundNonces[0]; work_set_target_ratio(work, vhash);
// check if there was another one... if (work->nonces[1] != 0) {
if (foundNonces[1] != 0) be32enc(&endiandata[19], work->nonces[1]);
{ lyra2v2_hash(vhash, endiandata);
be32enc(&endiandata[19], foundNonces[1]); bn_set_target_ratio(work, vhash, 1);
lyra2v2_hash(vhash64, endiandata); work->valid_nonces++;
pdata[21] = foundNonces[1]; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) { } else {
work_set_target_ratio(work, vhash64); pdata[19] = work->nonces[0] + 1; // cursor
xchg(pdata[19], pdata[21]);
} }
res++; return work->valid_nonces;
} }
return res; else if (vhash[7] > Htarg) {
} gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
else pdata[19] = work->nonces[0] + 1;
{ continue;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]);
} }
} }

34
myriadgroestl.cpp

@ -67,34 +67,36 @@ int scanhash_myriad(int thr_id, struct work *work, uint32_t max_nonce, unsigned
myriadgroestl_cpu_setBlock(thr_id, endiandata, ptarget); myriadgroestl_cpu_setBlock(thr_id, endiandata, ptarget);
do { do {
// GPU memset(work->nonces, 0xff, sizeof(work->nonces));
uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX };
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], foundNonces); // GPU
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], work->nonces);
*hashes_done = pdata[19] - start_nonce + throughput; *hashes_done = pdata[19] - start_nonce + throughput;
if (foundNonces[0] < UINT32_MAX && bench_algo < 0) if (work->nonces[0] < UINT32_MAX && bench_algo < 0)
{ {
uint32_t _ALIGN(64) vhash[8]; uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32(foundNonces[0]); endiandata[19] = swab32(work->nonces[0]);
myriadhash(vhash, endiandata); myriadhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
work->nonces[0] = foundNonces[0]; if (work->nonces[1] != UINT32_MAX) {
pdata[19] = foundNonces[0]; endiandata[19] = swab32(work->nonces[1]);
// search for another nonce
if (foundNonces[1] != UINT32_MAX) {
endiandata[19] = swab32(foundNonces[1]);
myriadhash(vhash, endiandata); myriadhash(vhash, endiandata);
pdata[21] = foundNonces[1]; // to drop
work->nonces[1] = foundNonces[1];
bn_set_target_ratio(work, vhash, 1); bn_set_target_ratio(work, vhash, 1);
return 2; work->valid_nonces = 2;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
} }
return 1; else if (vhash[7] > ptarget[7]) {
} else if (vhash[7] > ptarget[7]) { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonces[0]); pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

23
neoscrypt/neoscrypt.cpp

@ -62,28 +62,31 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
neoscrypt_setBlockTarget(endiandata,ptarget); neoscrypt_setBlockTarget(endiandata,ptarget);
do { do {
uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX }; memset(work->nonces, 0xff, sizeof(work->nonces));
neoscrypt_hash_k4(thr_id, throughput, pdata[19], foundNonces, have_stratum); neoscrypt_hash_k4(thr_id, throughput, pdata[19], work->nonces, have_stratum);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonces[0] != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8]; uint32_t _ALIGN(64) vhash[8];
if (have_stratum) { if (have_stratum) {
be32enc(&endiandata[19], foundNonces[0]); be32enc(&endiandata[19], work->nonces[0]);
} else { } else {
endiandata[19] = foundNonces[0]; endiandata[19] = work->nonces[0];
} }
neoscrypt((uchar*)vhash, (uchar*) endiandata, 0x80000620U); neoscrypt((uchar*)vhash, (uchar*) endiandata, 0x80000620U);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
pdata[19] = foundNonces[0]; pdata[19] = work->nonces[0] + 1; // cursor
return 1; return work->valid_nonces;
} else { }
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", foundNonces[0]); else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", work->nonces[0]);
} }
} }

31
pentablake.cu

@ -98,21 +98,34 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash[8]; const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
pentablakehash(vhash, endiandata); pentablakehash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
rc = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
pdata[19] = foundNonce; work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
return rc; if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
pentablakehash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

47
quark/nist5.cu

@ -56,7 +56,6 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
int res = 0;
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*256*16 uint32_t throughput = cuda_default_throughput(thr_id, 1 << 20); // 256*256*16
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -101,6 +100,8 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
quark_blake512_cpu_setBlock_80(thr_id, endiandata); quark_blake512_cpu_setBlock_80(thr_id, endiandata);
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
work->valid_nonces = 0;
do { do {
int order = 0; int order = 0;
@ -113,31 +114,33 @@ extern "C" int scanhash_nist5(int thr_id, struct work *work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
nist5hash(vhash64, endiandata); nist5hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash);
work_set_target_ratio(work, vhash64); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
nist5hash(vhash64, endiandata); nist5hash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++; } else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
pdata[19] = foundNonce;
goto out; goto out;
} }
else { else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }
@ -157,7 +160,7 @@ out:
cudaStreamDestroy(stream[i]); cudaStreamDestroy(stream[i]);
#endif #endif
return res; return work->valid_nonces;
} }
// ressources cleanup // ressources cleanup

41
quark/quarkcoin.cu

@ -180,7 +180,6 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
do { do {
int order = 0; int order = 0;
uint32_t foundNonce;
uint32_t nrm1=0, nrm2=0, nrm3=0; uint32_t nrm1=0, nrm2=0, nrm3=0;
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -229,8 +228,8 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++); quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++); work->nonces[0] = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
work->nonces[1] = 0;
} else { } else {
/* algo permutations are made with 2 different buffers */ /* algo permutations are made with 2 different buffers */
@ -263,31 +262,47 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
TRACE("perm3 :"); TRACE("perm3 :");
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
} }
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
quarkhash(vhash, endiandata); quarkhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
pdata[19] = foundNonce; if (work->nonces[1] != 0) {
return 1; be32enc(&endiandata[19], work->nonces[1]);
quarkhash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
applog_hash((uchar*) vhash); }
applog_hash((uchar*) ptarget); return work->valid_nonces;
} }
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
} }
pdata[19] += throughput; pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);
return 0; return 0;
} }

46
qubit/deep.cu

@ -96,30 +96,33 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
deephash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
deephash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != 0) { work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
be32enc(&endiandata[19], secNonce); if (work->nonces[1] != 0) {
deephash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[1]);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) deephash(vhash, endiandata);
work_set_target_ratio(work, vhash64); bn_set_target_ratio(work, vhash, 1);
pdata[21] = secNonce; work->valid_nonces++;
res++; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
pdata[19] = foundNonce; return work->valid_nonces;
return res;
} }
else { else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }
@ -127,7 +130,6 @@ extern "C" int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce,
pdata[19] = max_nonce; pdata[19] = max_nonce;
break; break;
} }
pdata[19] += throughput; pdata[19] += throughput;
} while (!work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);

37
qubit/luffa.cu

@ -72,19 +72,33 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
luffa_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
luffa_hash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
work_set_target_ratio(work, vhash64); if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
pdata[19] = foundNonce; work->valid_nonces = 1;
return 1; work_set_target_ratio(work, vhash);
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
luffa_hash(vhash, endiandata);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }
@ -92,7 +106,6 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
pdata[19] = max_nonce; pdata[19] = max_nonce;
break; break;
} }
pdata[19] += throughput; pdata[19] += throughput;
} while (!work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);

45
qubit/qubit.cu

@ -112,31 +112,33 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
qubithash(vhash64, endiandata); qubithash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash);
work_set_target_ratio(work, vhash64); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
qubithash(vhash64, endiandata); qubithash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++; } else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
pdata[19] = foundNonce; return work->valid_nonces;
return res;
} }
else { else if (vhash[7] > Htarg) {
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }
@ -144,7 +146,6 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
pdata[19] = max_nonce; pdata[19] = max_nonce;
break; break;
} }
pdata[19] += throughput; pdata[19] += throughput;
} while (!work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);

59
skein.cu

@ -342,10 +342,6 @@ extern "C" void skeincoinhash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
return iftrue ? swab32(val) : val;
}
static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
@ -355,7 +351,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const int swap = 1;
sm5 = (device_sm[device_map[thr_id]] >= 500); sm5 = (device_sm[device_map[thr_id]] >= 500);
bool checkSecnonce = (have_stratum || have_longpoll) && !sm5; bool checkSecnonce = (have_stratum || have_longpoll) && !sm5;
@ -363,7 +358,6 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 20); uint32_t throughput = cuda_default_throughput(thr_id, 1U << 20);
if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce)); if (init[thr_id]) throughput = min(throughput, (max_nonce - first_nonce));
uint32_t foundNonce, secNonce = 0;
uint64_t target64 = 0; uint64_t target64 = 0;
if (opt_benchmark) if (opt_benchmark)
@ -409,54 +403,45 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
if (sm5) { if (sm5) {
/* cuda_skeincoin.cu */ /* cuda_skeincoin.cu */
foundNonce = skeincoin_hash_sm5(thr_id, throughput, pdata[19], swap, target64, &secNonce); work->nonces[0] = skeincoin_hash_sm5(thr_id, throughput, pdata[19], 1, target64, &work->nonces[1]);
} else { } else {
/* quark/cuda_skein512.cu */ /* quark/cuda_skein512.cu */
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap); skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
sha2_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]); sha2_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
} }
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash[8]; uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32_if(foundNonce, swap); endiandata[19] = swab32(work->nonces[0]);
skeincoinhash(vhash, endiandata); skeincoinhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint8_t num = res;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
if (checkSecnonce) { if (checkSecnonce) {
secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], work->valid_nonces);
} if (work->nonces[1] != 0) {
while (secNonce != 0 && res < 2) /* todo: up to 6 */ endiandata[19] = swab32(work->nonces[1]);
{
endiandata[19] = swab32_if(secNonce, swap);
skeincoinhash(vhash, endiandata); skeincoinhash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
// todo: use 19 20 21... zr5 pok to adapt... work->valid_nonces++;
endiandata[19] = swab32_if(secNonce, swap); bn_set_target_ratio(work, vhash, 1);
skeincoinhash(vhash, endiandata);
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0])
work_set_target_ratio(work, vhash);
pdata[19+res*2] = swab32_if(secNonce, !swap);
res++;
} }
num++; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
//if (checkSecnonce) } else {
// secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); pdata[19] = work->nonces[0] + 1;
//else
break; // only one secNonce...
} }
if (res > 1 && opt_debug) } else {
applog(LOG_BLUE, "GPU #%d: %d/%d valid nonces !!!", device_map[thr_id], res, (int)num); pdata[19] = work->nonces[0] + 1; // cursor for next scan
pdata[19] = swab32_if(foundNonce, !swap);
return res;
} }
else { return work->valid_nonces;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); }
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

39
skein2.cpp

@ -35,17 +35,12 @@ void skein2hash(void *output, const void *input)
static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
return iftrue ? swab32(val) : val;
}
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{ {
int dev_id = device_map[thr_id]; int dev_id = device_map[thr_id];
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
const int swap = 1; // to toggle nonce endian
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8 uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
@ -85,35 +80,39 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
int order = 0; int order = 0;
// Hash with CUDA // Hash with CUDA
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], swap); skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash[8]; uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32_if(foundNonce, swap); endiandata[19] = swab32(work->nonces[0]);
skein2hash(vhash, endiandata); skein2hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
if (secNonce != 0) { if (work->nonces[1] != 0) {
endiandata[19] = swab32_if(secNonce, swap); endiandata[19] = swab32(work->nonces[1]);
skein2hash(vhash, endiandata); skein2hash(vhash, endiandata);
work->valid_nonces++;
bn_set_target_ratio(work, vhash, 1); bn_set_target_ratio(work, vhash, 1);
pdata[21] = work->nonces[1] = swab32_if(secNonce, !swap); gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", endiandata[19]);
gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", swab32(secNonce)); pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++;
}
pdata[19] = work->nonces[0] = swab32_if(foundNonce, !swap);
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor for next scan
}
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

47
x11/c11.cu

@ -155,7 +155,6 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
do { do {
int order = 0; int order = 0;
uint32_t foundNonce;
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -181,32 +180,32 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
c11hash(vhash64, endiandata); c11hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
// check if there was some other ones... work_set_target_ratio(work, vhash);
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64); if (work->nonces[1] != 0) {
if (secNonce != 0) { be32enc(&endiandata[19], work->nonces[1]);
be32enc(&endiandata[19], secNonce); c11hash(vhash, endiandata);
c11hash(vhash64, endiandata); bn_set_target_ratio(work, vhash, 1);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) work->valid_nonces++;
work_set_target_ratio(work, vhash64); pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
pdata[21] = secNonce;
res++;
}
pdata[19] = foundNonce;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
pdata[19] = foundNonce + 1; }
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue; continue;
} }
} }

51
x11/fresh.cu

@ -108,7 +108,6 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
x11_shavite512_setBlock_80((void*)endiandata); x11_shavite512_setBlock_80((void*)endiandata);
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
uint32_t foundNonce;
int order = 0; int order = 0;
// GPU Hash // GPU Hash
@ -126,35 +125,41 @@ extern "C" int scanhash_fresh(int thr_id, struct work* work, uint32_t max_nonce,
#endif #endif
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
fresh_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
fresh_hash(vhash, endiandata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
if (secNonce != 0) { work_set_target_ratio(work, vhash);
be32enc(&endiandata[19], secNonce); if (work->nonces[1] != 0) {
fresh_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[1]);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) fresh_hash(vhash, endiandata);
work_set_target_ratio(work, vhash64); bn_set_target_ratio(work, vhash, 1);
pdata[21] = secNonce; work->valid_nonces++;
res++; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
}
pdata[19] = foundNonce;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
} }
} }
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput; pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1; *hashes_done = pdata[19] - first_nonce + 1;
return 0; return 0;

50
x11/s3.cu

@ -107,8 +107,6 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
const uint32_t Htarg = ptarget[7];
uint32_t foundNonce;
int order = 0; int order = 0;
x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
@ -120,31 +118,33 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
if (foundNonce != UINT32_MAX)
{ {
uint32_t vhash64[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
s3hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
s3hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != 0) { work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
be32enc(&endiandata[19], secNonce); if (work->nonces[1] != 0) {
s3hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[1]);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) s3hash(vhash, endiandata);
work_set_target_ratio(work, vhash64); bn_set_target_ratio(work, vhash, 1);
pdata[21] = secNonce; work->valid_nonces++;
res++; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
}
pdata[19] = foundNonce;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

47
x11/sib.cu

@ -152,7 +152,6 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
do { do {
int order = 0; int order = 0;
uint32_t foundNonce;
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -178,32 +177,34 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => "); TRACE("echo => ");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
sibhash(vhash64, endiandata); sibhash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash);
work_set_target_ratio(work, vhash64); work->nonces[1] =cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
sibhash(vhash64, endiandata); sibhash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++; } else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
pdata[19] = foundNonce; return work->valid_nonces;
return res; }
} else if (vhash64[7] > Htarg && !opt_quiet) { else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); if (!opt_quiet)
pdata[19] = foundNonce + 1; gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue; continue;
} }
} }

21
x11/veltor.cu

@ -120,31 +120,34 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
veltorhash(vhash, endiandata); veltorhash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
{ {
int res = 1;
work_set_target_ratio(work, vhash);
work->nonces[0] = startNounce + h_resNonce[0]; work->nonces[0] = startNounce + h_resNonce[0];
work->valid_nonces = 1;
work_set_target_ratio(work, vhash);
if (h_resNonce[1] != UINT32_MAX) if (h_resNonce[1] != UINT32_MAX)
{ {
uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1]; uint32_t secNonce = work->nonces[1] = startNounce + h_resNonce[1];
gpulog(LOG_DEBUG, thr_id, "Found 2nd nonce: %08x", secNonce);
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], secNonce);
veltorhash(vhash, endiandata); veltorhash(vhash, endiandata);
work->nonces[1] = secNonce; work->nonces[1] = secNonce;
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) {
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
xchg(work->nonces[1], work->nonces[0]); xchg(work->nonces[1], work->nonces[0]);
} else { } else {
bn_set_target_ratio(work, vhash, res); bn_set_target_ratio(work, vhash, work->valid_nonces);
} }
res++; work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; // next scan return work->valid_nonces;
return res;
} }
else if (vhash[7] > Htarg && !opt_quiet) { else if (vhash[7] > Htarg) {
if (!opt_quiet)
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", h_resNonce[0]);
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t)); cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
pdata[19] = h_resNonce[0] + 1;
continue;
} }
} }
if ((uint64_t) throughput + pdata[19] >= max_nonce) { if ((uint64_t) throughput + pdata[19] >= max_nonce) {

47
x11/x11.cu

@ -144,7 +144,6 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
do { do {
int order = 0; int order = 0;
uint32_t foundNonce;
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -168,33 +167,33 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => "); TRACE("echo => ");
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
x11hash(vhash64, endiandata); x11hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
// check if there was some other ones... work_set_target_ratio(work, vhash);
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64); if (work->nonces[1] != 0) {
*hashes_done = pdata[19] - first_nonce + throughput; be32enc(&endiandata[19], work->nonces[1]);
if (secNonce != 0) { x11hash(vhash, endiandata);
be32enc(&endiandata[19], secNonce); bn_set_target_ratio(work, vhash, 1);
x11hash(vhash64, endiandata); work->valid_nonces++;
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
work_set_target_ratio(work, vhash64); } else {
pdata[21] = secNonce; pdata[19] = work->nonces[0] + 1; // cursor
res++;
} }
pdata[19] = foundNonce; return work->valid_nonces;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = foundNonce + 1; pdata[19] = work->nonces[0] + 1;
continue; continue;
} }
} }

53
x11/x11evo.cu

@ -290,7 +290,6 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
do { do {
int order = 1; int order = 1;
uint32_t foundNonce;
// Hash with CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
@ -344,38 +343,38 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
break; break;
case ECHO: case ECHO:
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE("echo => "); TRACE("echo :");
break; break;
} }
} }
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonce != UINT32_MAX)
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t _ALIGN(64) vhash64[8];
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
x11evo_hash(vhash64, endiandata); be32enc(&endiandata[19], work->nonces[0]);
x11evo_hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1; if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
// check if there was some other ones... work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work_set_target_ratio(work, vhash);
work_set_target_ratio(work, vhash64); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput; pdata[19] = work->nonces[0] + 1; // cursor
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
x11evo_hash(vhash64, endiandata); x11evo_hash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
pdata[21] = secNonce; gpulog(LOG_DEBUG, thr_id, "second nonce %08x! cursor %08x", work->nonces[1], pdata[19]);
res++; work->valid_nonces++;
} }
pdata[19] = foundNonce; return work->valid_nonces;
return res; } else if (vhash[7] > Htarg) {
} else if (vhash64[7] > Htarg) { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1;
pdata[19] = foundNonce + 1;
continue; continue;
} }
} }
@ -388,7 +387,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
} while (!work_restart[thr_id].restart); } while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce; *hashes_done = pdata[19] - first_nonce + 1;
return 0; return 0;
} }

37
x13/x13.cu

@ -165,7 +165,6 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
cuda_check_cpu_setTarget(ptarget); cuda_check_cpu_setTarget(ptarget);
do { do {
uint32_t foundNonce;
int order = 0; int order = 0;
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++; quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]); order++;
@ -185,31 +184,33 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash[8]; const uint32_t Htarg = ptarget[7];
be32enc(&endiandata[19], foundNonce); uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
x13hash(vhash, endiandata); x13hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
pdata[19] = foundNonce; if (work->nonces[1] != 0) {
if (secNonce != 0) { be32enc(&endiandata[19], work->nonces[1]);
be32enc(&endiandata[19], secNonce);
x13hash(vhash, endiandata); x13hash(vhash, endiandata);
pdata[21] = secNonce; bn_set_target_ratio(work, vhash, 1);
if (bn_hash_target_ratio(vhash, ptarget) > work->shareratio[0]) { work->valid_nonces++;
work_set_target_ratio(work, vhash); pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
xchg(pdata[19], pdata[21]); } else {
pdata[19] = work->nonces[0] + 1; // cursor
} }
res++; return work->valid_nonces;
} }
return res; else if (vhash[7] > Htarg) {
} else { gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

27
x15/whirlpool.cu

@ -101,7 +101,6 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
whirlpool512_setBlock_80((void*)endiandata, ptarget); whirlpool512_setBlock_80((void*)endiandata, ptarget);
do { do {
uint32_t foundNonce;
int order = 0; int order = 0;
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
@ -113,28 +112,24 @@ extern "C" int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce,
x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x15_whirlpool_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
TRACE64(" 64 :", d_hash); TRACE64(" 64 :", d_hash);
foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); work->nonces[0] = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX && bench_algo < 0) if (work->nonces[0] != UINT32_MAX && bench_algo < 0)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
wcoinhash(vhash, endiandata); wcoinhash(vhash, endiandata);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash); work_set_target_ratio(work, vhash);
#if 0 pdata[19] = work->nonces[0] + 1; // cursor
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); return work->valid_nonces;
if (secNonce != 0) {
pdata[21] = secNonce;
res++;
} }
#endif else if (vhash[7] > Htarg) {
pdata[19] = foundNonce; gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
return res; pdata[19] = work->nonces[0] + 1;
} else { continue;
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
} }
} }
if ((uint64_t) throughput + pdata[19] >= max_nonce) { if ((uint64_t) throughput + pdata[19] >= max_nonce) {

45
x15/x14.cu

@ -198,32 +198,35 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
/* check now with the CPU to confirm */ /* check now with the CPU to confirm */
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
x14hash(vhash64, endiandata); x14hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
x14hash(vhash64, endiandata); x14hash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++;
}
pdata[19] = foundNonce;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

45
x15/x15.cu

@ -205,31 +205,34 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
/* check now with the CPU to confirm */ /* check now with the CPU to confirm */
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
x15hash(vhash64, endiandata); x15hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
x15hash(vhash64, endiandata); x15hash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++;
}
pdata[19] = foundNonce;
return res;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
} }
} }

47
x17/x17.cu

@ -230,34 +230,37 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
const uint32_t Htarg = ptarget[7]; const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash64[8]; uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], work->nonces[0]);
x17hash(vhash64, endiandata); x17hash(vhash, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) { if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
if (secNonce != 0) { if (work->nonces[1] != 0) {
be32enc(&endiandata[19], secNonce); be32enc(&endiandata[19], work->nonces[1]);
x17hash(vhash64, endiandata); x17hash(vhash, endiandata);
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); work->valid_nonces++;
pdata[21] = secNonce; pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
res++;
}
pdata[19] = foundNonce;
return res;
} else { } else {
pdata[19] = work->nonces[0] + 1; // cursor
}
return work->valid_nonces;
}
else if (vhash[7] > Htarg) {
// x11+ coins could do some random error, but not on retry // x11+ coins could do some random error, but not on retry
if (!warn) { if (!warn) {
warn++; continue; warn++;
pdata[19] = work->nonces[0] + 1;
continue;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
warn = 0; warn = 0;
} }
} }

49
zr5.cu

@ -431,12 +431,12 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
if (work_restart[thr_id].restart) if (work_restart[thr_id].restart)
return -1; return -1;
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]); work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (foundNonce != UINT32_MAX) if (work->nonces[0] != UINT32_MAX)
{ {
uint32_t vhash64[8]; uint32_t _ALIGN(64) vhash[8];
uint32_t oldp19 = pdata[19]; uint32_t oldp19 = pdata[19];
uint32_t offset = foundNonce - pdata[19]; uint32_t offset = work->nonces[0] - pdata[19];
uint32_t pok = 0; uint32_t pok = 0;
uint16_t h_pok; uint16_t h_pok;
@ -444,32 +444,31 @@ extern "C" int scanhash_zr5(int thr_id, struct work *work,
cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost); cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost);
pok = version | (0x10000UL * h_pok); pok = version | (0x10000UL * h_pok);
pdata[0] = pok; pdata[19] = foundNonce; pdata[0] = pok; pdata[19] = work->nonces[0];
zr5hash(vhash64, pdata); zr5hash(vhash, pdata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
int res = 1; work->valid_nonces = 1;
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1); work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, oldp19, d_hash[thr_id], 1);
if (secNonce != 0) { if (work->nonces[1] != 0) {
offset = secNonce - oldp19; offset = work->nonces[1] - oldp19;
cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost); cudaMemcpy(&h_pok, d_poks[thr_id] + offset, sizeof(uint16_t), cudaMemcpyDeviceToHost);
pok = version | (0x10000UL * h_pok); pok = version | (0x10000UL * h_pok);
memcpy(tmpdata, pdata, 80); memcpy(tmpdata, pdata, 80);
tmpdata[0] = pok; tmpdata[19] = secNonce; tmpdata[0] = pok; tmpdata[19] = work->nonces[1];
zr5hash(vhash64, tmpdata); zr5hash(vhash, tmpdata);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio[0]) bn_set_target_ratio(work, vhash, 1);
work_set_target_ratio(work, vhash64); pdata[19] = max(pdata[19], work->nonces[1]); // cursor
pdata[21] = secNonce; pdata[20] = pok; // second nonce "pok"
pdata[22] = pok; work->valid_nonces++;
res++;
} }
}
return res;
} else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce);
pdata[19]++; pdata[19]++;
}
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[0] = oldp0; pdata[0] = oldp0;
} }
} else } else

Loading…
Cancel
Save