Browse Source

blake: small change for the second nonce

2upstream
Tanguy Pruvot 9 years ago
parent
commit
7c1137f335
  1. 55
      Algo256/blake256.cu

55
Algo256/blake256.cu

@@ -261,23 +261,12 @@ void blake256_gpu_hash_16(const uint32_t threads, const uint32_t startNonce, uin
 if (h[7] == 0 && cuda_swab32(h[6]) <= highTarget) {
 #if NBN == 2
-/* keep the smallest nonce, + extra one if found */
-if (resNonce[0] > nonce) {
-resNonce[1] = resNonce[0];
-resNonce[0] = nonce;
-}
-else
+if (resNonce[0] != UINT32_MAX)
 resNonce[1] = nonce;
+else
+resNonce[0] = nonce;
 #else
 resNonce[0] = nonce;
-#endif
-#ifdef _DEBUG
-if (trace) {
-uint64_t high64 = ((uint64_t*)h)[3];
-printf("gpu: %16llx\n", high64);
-printf("gpu: %08x.%08x\n", h[7], h[6]);
-printf("tgt: %16llx\n", highTarget);
-}
 #endif
 }
 }
@@ -420,33 +409,26 @@ const uint64_t highTarget, const int rounds, const bool trace)
 GSPREC(0, 5, 0xA, 0xF, 5, 0);
 GSPREC(1, 6, 0xB, 0xC, 15, 4);
 GSPREC(2, 7, 0x8, 0xD, 8, 6);
-GSPREC(3, 4, 0x9, 0xE, 2, 10);
+//GSPREC(3, 4, 0x9, 0xE, 2, 10);
 // { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
 // only compute h6 & 7
-h[6U] ^= v[6U] ^ v[14U];
-h[7U] ^= v[7U] ^ v[15U];
-if (h[7] == 0 && cuda_swab32(h[6]) <= highTarget) {
+//h[6] ^= v[6] ^ v[14];
+//h[7] ^= v[7] ^ v[15];
+if ((h[7]^v[7]^v[15]) == 0) // h7
+{
+GSPREC(3, 4, 0x9, 0xE, 2, 10);
+if (cuda_swab32(h[6]^v[6]^v[14]) <= highTarget) {
 #if NBN == 2
-/* keep the smallest nonce, + extra one if found */
-if (resNonce[0] > nonce) {
-resNonce[1] = resNonce[0];
+if (resNonce[0] != UINT32_MAX)
+resNonce[1] = nonce;
+else
 resNonce[0] = nonce;
-}
-else
-resNonce[1] = nonce;
 #else
 resNonce[0] = nonce;
 #endif
-#ifdef _DEBUG
-if (trace) {
-uint64_t high64 = ((uint64_t*)h)[3];
-printf("gpu: %16llx\n", high64);
-printf("gpu: %08x.%08x\n", h[7], h[6]);
-printf("tgt: %16llx\n", highTarget);
 }
-#endif
 }
 }
 }
@@ -469,9 +451,7 @@ static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, c
 else
 blake256_gpu_hash_16 <<<grid, block>>> (threads, startNonce, d_resNonce[thr_id], highTarget, (int)rounds, opt_tracegpu);
-// cudaDeviceSynchronize();
 if (cudaSuccess == cudaMemcpy(h_resNonce[thr_id], d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost)) {
-//cudaDeviceSynchronize(); /* seems no more required */
 result = h_resNonce[thr_id][0];
 for (int n=0; n < (NBN-1); n++)
 extra_results[n] = h_resNonce[thr_id][n+1];
@@ -586,7 +566,10 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 blake256hash(vhashcpu, endiandata, blakerounds);
 if (vhashcpu[6] <= Htarg /* && fulltest(vhashcpu, ptarget) */) {
 pdata[21] = extra_results[0];
-applog(LOG_BLUE, "1:%x 2:%x", foundNonce, extra_results[0]);
+if (bn_hash_target_ratio(vhashcpu, ptarget) > work->shareratio) {
+work_set_target_ratio(work, vhashcpu);
+xchg(pdata[21], pdata[19]);
+}
 rc = 2;
 }
 extra_results[0] = UINT32_MAX;

Loading…
Cancel
Save