Browse Source

Enhance stale work detection + throughput fixes

This seems to resolve the solo mining lock-up on share.
Also export the computed solo work difficulty in the API (not perfect).

In high-rate algos, throughput should be unsigned.
This fixes problems in keccak, blake and doom.

Also change the terminal color of debug lines so that they are selectable in PuTTY;
the color code is not supported on Windows, but selection already works there.
2upstream
Tanguy Pruvot 10 years ago
parent
commit
49a73971c4
  1. 8
      Algo256/blake256.cu
  2. 13
      Algo256/keccak256.cu
  3. 40
      ccminer.cpp
  4. 4
      miner.h
  5. 19
      qubit/doom.cu

8
Algo256/blake256.cu

@ -452,14 +452,16 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -452,14 +452,16 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
if (vhashcpu[6] <= Htarg && fulltest(vhashcpu, ptarget))
{
rc = 1;
*hashes_done = pdata[19] - first_nonce + throughput;
pdata[19] = foundNonce;
*hashes_done = pdata[19] - first_nonce + 1;
#if NBN > 1
if (extra_results[0] != UINT32_MAX) {
be32enc(&endiandata[19], extra_results[0]);
blake256hash(vhashcpu, endiandata, blakerounds);
if (vhashcpu[6] <= Htarg /* && fulltest(vhashcpu, ptarget) */) {
pdata[21] = extra_results[0];
applog(LOG_BLUE, "1:%x 2:%x", foundNonce, extra_results[0]);
*hashes_done = max(*hashes_done, extra_results[0] - first_nonce + 1);
rc = 2;
}
extra_results[0] = UINT32_MAX;
@ -477,7 +479,6 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -477,7 +479,6 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
}
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
pdata[19] = max_nonce;
break;
}
@ -485,6 +486,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -485,6 +486,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1; // (+1 to prevent locks)
*hashes_done = pdata[19] - first_nonce;
return rc;
}

13
Algo256/keccak256.cu

@ -41,8 +41,8 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -41,8 +41,8 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
throughput = min(throughput, (int)(max_nonce - first_nonce));
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0005;
@ -51,7 +51,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -51,7 +51,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
keccak256_cpu_init(thr_id, throughput);
keccak256_cpu_init(thr_id, (int) throughput);
init[thr_id] = true;
}
@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -65,7 +65,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
do {
int order = 0;
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX)
{
uint32_t Htarg = ptarget[7];
@ -83,8 +83,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -83,8 +83,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
}
}
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
pdata[19] = max_nonce;
if ((uint64_t) pdata[19] + throughput > max_nonce) {
break;
}
@ -92,6 +91,6 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -92,6 +91,6 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;
*hashes_done = pdata[19] - first_nonce;
return 0;
}

40
ccminer.cpp

@ -484,6 +484,7 @@ static bool work_decode(const json_t *val, struct work *work) @@ -484,6 +484,7 @@ static bool work_decode(const json_t *val, struct work *work)
/**
* Calculate the work difficulty as double
* Not sure it works with pools
*/
static void calc_diff(struct work *work, int known)
{
@ -548,26 +549,24 @@ static int share_result(int result, const char *reason) @@ -548,26 +549,24 @@ static int share_result(int result, const char *reason)
static bool submit_upstream_work(CURL *curl, struct work *work)
{
json_t *val, *res, *reason;
bool stale_work = false;
char s[384];
bool stale_work;
/* discard if a new block was sent */
stale_work = work->height != g_work.height;
if (have_stratum && !stale_work) {
pthread_mutex_lock(&g_work_lock);
if (strlen(work->job_id + 8)) {
/* stale if not the current job id */
if (strlen(work->job_id + 8))
stale_work = strcmp(work->job_id + 8, g_work.job_id + 8);
} else {
/* fallback when no job id (compare hash) */
stale_work = memcmp(&work->data[1], &g_work.data[1], 32);
pthread_mutex_unlock(&g_work_lock);
}
if (stale_work) {
pthread_mutex_unlock(&g_work_lock);
if (opt_debug)
applog(LOG_WARNING, "stale work detected, discarding");
return true;
}
calc_diff(work, 0);
pthread_mutex_unlock(&g_work_lock);
if (have_stratum) {
uint32_t sent;
@ -1049,14 +1048,15 @@ static void *miner_thread(void *userdata) @@ -1049,14 +1048,15 @@ static void *miner_thread(void *userdata)
stratum_gen_work(&stratum, &g_work);
}
} else {
int min_scantime = scan_time;
/* obtain new work from internal workio thread */
pthread_mutex_lock(&g_work_lock);
if (time(NULL) - g_work_time >= min_scantime || nonceptr[0] >= end_nonce) {
if ((time(NULL) - g_work_time) >= scan_time || nonceptr[0] >= (end_nonce - 0x100)) {
if (opt_debug && g_work_time && !opt_quiet)
applog(LOG_DEBUG, "work time %u/%us nonce %x/%x", time(NULL) - g_work_time,
scan_time, nonceptr[0], end_nonce);
/* obtain new work from internal workio thread */
if (unlikely(!get_work(mythr, &g_work))) {
applog(LOG_ERR, "work retrieval failed, exiting "
"mining thread %d", mythr->id);
pthread_mutex_unlock(&g_work_lock);
applog(LOG_ERR, "work retrieval failed, exiting mining thread %d", mythr->id);
goto out;
}
g_work_time = time(NULL);
@ -1065,6 +1065,8 @@ static void *miner_thread(void *userdata) @@ -1065,6 +1065,8 @@ static void *miner_thread(void *userdata)
if (!opt_benchmark && memcmp(work.target, g_work.target, sizeof(work.target))) {
calc_diff(&g_work, 0);
if (!have_stratum)
global_diff = g_work.difficulty;
if (opt_debug) {
uint64_t target64 = g_work.target[7] * 0x100000000ULL + g_work.target[6];
applog(LOG_DEBUG, "job %s target change: %llx (%.1f)", g_work.job_id, target64, g_work.difficulty);
@ -1175,9 +1177,6 @@ static void *miner_thread(void *userdata) @@ -1175,9 +1177,6 @@ static void *miner_thread(void *userdata)
}
}
#endif
if (opt_algo == ALGO_KECCAK && max64 == UINT32_MAX) {
max64 = 0x7FFFFFFFUL;
}
/* never let small ranges at end */
if (end_nonce >= UINT32_MAX - 256)
end_nonce = UINT32_MAX;
@ -1345,6 +1344,7 @@ static void *miner_thread(void *userdata) @@ -1345,6 +1344,7 @@ static void *miner_thread(void *userdata)
timeval_subtract(&diff, &tv_end, &tv_start);
if (diff.tv_usec || diff.tv_sec) {
double dtime = (double) diff.tv_sec + 1e-6 * diff.tv_usec;
/* hashrate factors for some algos */
double rate_factor = 1.0;
@ -1357,14 +1357,14 @@ static void *miner_thread(void *userdata) @@ -1357,14 +1357,14 @@ static void *miner_thread(void *userdata)
}
/* store thread hashrate */
if (dtime > 0.0) {
pthread_mutex_lock(&stats_lock);
if (diff.tv_sec + 1e-6 * diff.tv_usec > 0.0) {
thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
thr_hashrates[thr_id] = hashes_done / dtime;
thr_hashrates[thr_id] *= rate_factor;
stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
}
pthread_mutex_unlock(&stats_lock);
}
}
if (rc > 1)
work.scanned_to = nonceptr[2];
@ -1392,8 +1392,10 @@ static void *miner_thread(void *userdata) @@ -1392,8 +1392,10 @@ static void *miner_thread(void *userdata)
/* loopcnt: ignore first loop hashrate */
if (loopcnt && thr_id == (opt_n_threads - 1)) {
double hashrate = 0.;
pthread_mutex_lock(&stats_lock);
for (int i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
hashrate += stats_get_speed(i, thr_hashrates[i]);
pthread_mutex_unlock(&stats_lock);
if (opt_benchmark) {
sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", hashrate / 1000.);
applog(LOG_NOTICE, "Total: %s kH/s", s);

4
miner.h

@ -505,7 +505,11 @@ extern long device_sm[8]; @@ -505,7 +505,11 @@ extern long device_sm[8];
#define CL_CY2 "\x1B[22;36m" /* cyan */
#define CL_SIL "\x1B[22;37m" /* gray */
/*
 * Dark gray uses a different escape sequence depending on the platform:
 * WIN32 builds use "01;30", every other build uses "90", which is
 * noted as being selectable in PuTTY.
 */
#ifdef WIN32
#define CL_GRY "\x1B[01;30m" /* dark gray */
#else
#define CL_GRY "\x1B[90m" /* dark gray selectable in putty */
#endif
#define CL_LRD "\x1B[01;31m" /* light red */
#define CL_LGR "\x1B[01;32m" /* light green */
#define CL_LYL "\x1B[01;33m" /* tooltips */

19
qubit/doom.cu

@ -40,8 +40,8 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, @@ -40,8 +40,8 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
throughput = min(throughput, (int)(max_nonce - first_nonce));
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000f;
@ -52,7 +52,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, @@ -52,7 +52,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
qubit_luffa512_cpu_init(thr_id, throughput);
qubit_luffa512_cpu_init(thr_id, (int) throughput);
init[thr_id] = true;
}
@ -65,7 +65,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, @@ -65,7 +65,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
do {
int order = 0;
uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
if (foundNonce != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
@ -74,23 +74,22 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, @@ -74,23 +74,22 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
doomhash(vhash64, endiandata);
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
int res = 1;
*hashes_done = pdata[19] - first_nonce + throughput;
*hashes_done = min(max_nonce - first_nonce, (uint64_t) pdata[19] - first_nonce + throughput);
pdata[19] = foundNonce;
return res;
return 1;
}
else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
if ((uint64_t) pdata[19] + throughput > max_nonce) {
pdata[19] = max_nonce;
// pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce + 1;

Loading…
Cancel
Save