|
|
@ -63,9 +63,6 @@ static const char *msg_reply = "Reply"; |
|
|
|
#define KLN_KILLWORK_TEMP 53.5 |
|
|
|
#define KLN_KILLWORK_TEMP 53.5 |
|
|
|
#define KLN_COOLED_DOWN 45.5 |
|
|
|
#define KLN_COOLED_DOWN 45.5 |
|
|
|
|
|
|
|
|
|
|
|
// If 5 late updates in a row, try to reset the device
|
|
|
|
|
|
|
|
#define KLN_LATE_UPDATE_LIMIT 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Work older than 5s will already be completed |
|
|
|
* Work older than 5s will already be completed |
|
|
|
* FYI it must not be possible to complete 256 work |
|
|
|
* FYI it must not be possible to complete 256 work |
|
|
@ -74,12 +71,29 @@ static const char *msg_reply = "Reply"; |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
#define OLD_WORK_MS ((int)(5 * 1000)) |
|
|
|
#define OLD_WORK_MS ((int)(5 * 1000)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
* How many incorrect slave counts to ignore in a row |
|
|
|
|
|
|
|
* 2 means it allows random garbage returned twice |
|
|
|
|
|
|
|
* Until slaves are implemented, this should never occur |
|
|
|
|
|
|
|
* so allowing 2 in a row should ignore random errors |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
#define KLN_ISS_IGNORE 2 |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If the queue status hasn't been updated for this long then do it now |
|
|
|
* If the queue status hasn't been updated for this long then do it now |
|
|
|
* 5GH/s = 859ms per full nonce range |
|
|
|
* 5GH/s = 859ms per full nonce range |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
#define LATE_UPDATE_MS ((int)(2.5 * 1000)) |
|
|
|
#define LATE_UPDATE_MS ((int)(2.5 * 1000)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If 5 late updates in a row, try to reset the device
|
|
|
|
|
|
|
|
#define LATE_UPDATE_LIMIT 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If the reset fails sleep for 1s
|
|
|
|
|
|
|
|
#define LATE_UPDATE_SLEEP_MS 1000 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// However give up after 8s
|
|
|
|
|
|
|
|
#define LATE_UPDATE_NODEV_MS ((int)(8.0 * 1000)) |
|
|
|
|
|
|
|
|
|
|
|
struct device_drv klondike_drv; |
|
|
|
struct device_drv klondike_drv; |
|
|
|
|
|
|
|
|
|
|
|
typedef struct klondike_header { |
|
|
|
typedef struct klondike_header { |
|
|
@ -199,7 +213,6 @@ typedef struct jobque { |
|
|
|
} JOBQUE; |
|
|
|
} JOBQUE; |
|
|
|
|
|
|
|
|
|
|
|
struct klondike_info { |
|
|
|
struct klondike_info { |
|
|
|
bool shutdown; |
|
|
|
|
|
|
|
pthread_rwlock_t stat_lock; |
|
|
|
pthread_rwlock_t stat_lock; |
|
|
|
struct thr_info replies_thr; |
|
|
|
struct thr_info replies_thr; |
|
|
|
cglock_t klist_lock; |
|
|
|
cglock_t klist_lock; |
|
|
@ -216,6 +229,7 @@ struct klondike_info { |
|
|
|
uint64_t hashcount; |
|
|
|
uint64_t hashcount; |
|
|
|
uint64_t errorcount; |
|
|
|
uint64_t errorcount; |
|
|
|
uint64_t noisecount; |
|
|
|
uint64_t noisecount; |
|
|
|
|
|
|
|
int incorrect_slave_sequential; |
|
|
|
|
|
|
|
|
|
|
|
// us Delay from USB reply to being processed
|
|
|
|
// us Delay from USB reply to being processed
|
|
|
|
double delay_count; |
|
|
|
double delay_count; |
|
|
@ -540,7 +554,7 @@ static KLIST *GetReply(struct cgpu_info *klncgpu, uint8_t cmd, uint8_t dev) |
|
|
|
KLIST *kitem; |
|
|
|
KLIST *kitem; |
|
|
|
int retries = CMD_REPLY_RETRIES; |
|
|
|
int retries = CMD_REPLY_RETRIES; |
|
|
|
|
|
|
|
|
|
|
|
while (retries-- > 0 && klninfo->shutdown == false) { |
|
|
|
while (retries-- > 0 && klncgpu->shutdown == false) { |
|
|
|
cgsleep_ms(REPLY_WAIT_TIME); |
|
|
|
cgsleep_ms(REPLY_WAIT_TIME); |
|
|
|
cg_rlock(&klninfo->klist_lock); |
|
|
|
cg_rlock(&klninfo->klist_lock); |
|
|
|
kitem = klninfo->used; |
|
|
|
kitem = klninfo->used; |
|
|
@ -947,13 +961,13 @@ static void *klondike_get_replies(void *userdata) |
|
|
|
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); |
|
|
|
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); |
|
|
|
KLIST *kitem = NULL; |
|
|
|
KLIST *kitem = NULL; |
|
|
|
char *hexdata; |
|
|
|
char *hexdata; |
|
|
|
int err, recd, slaves, dev; |
|
|
|
int err, recd, slaves, dev, isc; |
|
|
|
bool overheat; |
|
|
|
bool overheat, sent; |
|
|
|
|
|
|
|
|
|
|
|
applog(LOG_DEBUG, "%s%i: listening for replies", |
|
|
|
applog(LOG_DEBUG, "%s%i: listening for replies", |
|
|
|
klncgpu->drv->name, klncgpu->device_id); |
|
|
|
klncgpu->drv->name, klncgpu->device_id); |
|
|
|
|
|
|
|
|
|
|
|
while (klninfo->shutdown == false) { |
|
|
|
while (klncgpu->shutdown == false) { |
|
|
|
if (klncgpu->usbinfo.nodev) |
|
|
|
if (klncgpu->usbinfo.nodev) |
|
|
|
return NULL; |
|
|
|
return NULL; |
|
|
|
|
|
|
|
|
|
|
@ -1019,16 +1033,27 @@ static void *klondike_get_replies(void *userdata) |
|
|
|
cgtime(&(klninfo->jobque[dev].last_update)); |
|
|
|
cgtime(&(klninfo->jobque[dev].last_update)); |
|
|
|
slaves = klninfo->status[0].kline.ws.slavecount; |
|
|
|
slaves = klninfo->status[0].kline.ws.slavecount; |
|
|
|
overheat = klninfo->jobque[dev].overheat; |
|
|
|
overheat = klninfo->jobque[dev].overheat; |
|
|
|
|
|
|
|
if (dev == 0) { |
|
|
|
|
|
|
|
if (kitem->kline.ws.slavecount != slaves) |
|
|
|
|
|
|
|
isc = ++klninfo->incorrect_slave_sequential; |
|
|
|
|
|
|
|
else |
|
|
|
|
|
|
|
isc = klninfo->incorrect_slave_sequential = 0; |
|
|
|
|
|
|
|
} |
|
|
|
wr_unlock(&(klninfo->stat_lock)); |
|
|
|
wr_unlock(&(klninfo->stat_lock)); |
|
|
|
|
|
|
|
|
|
|
|
if (kitem->kline.ws.slavecount != slaves) { |
|
|
|
if (isc) { |
|
|
|
applog(LOG_ERR, "%s%i:%d reply [%c] has a diff # of slaves=%d" |
|
|
|
applog(LOG_ERR, "%s%i:%d reply [%c] has a diff" |
|
|
|
" (curr=%d) dropping device to hotplug", |
|
|
|
" # of slaves=%d (curr=%d)%s", |
|
|
|
klncgpu->drv->name, klncgpu->device_id, |
|
|
|
klncgpu->drv->name, |
|
|
|
dev, (char)(kitem->kline.ws.cmd), |
|
|
|
klncgpu->device_id, |
|
|
|
|
|
|
|
dev, |
|
|
|
|
|
|
|
(char)(kitem->kline.ws.cmd), |
|
|
|
(int)(kitem->kline.ws.slavecount), |
|
|
|
(int)(kitem->kline.ws.slavecount), |
|
|
|
slaves); |
|
|
|
slaves, |
|
|
|
klninfo->shutdown = true; |
|
|
|
isc <= KLN_ISS_IGNORE ? "" : |
|
|
|
|
|
|
|
" disabling device"); |
|
|
|
|
|
|
|
if (isc > KLN_ISS_IGNORE) |
|
|
|
|
|
|
|
usb_nodev(klncgpu); |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -1048,15 +1073,16 @@ static void *klondike_get_replies(void *userdata) |
|
|
|
zero_kline(&kline); |
|
|
|
zero_kline(&kline); |
|
|
|
kline.hd.cmd = KLN_CMD_ABORT; |
|
|
|
kline.hd.cmd = KLN_CMD_ABORT; |
|
|
|
kline.hd.dev = dev; |
|
|
|
kline.hd.dev = dev; |
|
|
|
if (!SendCmd(klncgpu, &kline, KSENDHD(0))) { |
|
|
|
sent = SendCmd(klncgpu, &kline, KSENDHD(0)); |
|
|
|
applog(LOG_ERR, "%s%i:%d failed to abort work" |
|
|
|
kln_disable(klncgpu, dev, false); |
|
|
|
" - dropping device to hotplug", |
|
|
|
if (!sent) { |
|
|
|
|
|
|
|
applog(LOG_ERR, "%s%i:%d overheat failed to" |
|
|
|
|
|
|
|
" abort work - disabling device", |
|
|
|
klncgpu->drv->name, |
|
|
|
klncgpu->drv->name, |
|
|
|
klncgpu->device_id, |
|
|
|
klncgpu->device_id, |
|
|
|
dev); |
|
|
|
dev); |
|
|
|
klninfo->shutdown = true; |
|
|
|
usb_nodev(klncgpu); |
|
|
|
} |
|
|
|
} |
|
|
|
kln_disable(klncgpu, dev, false); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -1157,7 +1183,7 @@ static void klondike_shutdown(struct thr_info *thr) |
|
|
|
|
|
|
|
|
|
|
|
kln_disable(klncgpu, klninfo->status[0].kline.ws.slavecount, true); |
|
|
|
kln_disable(klncgpu, klninfo->status[0].kline.ws.slavecount, true); |
|
|
|
|
|
|
|
|
|
|
|
klncgpu->shutdown = klninfo->shutdown = true; |
|
|
|
klncgpu->shutdown = true; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static void klondike_thread_enable(struct thr_info *thr) |
|
|
|
static void klondike_thread_enable(struct thr_info *thr) |
|
|
@ -1243,10 +1269,13 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu) |
|
|
|
{ |
|
|
|
{ |
|
|
|
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); |
|
|
|
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); |
|
|
|
struct work *work = NULL; |
|
|
|
struct work *work = NULL; |
|
|
|
int dev, queued, slaves, seq; |
|
|
|
int dev, queued, slaves, seq, howlong; |
|
|
|
struct timeval now; |
|
|
|
struct timeval now; |
|
|
|
bool nowork; |
|
|
|
bool nowork; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (klncgpu->shutdown == true) |
|
|
|
|
|
|
|
return true; |
|
|
|
|
|
|
|
|
|
|
|
cgtime(&now); |
|
|
|
cgtime(&now); |
|
|
|
rd_lock(&(klninfo->stat_lock)); |
|
|
|
rd_lock(&(klninfo->stat_lock)); |
|
|
|
slaves = klninfo->status[0].kline.ws.slavecount; |
|
|
|
slaves = klninfo->status[0].kline.ws.slavecount; |
|
|
@ -1255,7 +1284,7 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu) |
|
|
|
klninfo->jobque[dev].late_update_count++; |
|
|
|
klninfo->jobque[dev].late_update_count++; |
|
|
|
seq = ++klninfo->jobque[dev].late_update_sequential; |
|
|
|
seq = ++klninfo->jobque[dev].late_update_sequential; |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
if (seq < KLN_LATE_UPDATE_LIMIT) { |
|
|
|
if (seq < LATE_UPDATE_LIMIT) { |
|
|
|
applog(LOG_ERR, "%s%i:%d late update", |
|
|
|
applog(LOG_ERR, "%s%i:%d late update", |
|
|
|
klncgpu->drv->name, klncgpu->device_id, dev); |
|
|
|
klncgpu->drv->name, klncgpu->device_id, dev); |
|
|
|
klondike_get_stats(klncgpu); |
|
|
|
klondike_get_stats(klncgpu); |
|
|
@ -1263,17 +1292,22 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu) |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
applog(LOG_ERR, "%s%i:%d late update (%d) reached - attempting reset", |
|
|
|
applog(LOG_ERR, "%s%i:%d late update (%d) reached - attempting reset", |
|
|
|
klncgpu->drv->name, klncgpu->device_id, |
|
|
|
klncgpu->drv->name, klncgpu->device_id, |
|
|
|
dev, KLN_LATE_UPDATE_LIMIT); |
|
|
|
dev, LATE_UPDATE_LIMIT); |
|
|
|
control_init(klncgpu); |
|
|
|
control_init(klncgpu); |
|
|
|
kln_enable(klncgpu); |
|
|
|
kln_enable(klncgpu); |
|
|
|
klondike_get_stats(klncgpu); |
|
|
|
klondike_get_stats(klncgpu); |
|
|
|
rd_lock(&(klninfo->stat_lock)); |
|
|
|
rd_lock(&(klninfo->stat_lock)); |
|
|
|
if (ms_tdiff(&now, &(klninfo->jobque[dev].last_update)) > LATE_UPDATE_MS) { |
|
|
|
howlong = ms_tdiff(&now, &(klninfo->jobque[dev].last_update)); |
|
|
|
|
|
|
|
if (howlong > LATE_UPDATE_MS) { |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
|
|
|
|
if (howlong > LATE_UPDATE_NODEV_MS) { |
|
|
|
applog(LOG_ERR, "%s%i:%d reset failed - dropping device", |
|
|
|
applog(LOG_ERR, "%s%i:%d reset failed - dropping device", |
|
|
|
klncgpu->drv->name, klncgpu->device_id, dev); |
|
|
|
klncgpu->drv->name, klncgpu->device_id, dev); |
|
|
|
klninfo->shutdown = true; |
|
|
|
usb_nodev(klncgpu); |
|
|
|
return false; |
|
|
|
} else |
|
|
|
|
|
|
|
cgsleep_ms(LATE_UPDATE_SLEEP_MS); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
@ -1360,6 +1394,7 @@ static int64_t klondike_scanwork(struct thr_info *thr) |
|
|
|
klninfo->noncecount = 0; |
|
|
|
klninfo->noncecount = 0; |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
rd_unlock(&(klninfo->stat_lock)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return newhashcount; |
|
|
|
return newhashcount; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|