Browse Source

Merge branch 'master' into kncminer

nfactor-troky
Con Kolivas 11 years ago
parent
commit
d3f33888fd
  1. 93
      driver-klondike.c
  2. 14
      usbutils.c
  3. 3
      usbutils.h

93
driver-klondike.c

@ -63,9 +63,6 @@ static const char *msg_reply = "Reply";
#define KLN_KILLWORK_TEMP 53.5 #define KLN_KILLWORK_TEMP 53.5
#define KLN_COOLED_DOWN 45.5 #define KLN_COOLED_DOWN 45.5
// If 5 late updates in a row, try to reset the device
#define KLN_LATE_UPDATE_LIMIT 5
/* /*
* Work older than 5s will already be completed * Work older than 5s will already be completed
* FYI it must not be possible to complete 256 work * FYI it must not be possible to complete 256 work
@ -74,12 +71,29 @@ static const char *msg_reply = "Reply";
*/ */
#define OLD_WORK_MS ((int)(5 * 1000)) #define OLD_WORK_MS ((int)(5 * 1000))
/*
* How many incorrect slave counts to ignore in a row
* 2 means it allows random garbage returned twice
* Until slaves are implemented, this should never occur
* so allowing 2 in a row should ignore random errors
*/
#define KLN_ISS_IGNORE 2
/* /*
* If the queue status hasn't been updated for this long then do it now * If the queue status hasn't been updated for this long then do it now
* 5GH/s = 859ms per full nonce range * 5GH/s = 859ms per full nonce range
*/ */
#define LATE_UPDATE_MS ((int)(2.5 * 1000)) #define LATE_UPDATE_MS ((int)(2.5 * 1000))
// If 5 late updates in a row, try to reset the device
#define LATE_UPDATE_LIMIT 5
// If the reset fails sleep for 1s
#define LATE_UPDATE_SLEEP_MS 1000
// However give up after 8s
#define LATE_UPDATE_NODEV_MS ((int)(8.0 * 1000))
struct device_drv klondike_drv; struct device_drv klondike_drv;
typedef struct klondike_header { typedef struct klondike_header {
@ -199,7 +213,6 @@ typedef struct jobque {
} JOBQUE; } JOBQUE;
struct klondike_info { struct klondike_info {
bool shutdown;
pthread_rwlock_t stat_lock; pthread_rwlock_t stat_lock;
struct thr_info replies_thr; struct thr_info replies_thr;
cglock_t klist_lock; cglock_t klist_lock;
@ -216,6 +229,7 @@ struct klondike_info {
uint64_t hashcount; uint64_t hashcount;
uint64_t errorcount; uint64_t errorcount;
uint64_t noisecount; uint64_t noisecount;
int incorrect_slave_sequential;
// us Delay from USB reply to being processed // us Delay from USB reply to being processed
double delay_count; double delay_count;
@ -540,7 +554,7 @@ static KLIST *GetReply(struct cgpu_info *klncgpu, uint8_t cmd, uint8_t dev)
KLIST *kitem; KLIST *kitem;
int retries = CMD_REPLY_RETRIES; int retries = CMD_REPLY_RETRIES;
while (retries-- > 0 && klninfo->shutdown == false) { while (retries-- > 0 && klncgpu->shutdown == false) {
cgsleep_ms(REPLY_WAIT_TIME); cgsleep_ms(REPLY_WAIT_TIME);
cg_rlock(&klninfo->klist_lock); cg_rlock(&klninfo->klist_lock);
kitem = klninfo->used; kitem = klninfo->used;
@ -947,13 +961,13 @@ static void *klondike_get_replies(void *userdata)
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data);
KLIST *kitem = NULL; KLIST *kitem = NULL;
char *hexdata; char *hexdata;
int err, recd, slaves, dev; int err, recd, slaves, dev, isc;
bool overheat; bool overheat, sent;
applog(LOG_DEBUG, "%s%i: listening for replies", applog(LOG_DEBUG, "%s%i: listening for replies",
klncgpu->drv->name, klncgpu->device_id); klncgpu->drv->name, klncgpu->device_id);
while (klninfo->shutdown == false) { while (klncgpu->shutdown == false) {
if (klncgpu->usbinfo.nodev) if (klncgpu->usbinfo.nodev)
return NULL; return NULL;
@ -1019,16 +1033,27 @@ static void *klondike_get_replies(void *userdata)
cgtime(&(klninfo->jobque[dev].last_update)); cgtime(&(klninfo->jobque[dev].last_update));
slaves = klninfo->status[0].kline.ws.slavecount; slaves = klninfo->status[0].kline.ws.slavecount;
overheat = klninfo->jobque[dev].overheat; overheat = klninfo->jobque[dev].overheat;
if (dev == 0) {
if (kitem->kline.ws.slavecount != slaves)
isc = ++klninfo->incorrect_slave_sequential;
else
isc = klninfo->incorrect_slave_sequential = 0;
}
wr_unlock(&(klninfo->stat_lock)); wr_unlock(&(klninfo->stat_lock));
if (kitem->kline.ws.slavecount != slaves) { if (isc) {
applog(LOG_ERR, "%s%i:%d reply [%c] has a diff # of slaves=%d" applog(LOG_ERR, "%s%i:%d reply [%c] has a diff"
" (curr=%d) dropping device to hotplug", " # of slaves=%d (curr=%d)%s",
klncgpu->drv->name, klncgpu->device_id, klncgpu->drv->name,
dev, (char)(kitem->kline.ws.cmd), klncgpu->device_id,
dev,
(char)(kitem->kline.ws.cmd),
(int)(kitem->kline.ws.slavecount), (int)(kitem->kline.ws.slavecount),
slaves); slaves,
klninfo->shutdown = true; isc <= KLN_ISS_IGNORE ? "" :
" disabling device");
if (isc > KLN_ISS_IGNORE)
usb_nodev(klncgpu);
break; break;
} }
@ -1048,15 +1073,16 @@ static void *klondike_get_replies(void *userdata)
zero_kline(&kline); zero_kline(&kline);
kline.hd.cmd = KLN_CMD_ABORT; kline.hd.cmd = KLN_CMD_ABORT;
kline.hd.dev = dev; kline.hd.dev = dev;
if (!SendCmd(klncgpu, &kline, KSENDHD(0))) { sent = SendCmd(klncgpu, &kline, KSENDHD(0));
applog(LOG_ERR, "%s%i:%d failed to abort work" kln_disable(klncgpu, dev, false);
" - dropping device to hotplug", if (!sent) {
applog(LOG_ERR, "%s%i:%d overheat failed to"
" abort work - disabling device",
klncgpu->drv->name, klncgpu->drv->name,
klncgpu->device_id, klncgpu->device_id,
dev); dev);
klninfo->shutdown = true; usb_nodev(klncgpu);
} }
kln_disable(klncgpu, dev, false);
} }
} }
} }
@ -1157,7 +1183,7 @@ static void klondike_shutdown(struct thr_info *thr)
kln_disable(klncgpu, klninfo->status[0].kline.ws.slavecount, true); kln_disable(klncgpu, klninfo->status[0].kline.ws.slavecount, true);
klncgpu->shutdown = klninfo->shutdown = true; klncgpu->shutdown = true;
} }
static void klondike_thread_enable(struct thr_info *thr) static void klondike_thread_enable(struct thr_info *thr)
@ -1243,10 +1269,13 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu)
{ {
struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data); struct klondike_info *klninfo = (struct klondike_info *)(klncgpu->device_data);
struct work *work = NULL; struct work *work = NULL;
int dev, queued, slaves, seq; int dev, queued, slaves, seq, howlong;
struct timeval now; struct timeval now;
bool nowork; bool nowork;
if (klncgpu->shutdown == true)
return true;
cgtime(&now); cgtime(&now);
rd_lock(&(klninfo->stat_lock)); rd_lock(&(klninfo->stat_lock));
slaves = klninfo->status[0].kline.ws.slavecount; slaves = klninfo->status[0].kline.ws.slavecount;
@ -1255,7 +1284,7 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu)
klninfo->jobque[dev].late_update_count++; klninfo->jobque[dev].late_update_count++;
seq = ++klninfo->jobque[dev].late_update_sequential; seq = ++klninfo->jobque[dev].late_update_sequential;
rd_unlock(&(klninfo->stat_lock)); rd_unlock(&(klninfo->stat_lock));
if (seq < KLN_LATE_UPDATE_LIMIT) { if (seq < LATE_UPDATE_LIMIT) {
applog(LOG_ERR, "%s%i:%d late update", applog(LOG_ERR, "%s%i:%d late update",
klncgpu->drv->name, klncgpu->device_id, dev); klncgpu->drv->name, klncgpu->device_id, dev);
klondike_get_stats(klncgpu); klondike_get_stats(klncgpu);
@ -1263,17 +1292,22 @@ static bool klondike_queue_full(struct cgpu_info *klncgpu)
} else { } else {
applog(LOG_ERR, "%s%i:%d late update (%d) reached - attempting reset", applog(LOG_ERR, "%s%i:%d late update (%d) reached - attempting reset",
klncgpu->drv->name, klncgpu->device_id, klncgpu->drv->name, klncgpu->device_id,
dev, KLN_LATE_UPDATE_LIMIT); dev, LATE_UPDATE_LIMIT);
control_init(klncgpu); control_init(klncgpu);
kln_enable(klncgpu); kln_enable(klncgpu);
klondike_get_stats(klncgpu); klondike_get_stats(klncgpu);
rd_lock(&(klninfo->stat_lock)); rd_lock(&(klninfo->stat_lock));
if (ms_tdiff(&now, &(klninfo->jobque[dev].last_update)) > LATE_UPDATE_MS) { howlong = ms_tdiff(&now, &(klninfo->jobque[dev].last_update));
if (howlong > LATE_UPDATE_MS) {
rd_unlock(&(klninfo->stat_lock)); rd_unlock(&(klninfo->stat_lock));
applog(LOG_ERR, "%s%i:%d reset failed - dropping device", if (howlong > LATE_UPDATE_NODEV_MS) {
klncgpu->drv->name, klncgpu->device_id, dev); applog(LOG_ERR, "%s%i:%d reset failed - dropping device",
klninfo->shutdown = true; klncgpu->drv->name, klncgpu->device_id, dev);
return false; usb_nodev(klncgpu);
} else
cgsleep_ms(LATE_UPDATE_SLEEP_MS);
return true;
} }
break; break;
} }
@ -1360,6 +1394,7 @@ static int64_t klondike_scanwork(struct thr_info *thr)
klninfo->noncecount = 0; klninfo->noncecount = 0;
rd_unlock(&(klninfo->stat_lock)); rd_unlock(&(klninfo->stat_lock));
} }
return newhashcount; return newhashcount;
} }

14
usbutils.c

@ -1355,6 +1355,20 @@ static void release_cgpu(struct cgpu_info *cgpu)
cgminer_usb_unlock_bd(cgpu->drv, cgpu->usbinfo.bus_number, cgpu->usbinfo.device_address); cgminer_usb_unlock_bd(cgpu->drv, cgpu->usbinfo.bus_number, cgpu->usbinfo.device_address);
} }
/*
* Force a NODEV on a device so it goes back to hotplug
*
* Takes the device write lock (DEVWLOCK), calls release_cgpu() on cgpu
* and then drops the lock again (DEVWUNLOCK).
*
* cgpu: the USB device to release
*/
void usb_nodev(struct cgpu_info *cgpu)
{
// Handed to both DEVWLOCK and DEVWUNLOCK; NOTE(review): presumably holds
// state the lock macro saves for the unlock macro to restore - confirm
// against the macro definitions
int pstate;
DEVWLOCK(cgpu, pstate);
// release_cgpu() runs with the device write lock held
release_cgpu(cgpu);
DEVWUNLOCK(cgpu, pstate);
}
/* /*
* Use the same usbdev thus locking is across all related devices * Use the same usbdev thus locking is across all related devices
*/ */

3
usbutils.h

@ -358,7 +358,8 @@ bool async_usb_transfers(void);
void cancel_usb_transfers(void); void cancel_usb_transfers(void);
void usb_all(int level); void usb_all(int level);
const char *usb_cmdname(enum usb_cmds cmd); const char *usb_cmdname(enum usb_cmds cmd);
void usb_applog(struct cgpu_info *bflsc, enum usb_cmds cmd, char *msg, int amount, int err); void usb_applog(struct cgpu_info *cgpu, enum usb_cmds cmd, char *msg, int amount, int err);
void usb_nodev(struct cgpu_info *cgpu);
struct cgpu_info *usb_copy_cgpu(struct cgpu_info *orig); struct cgpu_info *usb_copy_cgpu(struct cgpu_info *orig);
struct cgpu_info *usb_alloc_cgpu(struct device_drv *drv, int threads); struct cgpu_info *usb_alloc_cgpu(struct device_drv *drv, int threads);
struct cgpu_info *usb_free_cgpu(struct cgpu_info *cgpu); struct cgpu_info *usb_free_cgpu(struct cgpu_info *cgpu);

Loading…
Cancel
Save