From aaaa8a52fdc6ee0b7b894cfc0a417502d60dedbc Mon Sep 17 00:00:00 2001 From: Kano Date: Sun, 30 Sep 2012 17:37:01 +1000 Subject: [PATCH 1/3] Icarus catch more USB errors and close/reopen the port --- driver-icarus.c | 67 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/driver-icarus.c b/driver-icarus.c index 4214c31b..cc74df2a 100644 --- a/driver-icarus.c +++ b/driver-icarus.c @@ -223,6 +223,11 @@ static void rev(unsigned char *s, size_t l) #define icarus_open2(devpath, baud, purge) serial_open(devpath, baud, ICARUS_READ_FAULT_DECISECONDS, purge) #define icarus_open(devpath, baud) icarus_open2(devpath, baud, false) +#define ICA_GETS_ERROR -1 +#define ICA_GETS_OK 0 +#define ICA_GETS_RESTART 1 +#define ICA_GETS_TIMEOUT 2 + static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct thr_info *thr, int read_count) { ssize_t ret = 0; @@ -233,12 +238,14 @@ static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, st // Read reply 1 byte at a time to get earliest tv_finish while (true) { ret = read(fd, buf, 1); + if (ret < 0) + return ICA_GETS_ERROR; if (first) gettimeofday(tv_finish, NULL); if (ret >= read_amount) - return 0; + return ICA_GETS_OK; if (ret > 0) { buf += ret; @@ -254,16 +261,16 @@ static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, st "Icarus Read: No data in %.2f seconds", (float)rc/(float)TIME_FACTOR); } - return 1; + return ICA_GETS_TIMEOUT; } - if (thr->work_restart) { + if (thr && thr->work_restart) { if (opt_debug) { applog(LOG_DEBUG, "Icarus Read: Work restart at %.2f seconds", (float)(rc)/(float)TIME_FACTOR); } - return 1; + return ICA_GETS_RESTART; } } } @@ -281,6 +288,13 @@ static int icarus_write(int fd, const void *buf, size_t bufLen) #define icarus_close(fd) close(fd) +static void do_icarus_close(struct thr_info *thr) +{ + struct cgpu_info *icarus = thr->cgpu; + icarus_close(icarus->device_fd); + 
icarus->device_fd = -1; +} + static const char *timing_mode_str(enum timing_mode timing_mode) { switch(timing_mode) { @@ -533,10 +547,7 @@ static bool icarus_detect_one(const char *devpath) gettimeofday(&tv_start, NULL); memset(nonce_bin, 0, sizeof(nonce_bin)); - struct thr_info dummy = { - .work_restart = false, - }; - icarus_gets(nonce_bin, fd, &tv_finish, &dummy, 1); + icarus_gets(nonce_bin, fd, &tv_finish, NULL, 1); icarus_close(fd); @@ -563,6 +574,7 @@ static bool icarus_detect_one(const char *devpath) icarus = calloc(1, sizeof(struct cgpu_info)); icarus->api = &icarus_api; icarus->device_path = strdup(devpath); + icarus->device_fd = -1; icarus->threads = 1; add_cgpu(icarus); icarus_info = realloc(icarus_info, sizeof(struct ICARUS_INFO *) * (total_devices + 1)); @@ -607,6 +619,8 @@ static bool icarus_prepare(struct thr_info *thr) struct timeval now; + icarus->device_fd = -1; + int fd = icarus_open(icarus->device_path, icarus_info[icarus->device_id]->baud); if (unlikely(-1 == fd)) { applog(LOG_ERR, "Failed to open Icarus on %s", @@ -653,6 +667,17 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work, elapsed.tv_sec = elapsed.tv_usec = 0; icarus = thr->cgpu; + if (icarus->device_fd == -1) + if (!icarus_prepare(thr)) { + applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id); + icarus->device_last_not_well = time(NULL); + icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR; + icarus->dev_comms_error_count++; + + // fail the device if the reopen attempt fails + return -1; + } + fd = icarus->device_fd; memset(ob_bin, 0, sizeof(ob_bin)); @@ -664,8 +689,10 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work, tcflush(fd, TCOFLUSH); #endif ret = icarus_write(fd, ob_bin, sizeof(ob_bin)); - if (ret) - return -1; /* This should never happen */ + if (ret) { + do_icarus_close(thr); + return 0; /* This should never happen */ + } gettimeofday(&tv_start, NULL); @@ -682,12 +709,19 @@ static int64_t icarus_scanhash(struct thr_info 
*thr, struct work *work, memset(nonce_bin, 0, sizeof(nonce_bin)); info = icarus_info[icarus->device_id]; ret = icarus_gets(nonce_bin, fd, &tv_finish, thr, info->read_count); + if (ret == ICA_GETS_ERROR) { + do_icarus_close(thr); + applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id); + icarus->device_last_not_well = time(NULL); + icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR; + icarus->dev_comms_error_count++; + return 0; + } work->blk.nonce = 0xffffffff; - memcpy((char *)&nonce, nonce_bin, sizeof(nonce_bin)); // aborted before becoming idle, get new work - if (nonce == 0 && ret) { + if (ret == ICA_GETS_TIMEOUT || ret == ICA_GETS_RESTART) { timersub(&tv_finish, &tv_start, &elapsed); // ONLY up to just when it aborted @@ -709,6 +743,8 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work, return estimate_hashes; } + memcpy((char *)&nonce, nonce_bin, sizeof(nonce_bin)); + #if !defined (__BIG_ENDIAN__) && !defined(MIPSEB) nonce = swab32(nonce); #endif @@ -717,6 +753,10 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work, submit_nonce(thr, work, nonce); was_hw_error = (curr_hw_errors > icarus->hw_errors); + // Force a USB close/reopen on any hw error + if (was_hw_error) + do_icarus_close(thr); + hash_count = (nonce & info->nonce_mask); hash_count++; hash_count *= info->fpga_count; @@ -862,8 +902,7 @@ static struct api_data *icarus_api_stats(struct cgpu_info *cgpu) static void icarus_shutdown(struct thr_info *thr) { - struct cgpu_info *icarus = thr->cgpu; - icarus_close(icarus->device_fd); + do_icarus_close(thr); } struct device_api icarus_api = { From 6d6692ce9ae8dfbcdde8a3a8560bbfb35a9e450e Mon Sep 17 00:00:00 2001 From: Kano Date: Sun, 30 Sep 2012 17:38:27 +1000 Subject: [PATCH 2/3] api.c DEBUG message has no parameter --- api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api.c b/api.c index 8ebf94da..1b22087c 100644 --- a/api.c +++ b/api.c @@ -558,7 +558,7 @@ struct CODES { { SEVERITY_ERR, 
MSG_INVBOOL, PARAM_NONE, "Invalid parameter should be true or false" }, { SEVERITY_SUCC, MSG_FOO, PARAM_BOOL, "Failover-Only set to %s" }, { SEVERITY_SUCC, MSG_MINECOIN,PARAM_NONE, "CGMiner coin" }, - { SEVERITY_SUCC, MSG_DEBUGSET,PARAM_STR, "Debug settings" }, + { SEVERITY_SUCC, MSG_DEBUGSET,PARAM_NONE, "Debug settings" }, #ifdef HAVE_AN_FPGA { SEVERITY_SUCC, MSG_PGAIDENT,PARAM_PGA, "Identify command sent to PGA%d" }, { SEVERITY_WARN, MSG_PGANOID, PARAM_PGA, "PGA%d does not support identify" }, From c2b1504e505882a94487ce01b7eac714599191ed Mon Sep 17 00:00:00 2001 From: Kano Date: Sun, 30 Sep 2012 17:44:36 +1000 Subject: [PATCH 3/3] Icarus USB write failure is also a comms error --- driver-icarus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/driver-icarus.c b/driver-icarus.c index cc74df2a..b3aa34bd 100644 --- a/driver-icarus.c +++ b/driver-icarus.c @@ -691,6 +691,10 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work, ret = icarus_write(fd, ob_bin, sizeof(ob_bin)); if (ret) { do_icarus_close(thr); + applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id); + icarus->device_last_not_well = time(NULL); + icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR; + icarus->dev_comms_error_count++; return 0; /* This should never happen */ }