From 58008a37420f5baa97ddc50493858c32c6036865 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 09:05:58 +1100 Subject: [PATCH 01/10] Add a hash_driver_work function to allow for drivers that wish to do their own work queueing and management. --- cgminer.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ miner.h | 1 + 2 files changed, 48 insertions(+) diff --git a/cgminer.c b/cgminer.c index 1de5e084..da4eaad6 100644 --- a/cgminer.c +++ b/cgminer.c @@ -6532,6 +6532,53 @@ void hash_queued_work(struct thr_info *mythr) cgpu->deven = DEV_DISABLED; } +/* This version of hash_work is for devices drivers that want to do their own + * work management entirely, usually by using get_work(). Note that get_work + * is a blocking function and will wait indefinitely if no work is available + * so this must be taken into consideration in the driver. */ +void hash_driver_work(struct thr_info *mythr) +{ + struct timeval tv_start = {0, 0}, tv_end; + struct cgpu_info *cgpu = mythr->cgpu; + struct device_drv *drv = cgpu->drv; + const int thr_id = mythr->id; + int64_t hashes_done = 0; + + while (likely(!cgpu->shutdown)) { + struct timeval diff; + int64_t hashes; + + mythr->work_restart = false; + + hashes = drv->scanwork(mythr); + + if (unlikely(hashes == -1 )) { + applog(LOG_ERR, "%s %d failure, disabling!", drv->name, cgpu->device_id); + cgpu->deven = DEV_DISABLED; + dev_error(cgpu, REASON_THREAD_ZERO_HASH); + mt_disable(mythr, thr_id, drv); + } + + hashes_done += hashes; + cgtime(&tv_end); + timersub(&tv_end, &tv_start, &diff); + /* Update the hashmeter at most 5 times per second */ + if ((hashes_done && (diff.tv_sec > 0 || diff.tv_usec > 200000)) || + diff.tv_sec >= opt_log_interval) { + hashmeter(thr_id, &diff, hashes_done); + hashes_done = 0; + copy_time(&tv_start, &tv_end); + } + + if (unlikely(mythr->pause || cgpu->deven != DEV_ENABLED)) + mt_disable(mythr, thr_id, drv); + + if (unlikely(mythr->work_restart)) + drv->flush_work(cgpu); + } + cgpu->deven = DEV_DISABLED; +} + void *miner_thread(void *userdata) { struct thr_info *mythr = userdata; diff --git a/miner.h b/miner.h index ce39db52..a32103a9 100644 --- a/miner.h +++ b/miner.h @@ -1390,6 +1390,7 @@ extern struct work *find_queued_work_bymidstate(struct cgpu_info *cgpu, char *mi extern struct work *clone_queued_work_bymidstate(struct cgpu_info *cgpu, char *midstate, size_t midstatelen, char *data, int offset, size_t datalen); extern void work_completed(struct cgpu_info *cgpu, struct work *work); extern struct work *take_queued_work_bymidstate(struct cgpu_info *cgpu, char *midstate, size_t midstatelen, char *data, int offset, size_t datalen); +extern void hash_driver_work(struct thr_info *mythr); extern void hash_queued_work(struct thr_info *mythr); extern void _wlog(const char *str); extern void _wlogprint(const char *str); From 835ad82441b5046212d36ec3012be868fd4435a4 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 09:32:07 +1100 Subject: [PATCH 02/10] Convert the bitfury driver to use the hash_driver_work version of hash_work. --- cgminer.c | 2 +- driver-bitfury.c | 16 +++++++++++----- miner.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/cgminer.c b/cgminer.c index da4eaad6..6ed3523d 100644 --- a/cgminer.c +++ b/cgminer.c @@ -5974,7 +5974,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work) cgtime(&work->tv_staged); } -static struct work *get_work(struct thr_info *thr, const int thr_id) +struct work *get_work(struct thr_info *thr, const int thr_id) { struct work *work = NULL; diff --git a/driver-bitfury.c b/driver-bitfury.c index 0cb9fef5..e5757f5b 100644 --- a/driver-bitfury.c +++ b/driver-bitfury.c @@ -220,18 +220,24 @@ static bool bitfury_checkresults(struct thr_info *thr, struct work *work, uint32 return false; } -static int64_t bitfury_scanhash(struct thr_info *thr, struct work *work, - int64_t __maybe_unused max_nonce) +static int64_t bitfury_scanwork(struct thr_info *thr) { struct cgpu_info *bitfury = thr->cgpu; struct bitfury_info *info = bitfury->device_data; struct timeval tv_now; + struct work *work; double nonce_rate; int64_t ret = 0; int amount, i; char buf[45]; int ms_diff; + work = get_work(thr, thr->id); + if (unlikely(thr->work_restart)) { + free_work(work); + return 0; + } + buf[0] = 'W'; memcpy(buf + 1, work->midstate, 32); memcpy(buf + 33, work->data + 64, 12); @@ -298,8 +304,7 @@ static int64_t bitfury_scanhash(struct thr_info *thr, struct work *work, cascade: for (i = BF1ARRAY_SIZE; i > 0; i--) info->prevwork[i] = info->prevwork[i - 1]; - info->prevwork[0] = copy_work(work); - work->blk.nonce = 0xffffffff; + info->prevwork[0] = work; info->cycles++; info->total_nonces += info->nonces; @@ -358,7 +363,8 @@ struct device_drv bitfury_drv = { .dname = "bitfury", .name = "BF1", .drv_detect = bitfury_detect, - .scanhash = bitfury_scanhash, + .hash_work = &hash_driver_work, + .scanwork = bitfury_scanwork, .get_api_stats = bitfury_api_stats, .reinit_device = bitfury_init, .thread_shutdown = bitfury_shutdown, diff --git a/miner.h b/miner.h index a32103a9..ca7738dd 100644 --- a/miner.h +++ b/miner.h @@ -1384,6 +1384,7 @@ extern void submit_tested_work(struct thr_info *thr, struct work *work); extern bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce); extern bool submit_noffset_nonce(struct thr_info *thr, struct work *work, uint32_t nonce, int noffset); +extern struct work *get_work(struct thr_info *thr, const int thr_id); extern struct work *get_queued(struct cgpu_info *cgpu); extern struct work *__find_work_bymidstate(struct work *que, char *midstate, size_t midstatelen, char *data, int offset, size_t datalen); extern struct work *find_queued_work_bymidstate(struct cgpu_info *cgpu, char *midstate, size_t midstatelen, char *data, int offset, size_t datalen); From d3c215fda6cff4d70f9202b0b68455b065012deb Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 09:53:21 +1100 Subject: [PATCH 03/10] Provide a lower level __bin2hex function that does not allocate memory itself. --- miner.h | 1 + util.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/miner.h b/miner.h index ca7738dd..70faa270 100644 --- a/miner.h +++ b/miner.h @@ -952,6 +952,7 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, #endif extern const char *proxytype(proxytypes_t proxytype); extern char *get_proxy(char *url, struct pool *pool); +extern void __bin2hex(char *s, const unsigned char *p, size_t len); extern char *bin2hex(const unsigned char *p, size_t len); extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); diff --git a/util.c b/util.c index 27b8c3bf..8e032810 100644 --- a/util.c +++ b/util.c @@ -584,6 +584,16 @@ char *get_proxy(char *url, struct pool *pool) return url; } +/* Adequate size s==len*2 + 1 must be alloced to use this variant */ +void __bin2hex(char *s, const unsigned char *p, size_t len) +{ + int i; + + for (i = 0; i < (int)len; i++) + sprintf(s + (i * 2), "%02x", (unsigned int)p[i]); + +} + /* Returns a malloced array string of a binary value of arbitrary length. The * array is rounded up to a 4 byte size to appease architectures that need * aligned array sizes */ @@ -591,7 +601,6 @@ char *bin2hex(const unsigned char *p, size_t len) { ssize_t slen; char *s; - int i; slen = len * 2 + 1; if (slen % 4) @@ -600,8 +609,7 @@ char *bin2hex(const unsigned char *p, size_t len) if (unlikely(!s)) quithere(1, "Failed to calloc"); - for (i = 0; i < (int)len; i++) - sprintf(s + (i * 2), "%02x", (unsigned int)p[i]); + __bin2hex(s, p, len); return s; } From e0c90359fde790b5ebfca481ea66e2133fb263fb Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 09:56:48 +1100 Subject: [PATCH 04/10] Use stack memory in test_work_current, avoiding a malloc/free cycle each time. --- cgminer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cgminer.c b/cgminer.c index 6ed3523d..c1cd9389 100644 --- a/cgminer.c +++ b/cgminer.c @@ -4006,15 +4006,15 @@ static void set_blockdiff(const struct work *work) static bool test_work_current(struct work *work) { bool ret = true; - char *hexstr; + char hexstr[20]; if (work->mandatory) return ret; /* Hack to work around dud work sneaking into test */ - hexstr = bin2hex(work->data + 8, 18); + __bin2hex(hexstr, work->data + 8, 18); if (!strncmp(hexstr, "000000000000000000000000000000000000", 36)) - goto out_free; + return ret; /* Search to see if this block exists yet and if not, consider it a * new block and set the current block details to this one */ @@ -4049,7 +4049,7 @@ static bool test_work_current(struct work *work) applog(LOG_DEBUG, "Deleted block %d from database", deleted_block); set_curblock(hexstr, work->data); if (unlikely(new_blocks == 1)) - goto out_free; + return ret; work->work_block = ++work_block; @@ -4072,8 +4072,6 @@ static bool test_work_current(struct work *work) } } work->longpoll = false; -out_free: - free(hexstr); return ret; } From 9f8023a959772f4e8e62ed2a3d10dfea08351c57 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 10:34:07 +1100 Subject: [PATCH 05/10] Use stack memory for hex used in stratum share submissions. --- cgminer.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cgminer.c b/cgminer.c index c1cd9389..8e1c9a95 100644 --- a/cgminer.c +++ b/cgminer.c @@ -5465,8 +5465,8 @@ static void *stratum_sthread(void *userdata) quit(1, "Failed to create stratum_q in stratum_sthread"); while (42) { + char noncehex[12], nonce2hex[20]; struct stratum_share *sshare; - char *noncehex, *nonce2hex; uint32_t *hash32, nonce; char s[1024], nonce2[8]; struct work *work; @@ -5495,7 +5495,7 @@ static void *stratum_sthread(void *userdata) /* This work item is freed in parse_stratum_response */ sshare->work = work; nonce = *((uint32_t *)(work->data + 76)); - noncehex = bin2hex((const unsigned char *)&nonce, 4); + __bin2hex(noncehex, (const unsigned char *)&nonce, 4); memset(s, 0, 1024); mutex_lock(&sshare_lock); @@ -5506,15 +5506,11 @@ static void *stratum_sthread(void *userdata) memset(nonce2, 0, 8); /* We only use uint32_t sized nonce2 increments internally */ memcpy(nonce2, &work->nonce2, sizeof(uint32_t)); - nonce2hex = bin2hex((const unsigned char *)nonce2, work->nonce2_len); - if (unlikely(!nonce2hex)) - quit(1, "Failed to bin2hex nonce2 in stratum_thread"); + __bin2hex(nonce2hex, (const unsigned char *)nonce2, work->nonce2_len); snprintf(s, sizeof(s), "{\"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\": %d, \"method\": \"mining.submit\"}", pool->rpc_user, work->job_id, nonce2hex, work->ntime, noncehex, sshare->id); - free(noncehex); - free(nonce2hex); applog(LOG_INFO, "Submitting share %08lx to pool %d", (long unsigned int)htole32(hash32[6]), pool->pool_no); From 56edabc64d11d98f6931ef5784c39aaf2cba7eb8 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 11:06:54 +1100 Subject: [PATCH 06/10] Use a timeout with usb handle events set to a nominal 200ms and wait for the polling thread to shut down before deinitialising libusb. --- cgminer.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cgminer.c b/cgminer.c index 8e1c9a95..40cb2afb 100644 --- a/cgminer.c +++ b/cgminer.c @@ -3218,8 +3218,6 @@ static void __kill_work(void) /* Release USB resources in case it's a restart * and not a QUIT */ if (!opt_scrypt) { - usb_polling = false; - applog(LOG_DEBUG, "Releasing all USB devices"); usb_cleanup(); @@ -7221,6 +7219,8 @@ static void clean_up(void) clear_adl(nDevs); #endif #ifdef USE_USBUTILS + usb_polling = false; + pthread_join(usb_poll_thread, NULL); libusb_exit(NULL); #endif @@ -7792,11 +7792,12 @@ static void probe_pools(void) #ifdef USE_USBUTILS static void *libusb_poll_thread(void __maybe_unused *arg) { + struct timeval tv_end = {0, 200000}; + RenameThread("usbpoll"); - pthread_detach(pthread_self()); while (usb_polling) - libusb_handle_events(NULL); + libusb_handle_events_timeout_completed(NULL, &tv_end, NULL); return NULL; } From 84f642f5639b29d020d971ce3deb495235412433 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 15:06:48 +1100 Subject: [PATCH 07/10] Although async transfers are meant to use heap memory, we never return before the transfer function has completed so stack memory will suffice for control transfers, fixing a memory leak in the process. --- usbutils.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/usbutils.c b/usbutils.c index 5050ec2f..7dce3488 100644 --- a/usbutils.c +++ b/usbutils.c @@ -2712,12 +2712,9 @@ static int usb_control_transfer(libusb_device_handle *dev_handle, uint8_t bmRequ unsigned char *buffer, uint16_t wLength, unsigned int timeout) { struct usb_transfer ut; + unsigned char buf[70]; int err, transferred; - unsigned char *buf; - buf = malloc(70); - if (unlikely(!buf)) - quit(1, "Failed to malloc buf in usb_control_transfer"); init_usb_transfer(&ut); mutex_lock(&ut.mutex); libusb_fill_control_setup(buf, bmRequestType, bRequest, wValue, From 578fabe07c8646498cbb38b035ea0d4a0bdc4fe9 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 16:54:39 +1100 Subject: [PATCH 08/10] Do not perform bfi int patching for opencl1.2 or later. --- ocl.c | 34 ++++++++++++++++++++-------------- ocl.h | 1 + 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/ocl.c b/ocl.c index f7184dbb..977eee81 100644 --- a/ocl.c +++ b/ocl.c @@ -342,6 +342,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) /* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */ char * devoclver = malloc(1024); const char * ocl10 = "OpenCL 1.0"; + const char * ocl11 = "OpenCL 1.1"; status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL); if (status != CL_SUCCESS) { @@ -349,8 +350,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) return NULL; } find = strstr(devoclver, ocl10); - if (!find) + if (!find) { clState->hasOpenCL11plus = true; + find = strstr(devoclver, ocl11); + if (!find) + clState->hasOpenCL12plus = true; + } status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); if (status != CL_SUCCESS) { @@ -618,19 +623,20 @@ build: if (clState->hasBitAlign) { strcat(CompilerOptions, " -D BITALIGN"); applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN"); - if (strstr(name, "Cedar") || - strstr(name, "Redwood") || - strstr(name, "Juniper") || - strstr(name, "Cypress" ) || - strstr(name, "Hemlock" ) || - strstr(name, "Caicos" ) || - strstr(name, "Turks" ) || - strstr(name, "Barts" ) || - strstr(name, "Cayman" ) || - strstr(name, "Antilles" ) || - strstr(name, "Wrestler" ) || - strstr(name, "Zacate" ) || - strstr(name, "WinterPark" )) + if (!clState->hasOpenCL12plus && + (strstr(name, "Cedar") || + strstr(name, "Redwood") || + strstr(name, "Juniper") || + strstr(name, "Cypress" ) || + strstr(name, "Hemlock" ) || + strstr(name, "Caicos" ) || + strstr(name, "Turks" ) || + strstr(name, "Barts" ) || + strstr(name, "Cayman" ) || + strstr(name, "Antilles" ) || + strstr(name, "Wrestler" ) || + strstr(name, "Zacate" ) || + strstr(name, "WinterPark" ))) patchbfi = true; } else applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN"); diff --git a/ocl.h b/ocl.h index 984e7d62..ed8bfde8 100644 --- a/ocl.h +++ b/ocl.h @@ -27,6 +27,7 @@ typedef struct { #endif bool hasBitAlign; bool hasOpenCL11plus; + bool hasOpenCL12plus; bool goffset; cl_uint vwidth; size_t max_work_size; From c0690286481750aa55b894c1d89b041c92788c2c Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sat, 12 Oct 2013 17:44:28 +1100 Subject: [PATCH 09/10] Free a libusb transfer after we have finished using it to avoid a dereference in usb_control_transfer --- usbutils.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/usbutils.c b/usbutils.c index 7dce3488..2ec6df5f 100644 --- a/usbutils.c +++ b/usbutils.c @@ -2245,7 +2245,6 @@ static int callback_wait(struct usb_transfer *ut, int *transferred, unsigned int /* No need to sort out mutexes here since they won't be reused */ *transferred = transfer->actual_length; - libusb_free_transfer(transfer); return ret; } @@ -2296,6 +2295,7 @@ usb_bulk_transfer(struct libusb_device_handle *dev_handle, int intinfo, errn = errno; if (!err) err = callback_wait(&ut, transferred, timeout); + libusb_free_transfer(ut.transfer); STATS_TIMEVAL(&tv_finish); USB_STATS(cgpu, &tv_start, &tv_finish, err, mode, cmd, seq, timeout); @@ -2728,10 +2728,13 @@ static int usb_control_transfer(libusb_device_handle *dev_handle, uint8_t bmRequ unsigned char *ofbuf = libusb_control_transfer_get_data(ut.transfer); memcpy(buffer, ofbuf, transferred); - return transferred; + err = transferred; + goto out; } if ((err) == LIBUSB_TRANSFER_CANCELLED) err = LIBUSB_ERROR_TIMEOUT; +out: + libusb_free_transfer(ut.transfer); return err; } From 84de52c1c659da16c361f3471c6214f976e090ac Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sun, 13 Oct 2013 10:11:46 +1100 Subject: [PATCH 10/10] Use a write lock when performing any USB control transfers to prevent concurrent transfers. --- usbutils.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/usbutils.c b/usbutils.c index 2ec6df5f..00bfcce7 100644 --- a/usbutils.c +++ b/usbutils.c @@ -2812,21 +2812,22 @@ out_: return err; } +/* We use the write devlock for control transfers since some control transfers + * are rare but may be changing settings within the device causing problems + * if concurrent transfers are happening. Using the write lock serialises + * any transfers. */ int _usb_transfer(struct cgpu_info *cgpu, uint8_t request_type, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint32_t *data, int siz, unsigned int timeout, enum usb_cmds cmd) { int pstate, err; - DEVRLOCK(cgpu, pstate); + DEVWLOCK(cgpu, pstate); err = __usb_transfer(cgpu, request_type, bRequest, wValue, wIndex, data, siz, timeout, cmd); - if (NOCONTROLDEV(err)) { - cg_ruwlock(&cgpu->usbinfo.devlock); + if (NOCONTROLDEV(err)) release_cgpu(cgpu); - cg_dwlock(&cgpu->usbinfo.devlock); - } - DEVRUNLOCK(cgpu, pstate); + DEVWUNLOCK(cgpu, pstate); return err; } @@ -2840,7 +2841,7 @@ int _usb_transfer_read(struct cgpu_info *cgpu, uint8_t request_type, uint8_t bRe unsigned char tbuf[64]; int err, pstate; - DEVRLOCK(cgpu, pstate); + DEVWLOCK(cgpu, pstate); USBDEBUG("USB debug: _usb_transfer_read(%s (nodev=%s),type=%"PRIu8",req=%"PRIu8",value=%"PRIu16",index=%"PRIu16",bufsiz=%d,timeout=%u,cmd=%s)", cgpu->drv->name, bool_str(cgpu->usbinfo.nodev), request_type, bRequest, wValue, wIndex, bufsiz, timeout, usb_cmdname(cmd)); @@ -2896,13 +2897,10 @@ int _usb_transfer_read(struct cgpu_info *cgpu, uint8_t request_type, uint8_t bRe err, libusb_error_name(err)); } out_noerrmsg: - if (NOCONTROLDEV(err)) { - cg_ruwlock(&cgpu->usbinfo.devlock); + if (NOCONTROLDEV(err)) release_cgpu(cgpu); - cg_dwlock(&cgpu->usbinfo.devlock); - } - DEVRUNLOCK(cgpu, pstate); + DEVWUNLOCK(cgpu, pstate); return err; }