From 7638511290d3fc2fd03bace77e01789ffec11385 Mon Sep 17 00:00:00 2001 From: ckolivas Date: Tue, 1 May 2012 09:54:34 +1000 Subject: [PATCH 01/25] We should not retry submitting shares indefinitely or we may end up with a huge backlog during network outages, so discard stale shares if we failed to submit them and they've become stale in the interim. --- cgminer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cgminer.c b/cgminer.c index a4d21a64..28dd38b0 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2158,8 +2158,8 @@ static void *submit_work_thread(void *userdata) /* submit solution to bitcoin via JSON-RPC */ while (!submit_upstream_work(work, pool->submit_curl)) { - if (!opt_submit_stale && stale_work(work, true) && !pool->submit_old) { - applog(LOG_NOTICE, "Stale share detected on submit retry, discarding"); + if (stale_work(work, true)) { + applog(LOG_NOTICE, "Share became stale while retrying submit, discarding"); total_stale++; pool->stale_shares++; break; @@ -2211,8 +2211,8 @@ static void *submit_extra_work(void *userdata) /* submit solution to bitcoin via JSON-RPC */ while (!submit_upstream_work(work, curl)) { - if (!opt_submit_stale && stale_work(work, true)) { - applog(LOG_NOTICE, "Stale share detected, discarding"); + if (stale_work(work, true)) { + applog(LOG_NOTICE, "Share became stale while retrying submit, discarding"); total_stale++; pool->stale_shares++; break; From adda6e6680a9808145da84df7953a0ed9d4f9bed Mon Sep 17 00:00:00 2001 From: ckolivas Date: Tue, 1 May 2012 17:48:23 +1000 Subject: [PATCH 02/25] Do not recruit extra connection threads if there have been connection errors to the pool in question. 
--- cgminer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cgminer.c b/cgminer.c index 28dd38b0..dba807a4 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2080,7 +2080,7 @@ static bool workio_get_work(struct workio_cmd *wc) struct pool *pool = select_pool(wc->lagging); pthread_t get_thread; - if (list_empty(&pool->getwork_q->q)) + if (list_empty(&pool->getwork_q->q) || pool->submit_fail) return tq_push(pool->getwork_q, wc); if (unlikely(pthread_create(&get_thread, NULL, get_extra_work, (void *)wc))) { @@ -2242,10 +2242,11 @@ out: * any size hardware */ static bool workio_submit_work(struct workio_cmd *wc) { + struct pool *pool = wc->u.work->pool; pthread_t submit_thread; - if (list_empty(&wc->u.work->pool->submit_q->q)) - return tq_push(wc->u.work->pool->submit_q, wc); + if (list_empty(&pool->submit_q->q) || pool->submit_fail) + return tq_push(pool->submit_q, wc); if (unlikely(pthread_create(&submit_thread, NULL, submit_extra_work, (void *)wc))) { applog(LOG_ERR, "Failed to create submit_work_thread"); From 89053c78da20af8aa84da77f42cfef8121c2c7e5 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 1 May 2012 19:12:37 +1000 Subject: [PATCH 03/25] There is no need for the submit and getwork curls to be tied to the pool struct. 
--- cgminer.c | 18 ++++++++++-------- miner.h | 3 --- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/cgminer.c b/cgminer.c index dba807a4..ad8ad968 100644 --- a/cgminer.c +++ b/cgminer.c @@ -1965,6 +1965,7 @@ static void *get_work_thread(void *userdata) { struct pool *pool = (struct pool *)userdata; struct workio_cmd *wc; + CURL *curl; pthread_detach(pthread_self()); @@ -1973,9 +1974,8 @@ static void *get_work_thread(void *userdata) if (!pool->getwork_q) quit(1, "Failed to tq_new in get_work_thread"); - /* getwork_curl never cleared */ - pool->getwork_curl = curl_easy_init(); - if (unlikely(!pool->getwork_curl)) + curl = curl_easy_init(); + if (unlikely(!curl)) quit(1, "Failed to initialise pool getwork CURL"); while ((wc = tq_pop(pool->getwork_q, NULL)) != NULL) { @@ -1992,7 +1992,7 @@ static void *get_work_thread(void *userdata) ret_work->pool = pool; /* obtain new work from bitcoin via JSON-RPC */ - while (!get_upstream_work(ret_work, pool->getwork_curl)) { + while (!get_upstream_work(ret_work, curl)) { if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { applog(LOG_ERR, "json_rpc_call failed, terminating workio thread"); free_work(ret_work); @@ -2019,6 +2019,7 @@ static void *get_work_thread(void *userdata) workio_cmd_free(wc); } + curl_easy_cleanup(curl); return NULL; } @@ -2121,6 +2122,7 @@ static void *submit_work_thread(void *userdata) { struct pool *pool = (struct pool *)userdata; struct workio_cmd *wc; + CURL *curl; pthread_detach(pthread_self()); @@ -2129,9 +2131,8 @@ static void *submit_work_thread(void *userdata) if (!pool->submit_q ) quit(1, "Failed to tq_new in submit_work_thread"); - /* submit_curl never cleared */ - pool->submit_curl = curl_easy_init(); - if (unlikely(!pool->submit_curl)) + curl = curl_easy_init(); + if (unlikely(!curl)) quit(1, "Failed to initialise pool submit CURL"); while ((wc = tq_pop(pool->submit_q, NULL)) != NULL) { @@ -2157,7 +2158,7 @@ static void *submit_work_thread(void *userdata) } /* 
submit solution to bitcoin via JSON-RPC */ - while (!submit_upstream_work(work, pool->submit_curl)) { + while (!submit_upstream_work(work, curl)) { if (stale_work(work, true)) { applog(LOG_NOTICE, "Share became stale while retrying submit, discarding"); total_stale++; @@ -2180,6 +2181,7 @@ static void *submit_work_thread(void *userdata) workio_cmd_free(wc); } + curl_easy_cleanup(curl); return NULL; } diff --git a/miner.h b/miner.h index 2d238045..6c750be2 100644 --- a/miner.h +++ b/miner.h @@ -634,9 +634,6 @@ struct pool { pthread_t longpoll_thread; pthread_t submit_thread; pthread_t getwork_thread; - - CURL *submit_curl; - CURL *getwork_curl; }; struct work { From 7d288eac9fa1e541343e957ca86c2c52ae316f28 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 1 May 2012 19:59:37 +1000 Subject: [PATCH 04/25] Implement a scaleable networking framework designed to cope with any sized network requirements, yet minimise the number of connections being reopened. Do this by creating a ring buffer linked list of curl handles to be used by getwork, recruiting extra handles when none is immediately available. 
--- cgminer.c | 111 +++++++++++++++++++----------------------------------- miner.h | 7 ++++ 2 files changed, 45 insertions(+), 73 deletions(-) diff --git a/cgminer.c b/cgminer.c index ad8ad968..ef0187ce 100644 --- a/cgminer.c +++ b/cgminer.c @@ -379,7 +379,6 @@ static void sharelog(const char*disposition, const struct work*work) } static void *submit_work_thread(void *userdata); -static void *get_work_thread(void *userdata); static void add_pool(void) { @@ -392,13 +391,13 @@ static void add_pool(void) pools[total_pools++] = pool; if (unlikely(pthread_mutex_init(&pool->pool_lock, NULL))) quit(1, "Failed to pthread_mutex_init in add_pool"); + INIT_LIST_HEAD(&pool->curlring); + /* Make sure the pool doesn't think we've been idle since time 0 */ pool->tv_idle.tv_sec = ~0UL; if (unlikely(pthread_create(&pool->submit_thread, NULL, submit_work_thread, (void *)pool))) quit(1, "Failed to create pool submit thread"); - if (unlikely(pthread_create(&pool->getwork_thread, NULL, get_work_thread, (void *)pool))) - quit(1, "Failed to create pool getwork thread"); } /* Pool variant of test and set */ @@ -1957,77 +1956,46 @@ static void sighandler(int __maybe_unused sig) kill_work(); } -/* One get work thread is created per pool, so as to use one curl handle for - * all getwork reqeusts from the same pool, minimising connections opened, but - * separate from the submit work curl handle to not delay share submissions due - * to getwork traffic */ -static void *get_work_thread(void *userdata) +/* Called with pool_lock held */ +static void recruit_curl(struct pool *pool) { - struct pool *pool = (struct pool *)userdata; - struct workio_cmd *wc; - CURL *curl; - - pthread_detach(pthread_self()); - - /* getwork_q memory never freed */ - pool->getwork_q = tq_new(); - if (!pool->getwork_q) - quit(1, "Failed to tq_new in get_work_thread"); - - curl = curl_easy_init(); - if (unlikely(!curl)) - quit(1, "Failed to initialise pool getwork CURL"); + struct curl_ent *ce = calloc(sizeof(struct 
curl_ent), 1); - while ((wc = tq_pop(pool->getwork_q, NULL)) != NULL) { - struct work *ret_work; - int failures = 0; + ce->curl = curl_easy_init(); + if (unlikely(!ce->curl || !ce)) + quit(1, "Failed to init in recruit_curl"); - ret_work = make_work(); - - if (wc->thr) - ret_work->thr = wc->thr; - else - ret_work->thr = NULL; - - ret_work->pool = pool; - - /* obtain new work from bitcoin via JSON-RPC */ - while (!get_upstream_work(ret_work, curl)) { - if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { - applog(LOG_ERR, "json_rpc_call failed, terminating workio thread"); - free_work(ret_work); - kill_work(); - break; - } + list_add(&ce->node, &pool->curlring); + applog(LOG_DEBUG, "Recruited new curl for pool %d", pool->pool_no); +} - /* pause, then restart work-request loop */ - applog(LOG_DEBUG, "json_rpc_call failed on get work, retry after %d seconds", - fail_pause); - sleep(fail_pause); - fail_pause += opt_fail_pause; - } - fail_pause = opt_fail_pause; +static struct curl_ent *pop_curl_entry(struct pool *pool) +{ + struct curl_ent *ce; - applog(LOG_DEBUG, "Pushing work to requesting thread"); + mutex_lock(&pool->pool_lock); + if (list_empty(&pool->curlring)) + recruit_curl(pool); + ce = list_entry(pool->curlring.next, struct curl_ent, node); + list_del(&ce->node); + mutex_unlock(&pool->pool_lock);; - /* send work to requesting thread */ - if (unlikely(!tq_push(thr_info[stage_thr_id].q, ret_work))) { - applog(LOG_ERR, "Failed to tq_push work in workio_get_work"); - kill_work(); - free_work(ret_work); - } - workio_cmd_free(wc); - } + return ce; +} - curl_easy_cleanup(curl); - return NULL; +static void push_curl_entry(struct curl_ent *ce, struct pool *pool) +{ + mutex_lock(&pool->pool_lock); + list_add(&ce->node, &pool->curlring); + mutex_unlock(&pool->pool_lock); } -static void *get_extra_work(void *userdata) +static void *get_work_thread(void *userdata) { struct workio_cmd *wc = (struct workio_cmd *)userdata; - struct work *ret_work = make_work();; 
- CURL *curl = curl_easy_init(); + struct work *ret_work = make_work(); + struct curl_ent *ce; + struct pool *pool; int failures = 0; pthread_detach(pthread_self()); @@ -2039,10 +2007,11 @@ static void *get_extra_work(void *userdata) else ret_work->thr = NULL; - ret_work->pool = select_pool(wc->lagging); + pool = ret_work->pool = select_pool(wc->lagging); + ce = pop_curl_entry(pool); /* obtain new work from bitcoin via JSON-RPC */ - while (!get_upstream_work(ret_work, curl)) { + while (!get_upstream_work(ret_work, ce->curl)) { if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { applog(LOG_ERR, "json_rpc_call failed, terminating workio thread"); free_work(ret_work); @@ -2069,7 +2038,7 @@ static void *get_extra_work(void *userdata) out: workio_cmd_free(wc); - curl_easy_cleanup(curl); + push_curl_entry(ce, pool); return NULL; } @@ -2078,13 +2047,9 @@ out: * requests */ static bool workio_get_work(struct workio_cmd *wc) { - struct pool *pool = select_pool(wc->lagging); pthread_t get_thread; - if (list_empty(&pool->getwork_q->q) || pool->submit_fail) - return tq_push(pool->getwork_q, wc); - - if (unlikely(pthread_create(&get_thread, NULL, get_extra_work, (void *)wc))) { + if (unlikely(pthread_create(&get_thread, NULL, get_work_thread, (void *)wc))) { applog(LOG_ERR, "Failed to create get_work_thread"); return false; } @@ -4450,6 +4415,8 @@ int add_pool_details(bool live, char *url, char *user, char *pass) pool->prio = total_pools; if (unlikely(pthread_mutex_init(&pool->pool_lock, NULL))) quit (1, "Failed to pthread_mutex_init in input_pool"); + INIT_LIST_HEAD(&pool->curlring); + pool->rpc_url = url; pool->rpc_user = user; pool->rpc_pass = pass; @@ -4462,8 +4429,6 @@ int add_pool_details(bool live, char *url, char *user, char *pass) if (unlikely(pthread_create(&pool->submit_thread, NULL, submit_work_thread, (void *)pool))) quit(1, "Failed to create pool submit thread"); - if (unlikely(pthread_create(&pool->getwork_thread, NULL, get_work_thread, (void 
*)pool))) - quit(1, "Failed to create pool getwork thread"); /* Test the pool is not idle if we're live running, otherwise * it will be tested separately */ diff --git a/miner.h b/miner.h index 6c750be2..3f6f9ba3 100644 --- a/miner.h +++ b/miner.h @@ -598,6 +598,11 @@ typedef struct { } dev_blk_ctx; #endif +struct curl_ent { + CURL *curl; + struct list_head node; +}; + struct pool { int pool_no; int prio; @@ -634,6 +639,8 @@ struct pool { pthread_t longpoll_thread; pthread_t submit_thread; pthread_t getwork_thread; + + struct list_head curlring; }; struct work { From ec3d057016fbb86c68adb5142917b25d18a7a845 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 1 May 2012 21:07:16 +1000 Subject: [PATCH 05/25] Use the ringbuffer of curls from the same pool for submit as well as getwork threads. Since the curl handles were already connected to the same pool and are immediately available, share submission will not be delayed by getworks. --- cgminer.c | 93 ++++--------------------------------------------------- 1 file changed, 6 insertions(+), 87 deletions(-) diff --git a/cgminer.c b/cgminer.c index ef0187ce..1cfa90f9 100644 --- a/cgminer.c +++ b/cgminer.c @@ -378,8 +378,6 @@ static void sharelog(const char*disposition, const struct work*work) applog(LOG_ERR, "sharelog fwrite error"); } -static void *submit_work_thread(void *userdata); - static void add_pool(void) { struct pool *pool; @@ -395,9 +393,6 @@ static void add_pool(void) /* Make sure the pool doesn't think we've been idle since time 0 */ pool->tv_idle.tv_sec = ~0UL; - - if (unlikely(pthread_create(&pool->submit_thread, NULL, submit_work_thread, (void *)pool))) - quit(1, "Failed to create pool submit thread"); } /* Pool variant of test and set */ @@ -2079,83 +2074,13 @@ static bool stale_work(struct work *work, bool share) return false; } -/* One submit work thread is created per pool, so as to use one curl handle - * for all submissions to the same pool, minimising connections opened, but - * separate from 
the getwork curl handle to not delay share submission due to - * getwork traffic */ -static void *submit_work_thread(void *userdata) -{ - struct pool *pool = (struct pool *)userdata; - struct workio_cmd *wc; - CURL *curl; - - pthread_detach(pthread_self()); - /* submit_q memory never freed */ - pool->submit_q = tq_new(); - if (!pool->submit_q ) - quit(1, "Failed to tq_new in submit_work_thread"); - - curl = curl_easy_init(); - if (unlikely(!curl)) - quit(1, "Failed to initialise pool submit CURL"); - - while ((wc = tq_pop(pool->submit_q, NULL)) != NULL) { - struct work *work = wc->u.work; - int failures = 0; - - if (stale_work(work, true)) { - if (pool->submit_old) - applog(LOG_NOTICE, "Stale share, submitting as pool %d requested", - pool->pool_no); - else if (opt_submit_stale) - applog(LOG_NOTICE, "Stale share from pool %d, submitting as user requested", - pool->pool_no); - else { - applog(LOG_NOTICE, "Stale share from pool %d, discarding", - pool->pool_no); - sharelog("discard", work); - total_stale++; - pool->stale_shares++; - workio_cmd_free(wc); - continue; - } - } - - /* submit solution to bitcoin via JSON-RPC */ - while (!submit_upstream_work(work, curl)) { - if (stale_work(work, true)) { - applog(LOG_NOTICE, "Share became stale while retrying submit, discarding"); - total_stale++; - pool->stale_shares++; - break; - } - if (unlikely((opt_retries >= 0) && (++failures > opt_retries))) { - applog(LOG_ERR, "Failed %d retries ...terminating workio thread", opt_retries); - kill_work(); - break; - } - - /* pause, then restart work-request loop */ - applog(LOG_INFO, "json_rpc_call failed on submit_work, retry after %d seconds", - fail_pause); - sleep(fail_pause); - fail_pause += opt_fail_pause; - } - fail_pause = opt_fail_pause; - workio_cmd_free(wc); - } - - curl_easy_cleanup(curl); - return NULL; -} - -static void *submit_extra_work(void *userdata) +static void *submit_work_thread(void *userdata) { struct workio_cmd *wc = (struct workio_cmd *)userdata; struct 
work *work = wc->u.work; struct pool *pool = work->pool; - CURL *curl = curl_easy_init(); + struct curl_ent *ce; int failures = 0; pthread_detach(pthread_self()); @@ -2176,8 +2101,9 @@ static void *submit_extra_work(void *userdata) } } + ce = pop_curl_entry(pool); /* submit solution to bitcoin via JSON-RPC */ - while (!submit_upstream_work(work, curl)) { + while (!submit_upstream_work(work, ce->curl)) { if (stale_work(work, true)) { applog(LOG_NOTICE, "Share became stale while retrying submit, discarding"); total_stale++; @@ -2197,9 +2123,9 @@ static void *submit_extra_work(void *userdata) fail_pause += opt_fail_pause; } fail_pause = opt_fail_pause; + push_curl_entry(ce, pool); out: workio_cmd_free(wc); - curl_easy_cleanup(curl); return NULL; } @@ -2209,13 +2135,9 @@ out: * any size hardware */ static bool workio_submit_work(struct workio_cmd *wc) { - struct pool *pool = wc->u.work->pool; pthread_t submit_thread; - if (list_empty(&pool->submit_q->q) || pool->submit_fail) - return tq_push(pool->submit_q, wc); - - if (unlikely(pthread_create(&submit_thread, NULL, submit_extra_work, (void *)wc))) { + if (unlikely(pthread_create(&submit_thread, NULL, submit_work_thread, (void *)wc))) { applog(LOG_ERR, "Failed to create submit_work_thread"); return false; } @@ -4427,9 +4349,6 @@ int add_pool_details(bool live, char *url, char *user, char *pass) pool->tv_idle.tv_sec = ~0UL; - if (unlikely(pthread_create(&pool->submit_thread, NULL, submit_work_thread, (void *)pool))) - quit(1, "Failed to create pool submit thread"); - /* Test the pool is not idle if we're live running, otherwise * it will be tested separately */ pool->enabled = true; From 85008a78539f79bbec1f25fcffafe6a232e2597b Mon Sep 17 00:00:00 2001 From: ckolivas Date: Wed, 2 May 2012 10:12:07 +1000 Subject: [PATCH 06/25] Reap curls that are unused for over a minute. This allows connections to be closed, thereby allowing the number of curl handles to always be the minimum necessary to not delay networking. 
--- cgminer.c | 26 ++++++++++++++++++++++++-- miner.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/cgminer.c b/cgminer.c index 1cfa90f9..f92f98e8 100644 --- a/cgminer.c +++ b/cgminer.c @@ -1951,7 +1951,8 @@ static void sighandler(int __maybe_unused sig) kill_work(); } -/* Called with pool_lock held */ +/* Called with pool_lock held. Recruit an extra curl if none are available for + * this pool. */ static void recruit_curl(struct pool *pool) { struct curl_ent *ce = calloc(sizeof(struct curl_ent), 1); @@ -1981,7 +1982,8 @@ static struct curl_ent *pop_curl_entry(struct pool *pool) static void push_curl_entry(struct curl_ent *ce, struct pool *pool) { mutex_lock(&pool->pool_lock); - list_add(&ce->node, &pool->curlring); + list_add_tail(&ce->node, &pool->curlring); + gettimeofday(&ce->tv, NULL); mutex_unlock(&pool->pool_lock); } @@ -3980,6 +3982,25 @@ void reinit_device(struct cgpu_info *cgpu) static struct timeval rotate_tv; +/* We reap curls if they are unused for over a minute */ +static void reap_curl(struct pool *pool) +{ + struct curl_ent *ent, *iter; + struct timeval now; + + gettimeofday(&now, NULL); + mutex_lock(&pool->pool_lock); + list_for_each_entry_safe(ent, iter, &pool->curlring, node) { + if (now.tv_sec - ent->tv.tv_sec > 60) { + applog(LOG_DEBUG, "Reaped curl from pool %d", pool->pool_no); + list_del(&ent->node); + curl_easy_cleanup(ent->curl); + free(ent); + } + } + mutex_unlock(&pool->pool_lock); +} + static void *watchpool_thread(void __maybe_unused *userdata) { pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); @@ -3993,6 +4014,7 @@ static void *watchpool_thread(void __maybe_unused *userdata) for (i = 0; i < total_pools; i++) { struct pool *pool = pools[i]; + reap_curl(pool); if (!pool->enabled) continue; diff --git a/miner.h b/miner.h index 3f6f9ba3..3e114c4d 100644 --- a/miner.h +++ b/miner.h @@ -601,6 +601,7 @@ typedef struct { struct curl_ent { CURL *curl; struct list_head node; + struct timeval tv; }; struct pool { 
From 5cf4e188fdc0906419723fe8294175d81e1d1e5d Mon Sep 17 00:00:00 2001 From: ckolivas Date: Wed, 2 May 2012 11:17:06 +1000 Subject: [PATCH 07/25] Increase pool watch interval to 30 seconds. --- cgminer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgminer.c b/cgminer.c index f92f98e8..d65dccc8 100644 --- a/cgminer.c +++ b/cgminer.c @@ -4031,7 +4031,7 @@ static void *watchpool_thread(void __maybe_unused *userdata) switch_pools(NULL); } - sleep(10); + sleep(30); } return NULL; } From 01c93bc1d4c064ca1ee052c8c1445d40e2e56d53 Mon Sep 17 00:00:00 2001 From: Kano Date: Wed, 2 May 2012 12:23:32 +1000 Subject: [PATCH 08/25] api.c bug - remove extra ']'s in notify command --- api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api.c b/api.c index 91223020..72a1a214 100644 --- a/api.c +++ b/api.c @@ -1875,7 +1875,7 @@ void notifystatus(int device, struct cgpu_info *cgpu, bool isjson) // ALL counters (and only counters) must start the name with a '*' // Simplifies future external support for adding new counters sprintf(buf, isjson - ? "%s{\"NOTIFY\":%d,\"Name\":\"%s\",\"ID\":%d,\"Last Well\":%lu,\"Last Not Well\":%lu,\"Reason Not Well\":\"%s\",\"*Thread Fail Init\":%d,\"*Thread Zero Hash\":%d,\"*Thread Fail Queue\":%d,\"*Dev Sick Idle 60s\":%d,\"*Dev Dead Idle 600s\":%d,\"*Dev Nostart\":%d,\"*Dev Over Heat\":%d,\"*Dev Thermal Cutoff\":%d}" JSON_CLOSE + ? 
"%s{\"NOTIFY\":%d,\"Name\":\"%s\",\"ID\":%d,\"Last Well\":%lu,\"Last Not Well\":%lu,\"Reason Not Well\":\"%s\",\"*Thread Fail Init\":%d,\"*Thread Zero Hash\":%d,\"*Thread Fail Queue\":%d,\"*Dev Sick Idle 60s\":%d,\"*Dev Dead Idle 600s\":%d,\"*Dev Nostart\":%d,\"*Dev Over Heat\":%d,\"*Dev Thermal Cutoff\":%d}" : "%sNOTIFY=%d,Name=%s,ID=%d,Last Well=%lu,Last Not Well=%lu,Reason Not Well=%s,*Thread Fail Init=%d,*Thread Zero Hash=%d,*Thread Fail Queue=%d,*Dev Sick Idle 60s=%d,*Dev Dead Idle 600s=%d,*Dev Nostart=%d,*Dev Over Heat=%d,*Dev Thermal Cutoff=%d" SEPSTR, (isjson && (device > 0)) ? COMMA : BLANK, device, cgpu->api->name, cgpu->device_id, From 524e2fb8f26ec0d7e8820c5530dafa57b56415bc Mon Sep 17 00:00:00 2001 From: Kano Date: Wed, 2 May 2012 16:11:11 +1000 Subject: [PATCH 09/25] api.c V1.9 add 'restart' + redesign 'quit' so thread exits cleanly --- README | 7 +++-- api.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/README b/README index d5260f82..c5155eaf 100644 --- a/README +++ b/README @@ -647,7 +647,7 @@ The STATUS section is: This defaults to the cgminer version but is the value of --api-description if it was specified at runtime. -For API version 1.8: +For API version 1.9: The list of requests - a (*) means it requires privileged access - and replies are: @@ -768,7 +768,7 @@ The list of requests - a (*) means it requires privileged access - and replies a stating success or failure saving the cgminer config to filename - quit (*) none There is no status section but just a single "BYE|" + quit (*) none There is no status section but just a single "BYE" reply before cgminer quits notify NOTIFY The last status and history count of each devices problem @@ -798,6 +798,9 @@ The list of requests - a (*) means it requires privileged access - and replies a by the 'devs' command e.g. 
DEVDETAILS=0,Name=GPU,ID=0,Driver=opencl,...| + restart (*) none There is no status section but just a single "RESTART" + reply before cgminer restarts + When you enable, disable or restart a GPU or PGA, you will also get Thread messages in the cgminer status window diff --git a/api.c b/api.c index 72a1a214..6c02a972 100644 --- a/api.c +++ b/api.c @@ -158,7 +158,7 @@ static const char SEPARATOR = '|'; #define SEPSTR "|" static const char GPUSEP = ','; -static const char *APIVERSION = "1.8"; +static const char *APIVERSION = "1.9"; static const char *DEAD = "Dead"; static const char *SICK = "Sick"; static const char *NOSTART = "NoStart"; @@ -229,6 +229,7 @@ static const char *OSINFO = #define _NOTIFY "NOTIFY" #define _DEVDETAILS "DEVDETAILS" #define _BYE "BYE" +#define _RESTART "RESTART" static const char ISJSON = '{'; #define JSON0 "{" @@ -260,6 +261,7 @@ static const char ISJSON = '{'; #define JSON_NOTIFY JSON1 _NOTIFY JSON2 #define JSON_DEVDETAILS JSON1 _DEVDETAILS JSON2 #define JSON_BYE JSON1 _BYE JSON1 +#define JSON_RESTART JSON1 _RESTART JSON1 #define JSON_CLOSE JSON3 #define JSON_END JSON4 @@ -479,6 +481,12 @@ static int my_thr_id = 0; static int bye = 0; static bool ping = true; +// Used to control quit restart access to shutdown variables +static pthread_mutex_t quit_restart_lock; + +static int do_a_quit = 0; +static int do_a_restart = 0; + static time_t when = 0; // when the request occurred struct IP4ACCESS { @@ -612,7 +620,7 @@ static int pgadevice(int pgaid) } #endif -// All replies (except BYE) start with a message +// All replies (except BYE and RESTART) start with a message // thus for JSON, message() inserts JSON_START at the front // and send_result() adds JSON_END at the end static char *message(int messageid, int paramid, char *param2, bool isjson) @@ -1811,22 +1819,26 @@ static void gpuvddc(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, boo #endif } -static void send_result(SOCKETTYPE c, bool isjson); - -void doquit(SOCKETTYPE c, 
__maybe_unused char *param, bool isjson) +void doquit(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) { if (isjson) strcpy(io_buffer, JSON_START JSON_BYE); else strcpy(io_buffer, _BYE); - send_result(c, isjson); - *io_buffer = '\0'; bye = 1; + do_a_quit = 1; +} - PTH(&thr_info[my_thr_id]) = 0L; +void dorestart(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) +{ + if (isjson) + strcpy(io_buffer, JSON_START JSON_RESTART); + else + strcpy(io_buffer, _RESTART); - kill_work(); + bye = 1; + do_a_restart = 1; } void privileged(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) @@ -2017,6 +2029,7 @@ struct CMDS { { "privileged", privileged, true }, { "notify", notify, false }, { "devdetails", devdetails, false }, + { "restart", dorestart, true }, { NULL, NULL, false } }; @@ -2041,11 +2054,12 @@ static void send_result(SOCKETTYPE c, bool isjson) else applog(LOG_DEBUG, "API: sent %d", n); } - } static void tidyup(__maybe_unused void *arg) { + mutex_lock(&quit_restart_lock); + bye = 1; if (sock != INVSOCK) { @@ -2068,6 +2082,8 @@ static void tidyup(__maybe_unused void *arg) free(io_buffer); io_buffer = NULL; } + + mutex_unlock(&quit_restart_lock); } /* @@ -2172,8 +2188,37 @@ popipo: free(buf); } +static void *quit_thread(__maybe_unused void *userdata) +{ + // allow thread creator to finish whatever it's doing + mutex_lock(&quit_restart_lock); + mutex_unlock(&quit_restart_lock); + + if (opt_debug) + applog(LOG_DEBUG, "API: killing cgminer"); + + kill_work(); + + return NULL; +} + +static void *restart_thread(__maybe_unused void *userdata) +{ + // allow thread creator to finish whatever it's doing + mutex_lock(&quit_restart_lock); + mutex_unlock(&quit_restart_lock); + + if (opt_debug) + applog(LOG_DEBUG, "API: restarting cgminer"); + + app_restart(); + + return NULL; +} + void api(int api_thr_id) { + struct thr_info bye_thr; char buf[BUFSIZ]; char param_buf[BUFSIZ]; const char *localaddr = "127.0.0.1"; @@ -2197,6 
+2242,8 @@ void api(int api_thr_id) bool did; int i; + mutex_init(&quit_restart_lock); + pthread_cleanup_push(tidyup, NULL); my_thr_id = api_thr_id; @@ -2408,4 +2455,33 @@ void api(int api_thr_id) } die: pthread_cleanup_pop(true); + + if (opt_debug) + applog(LOG_DEBUG, "API: terminating due to: %s", + do_a_quit ? "QUIT" : (do_a_restart ? "RESTART" : (bye ? "BYE" : "UNKNOWN!"))); + + mutex_lock(&quit_restart_lock); + + if (do_a_restart != 0) { + + if (thr_info_create(&bye_thr, NULL, restart_thread, &bye_thr)) { + mutex_unlock(&quit_restart_lock); + quit(1, "API failed to initiate a restart - aborting"); + } + + pthread_detach(bye_thr.pth); + + } else + if (do_a_quit != 0) { + + if (thr_info_create(&bye_thr, NULL, quit_thread, &bye_thr)) { + mutex_unlock(&quit_restart_lock); + quit(1, "API failed to initiate a clean quit - aborting"); + } + + pthread_detach(bye_thr.pth); + + } + + mutex_unlock(&quit_restart_lock); } From 09fe8bcb12755b7084056b64cfb10dec86381e14 Mon Sep 17 00:00:00 2001 From: Kano Date: Wed, 2 May 2012 20:53:10 +1000 Subject: [PATCH 10/25] Icarus - minimise code delays and name timer variables --- driver-icarus.c | 72 +++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/driver-icarus.c b/driver-icarus.c index b0c3b4fa..83e526b4 100644 --- a/driver-icarus.c +++ b/driver-icarus.c @@ -196,7 +196,7 @@ static int icarus_write(int fd, const void *buf, size_t bufLen) static bool icarus_detect_one(const char *devpath) { - struct timeval tv1, tv2; + struct timeval tv_start, tv_finish; int fd; // Block 171874 nonce = (0xa2870100) = 0x000187a2 @@ -225,11 +225,11 @@ static bool icarus_detect_one(const char *devpath) hex2bin(ob_bin, golden_ob, sizeof(ob_bin)); icarus_write(fd, ob_bin, sizeof(ob_bin)); - gettimeofday(&tv1, NULL); + gettimeofday(&tv_start, NULL); memset(nonce_bin, 0, sizeof(nonce_bin)); icarus_gets(nonce_bin, sizeof(nonce_bin), fd, -1, 1); - gettimeofday(&tv2, NULL); + 
gettimeofday(&tv_finish, NULL); icarus_close(fd); @@ -315,7 +315,9 @@ static uint64_t icarus_scanhash(struct thr_info *thr, struct work *work, char *ob_hex, *nonce_hex; uint32_t nonce; uint32_t hash_count; - struct timeval tv1, tv2, elapsed; + struct timeval tv_start, tv_finish, elapsed; + + elapsed.tv_sec = elapsed.tv_usec = 0; icarus = thr->cgpu; fd = icarus->device_fd; @@ -329,30 +331,37 @@ static uint64_t icarus_scanhash(struct thr_info *thr, struct work *work, tcflush(fd, TCOFLUSH); #endif ret = icarus_write(fd, ob_bin, sizeof(ob_bin)); - gettimeofday(&tv1, NULL); + if (opt_debug) + gettimeofday(&tv_start, NULL); if (ret) return 0; /* This should never happen */ - ob_hex = bin2hex(ob_bin, sizeof(ob_bin)); - if (ob_hex) { - applog(LOG_DEBUG, "Icarus %d sent: %s", - icarus->device_id, ob_hex); - free(ob_hex); + if (opt_debug) { + ob_hex = bin2hex(ob_bin, sizeof(ob_bin)); + if (ob_hex) { + applog(LOG_DEBUG, "Icarus %d sent: %s", + icarus->device_id, ob_hex); + free(ob_hex); + } } - /* Icarus will return 8 bytes nonces or nothing */ + /* Icarus will return 4 bytes nonces or nothing */ memset(nonce_bin, 0, sizeof(nonce_bin)); ret = icarus_gets(nonce_bin, sizeof(nonce_bin), fd, thr_id, ICARUS_READ_COUNT_DEFAULT); - gettimeofday(&tv2, NULL); + + if (opt_debug) + gettimeofday(&tv_finish, NULL); memcpy((char *)&nonce, nonce_bin, sizeof(nonce_bin)); // aborted before becoming idle, get new work if (nonce == 0 && ret) { - timersub(&tv2, &tv1, &elapsed); - applog(LOG_DEBUG, "Icarus %d no nonce = 0x%08x hashes (%ld.%06lds)", - icarus->device_id, ESTIMATE_HASHES, elapsed.tv_sec, elapsed.tv_usec); + if (opt_debug) { + timersub(&tv_finish, &tv_start, &elapsed); + applog(LOG_DEBUG, "Icarus %d no nonce = 0x%08x hashes (%ld.%06lds)", + icarus->device_id, ESTIMATE_HASHES, elapsed.tv_sec, elapsed.tv_usec); + } return ESTIMATE_HASHES; } @@ -363,27 +372,26 @@ static uint64_t icarus_scanhash(struct thr_info *thr, struct work *work, work->blk.nonce = 0xffffffff; submit_nonce(thr, 
work, nonce); - timersub(&tv2, &tv1, &elapsed); + if (opt_debug) { + timersub(&tv_finish, &tv_start, &elapsed); - nonce_hex = bin2hex(nonce_bin, sizeof(nonce_bin)); - if (nonce_hex) { - applog(LOG_DEBUG, "Icarus %d returned (elapsed %ld.%06ld seconds): %s", - icarus->device_id, elapsed.tv_sec, elapsed.tv_usec, nonce_hex); - free(nonce_hex); + nonce_hex = bin2hex(nonce_bin, sizeof(nonce_bin)); + if (nonce_hex) { + applog(LOG_DEBUG, "Icarus %d returned (elapsed %ld.%06ld seconds): %s", + icarus->device_id, elapsed.tv_sec, elapsed.tv_usec, nonce_hex); + free(nonce_hex); + } } hash_count = (nonce & 0x7fffffff); - if (hash_count == 0) - hash_count = 2; - else { - if (hash_count++ == 0x7fffffff) - hash_count = 0xffffffff; - else - hash_count <<= 1; - } - - applog(LOG_DEBUG, "Icarus %d nonce = 0x%08x = 0x%08x hashes (%ld.%06lds)", - icarus->device_id, nonce, hash_count, elapsed.tv_sec, elapsed.tv_usec); + if (hash_count++ == 0x7fffffff) + hash_count = 0xffffffff; + else + hash_count <<= 1; + + if (opt_debug) + applog(LOG_DEBUG, "Icarus %d nonce = 0x%08x = 0x%08x hashes (%ld.%06lds)", + icarus->device_id, nonce, hash_count, elapsed.tv_sec, elapsed.tv_usec); return hash_count; } From afa3fa56b63af56a13f7ba1a60b94ab77cb2df3a Mon Sep 17 00:00:00 2001 From: Kano Date: Wed, 2 May 2012 22:22:09 +1000 Subject: [PATCH 11/25] API bool's and 1TBS fixes --- api.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/api.c b/api.c index 6c02a972..29b144fb 100644 --- a/api.c +++ b/api.c @@ -478,14 +478,14 @@ struct CODES { }; static int my_thr_id = 0; -static int bye = 0; +static bool bye; static bool ping = true; // Used to control quit restart access to shutdown variables static pthread_mutex_t quit_restart_lock; -static int do_a_quit = 0; -static int do_a_restart = 0; +static bool do_a_quit; +static bool do_a_restart; static time_t when = 0; // when the request occurred @@ -1826,8 +1826,8 @@ void doquit(__maybe_unused SOCKETTYPE 
c, __maybe_unused char *param, bool isjson else strcpy(io_buffer, _BYE); - bye = 1; - do_a_quit = 1; + bye = true; + do_a_quit = true; } void dorestart(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) @@ -1837,8 +1837,8 @@ void dorestart(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isj else strcpy(io_buffer, _RESTART); - bye = 1; - do_a_restart = 1; + bye = true; + do_a_restart = true; } void privileged(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) @@ -2060,7 +2060,7 @@ static void tidyup(__maybe_unused void *arg) { mutex_lock(&quit_restart_lock); - bye = 1; + bye = true; if (sock != INVSOCK) { shutdown(sock, SHUT_RDWR); @@ -2323,7 +2323,7 @@ void api(int api_thr_id) io_buffer = malloc(MYBUFSIZ+1); msg_buffer = malloc(MYBUFSIZ+1); - while (bye == 0) { + while (!bye) { clisiz = sizeof(cli); if (SOCKETFAIL(c = accept(sock, (struct sockaddr *)(&cli), &clisiz))) { applog(LOG_ERR, "API failed (%s)%s", SOCKERRMSG, UNAVAILABLE); @@ -2462,26 +2462,19 @@ die: mutex_lock(&quit_restart_lock); - if (do_a_restart != 0) { - + if (do_a_restart) { if (thr_info_create(&bye_thr, NULL, restart_thread, &bye_thr)) { mutex_unlock(&quit_restart_lock); quit(1, "API failed to initiate a restart - aborting"); } - pthread_detach(bye_thr.pth); - - } else - if (do_a_quit != 0) { - - if (thr_info_create(&bye_thr, NULL, quit_thread, &bye_thr)) { - mutex_unlock(&quit_restart_lock); - quit(1, "API failed to initiate a clean quit - aborting"); - } - - pthread_detach(bye_thr.pth); - + } else if (do_a_quit) { + if (thr_info_create(&bye_thr, NULL, quit_thread, &bye_thr)) { + mutex_unlock(&quit_restart_lock); + quit(1, "API failed to initiate a clean quit - aborting"); } + pthread_detach(bye_thr.pth); + } mutex_unlock(&quit_restart_lock); } From 124ee9b28a8ab53822e42df17700e61cf88dcb63 Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Thu, 26 Apr 2012 15:42:09 -0400 Subject: [PATCH 12/25] Bugfix: thread_shutdown shouldn't try to free the 
device, since it's needed afterward --- driver-icarus.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/driver-icarus.c b/driver-icarus.c index 83e526b4..7a7ec494 100644 --- a/driver-icarus.c +++ b/driver-icarus.c @@ -398,21 +398,8 @@ static uint64_t icarus_scanhash(struct thr_info *thr, struct work *work, static void icarus_shutdown(struct thr_info *thr) { - struct cgpu_info *icarus; - - if (thr->cgpu) { - icarus = thr->cgpu; - - if (icarus->device_path) - free(icarus->device_path); - - close(icarus->device_fd); - - devices[icarus->device_id] = NULL; - free(icarus); - - thr->cgpu = NULL; - } + struct cgpu_info *icarus = thr->cgpu; + icarus_close(icarus->device_fd); } struct device_api icarus_api = { From 4ba9006d0e951b01773f40ca37b36a8e6768057a Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Wed, 2 May 2012 10:28:09 -0400 Subject: [PATCH 13/25] Disable failing devices such that the user can attempt to re-enable them --- cgminer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cgminer.c b/cgminer.c index d65dccc8..b2397e58 100644 --- a/cgminer.c +++ b/cgminer.c @@ -3720,11 +3720,14 @@ void *miner_thread(void *userdata) } if (unlikely(!hashes)) { + applog(LOG_ERR, "%s %d failure, disabling!", api->name, cgpu->device_id); + cgpu->deven = DEV_DISABLED; + cgpu->device_last_not_well = time(NULL); cgpu->device_not_well_reason = REASON_THREAD_ZERO_HASH; cgpu->thread_zero_hash_count++; - goto out; + goto disabled; } hashes_done += hashes; @@ -3785,6 +3788,7 @@ void *miner_thread(void *userdata) if (unlikely(mythr->pause || cgpu->deven != DEV_ENABLED)) { applog(LOG_WARNING, "Thread %d being disabled", thr_id); +disabled: mythr->rolling = mythr->cgpu->rolling = 0; applog(LOG_DEBUG, "Popping wakeup ping in miner thread"); thread_reportout(mythr); From 06023e549efa649979b501cd448e1098b715860f Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Wed, 2 May 2012 10:34:31 -0400 Subject: [PATCH 14/25] Bugfix: Return 
failure, rather than quit, if BFwrite fails --- driver-bitforce.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/driver-bitforce.c b/driver-bitforce.c index 4306235f..a59338cd 100644 --- a/driver-bitforce.c +++ b/driver-bitforce.c @@ -79,14 +79,18 @@ static void BFgets(char *buf, size_t bufLen, int fd) buf[0] = '\0'; } -static void BFwrite(int fd, const void *buf, ssize_t bufLen) +static ssize_t BFwrite2(int fd, const void *buf, ssize_t bufLen) { - ssize_t ret = write(fd, buf, bufLen); - - if (unlikely(ret != bufLen)) - quit(1, "BFwrite failed"); + return write(fd, buf, bufLen); } +#define BFwrite(fd, buf, bufLen) do { \ + if ((bufLen) != BFwrite2(fd, buf, bufLen)) { \ + applog(LOG_ERR, "Error writing to BitForce (" #buf ")"); \ + return 0; \ + } \ +} while(0) + #define BFclose(fd) close(fd) static bool bitforce_detect_one(const char *devpath) From d4405de704e968578471d3a4e5d66c688523854e Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 08:56:42 +1000 Subject: [PATCH 15/25] Generalise add_pool() functions since they're repeated in add_pool_details. 
--- cgminer.c | 19 ++++++------------- miner.h | 2 ++ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/cgminer.c b/cgminer.c index b2397e58..2733b8f5 100644 --- a/cgminer.c +++ b/cgminer.c @@ -378,7 +378,8 @@ static void sharelog(const char*disposition, const struct work*work) applog(LOG_ERR, "sharelog fwrite error"); } -static void add_pool(void) +/* Return value is ignored if not called from add_pool_details */ +static struct pool *add_pool(void) { struct pool *pool; @@ -393,6 +394,8 @@ static void add_pool(void) /* Make sure the pool doesn't think we've been idle since time 0 */ pool->tv_idle.tv_sec = ~0UL; + + return pool; } /* Pool variant of test and set */ @@ -4351,19 +4354,12 @@ char *curses_input(const char *query) int add_pool_details(bool live, char *url, char *user, char *pass) { - struct pool *pool = NULL; + struct pool *pool; if (total_pools == MAX_POOLS) return ADD_POOL_MAXIMUM; - pool = calloc(sizeof(struct pool), 1); - if (!pool) - quit(1, "Failed to realloc pools in add_pool_details"); - pool->pool_no = total_pools; - pool->prio = total_pools; - if (unlikely(pthread_mutex_init(&pool->pool_lock, NULL))) - quit (1, "Failed to pthread_mutex_init in input_pool"); - INIT_LIST_HEAD(&pool->curlring); + pool = add_pool(); pool->rpc_url = url; pool->rpc_user = user; @@ -4373,14 +4369,11 @@ int add_pool_details(bool live, char *url, char *user, char *pass) quit(1, "Failed to malloc userpass"); sprintf(pool->rpc_userpass, "%s:%s", pool->rpc_user, pool->rpc_pass); - pool->tv_idle.tv_sec = ~0UL; - /* Test the pool is not idle if we're live running, otherwise * it will be tested separately */ pool->enabled = true; if (live && !pool_active(pool, false)) pool->idle = true; - pools[total_pools++] = pool; return ADD_POOL_OK; } diff --git a/miner.h b/miner.h index 3e114c4d..e74d6f2f 100644 --- a/miner.h +++ b/miner.h @@ -641,6 +641,8 @@ struct pool { pthread_t submit_thread; pthread_t getwork_thread; + pthread_mutex_t cr_lock; + pthread_cond_t cr_cond; 
struct list_head curlring; }; From 5081c1824a0ba2a2549394499776f14b2857f138 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 09:24:25 +1000 Subject: [PATCH 16/25] Limit the number of curls we recruit on communication failures and with delaynet enabled to 5 by maintaining a per-pool curl count, and using a pthread conditional that wakes up when one is returned to the ring buffer. --- cgminer.c | 22 ++++++++++++++++++---- miner.h | 2 +- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/cgminer.c b/cgminer.c index 2733b8f5..f5e987eb 100644 --- a/cgminer.c +++ b/cgminer.c @@ -390,6 +390,8 @@ static struct pool *add_pool(void) pools[total_pools++] = pool; if (unlikely(pthread_mutex_init(&pool->pool_lock, NULL))) quit(1, "Failed to pthread_mutex_init in add_pool"); + if (unlikely(pthread_cond_init(&pool->cr_cond, NULL))) + quit(1, "Failed to pthread_cond_init in add_pool"); INIT_LIST_HEAD(&pool->curlring); /* Make sure the pool doesn't think we've been idle since time 0 */ @@ -1965,19 +1967,29 @@ static void recruit_curl(struct pool *pool) quit(1, "Failed to init in recruit_curl"); list_add(&ce->node, &pool->curlring); - applog(LOG_DEBUG, "Recruited new curl for pool %d", pool->pool_no); + pool->curls++; + applog(LOG_DEBUG, "Recruited curl %d for pool %d", pool->curls, pool->pool_no); } +/* Grab an available curl if there is one. 
If not, then recruit extra curls + * unless we are in a submit_fail situation, or we have opt_delaynet enabled + * and there are already 5 curls in circulation */ static struct curl_ent *pop_curl_entry(struct pool *pool) { struct curl_ent *ce; mutex_lock(&pool->pool_lock); - if (list_empty(&pool->curlring)) + if (!pool->curls) recruit_curl(pool); + else if (list_empty(&pool->curlring)) { + if ((pool->submit_fail || opt_delaynet) && pool->curls > 4) + pthread_cond_wait(&pool->cr_cond, &pool->pool_lock); + else + recruit_curl(pool); + } ce = list_entry(pool->curlring.next, struct curl_ent, node); list_del(&ce->node); - mutex_unlock(&pool->pool_lock);; + mutex_unlock(&pool->pool_lock); return ce; } @@ -1987,6 +1999,7 @@ static void push_curl_entry(struct curl_ent *ce, struct pool *pool) mutex_lock(&pool->pool_lock); list_add_tail(&ce->node, &pool->curlring); gettimeofday(&ce->tv, NULL); + pthread_cond_signal(&pool->cr_cond); mutex_unlock(&pool->pool_lock); } @@ -3999,7 +4012,8 @@ static void reap_curl(struct pool *pool) mutex_lock(&pool->pool_lock); list_for_each_entry_safe(ent, iter, &pool->curlring, node) { if (now.tv_sec - ent->tv.tv_sec > 60) { - applog(LOG_DEBUG, "Reaped curl from pool %d", pool->pool_no); + applog(LOG_DEBUG, "Reaped curl %d from pool %d", pool->curls, pool->pool_no); + pool->curls--; list_del(&ent->node); curl_easy_cleanup(ent->curl); free(ent); diff --git a/miner.h b/miner.h index e74d6f2f..85d4e7a2 100644 --- a/miner.h +++ b/miner.h @@ -641,7 +641,7 @@ struct pool { pthread_t submit_thread; pthread_t getwork_thread; - pthread_mutex_t cr_lock; + int curls; pthread_cond_t cr_cond; struct list_head curlring; }; From d5d8c2cc5b6b7cfbda29c569b364f4511cd59bf3 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 10:35:30 +1000 Subject: [PATCH 17/25] Implement an older header fix for a label existing before the pthread_cleanup macro. 
--- api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api.c b/api.c index 29b144fb..8571dccd 100644 --- a/api.c +++ b/api.c @@ -2454,6 +2454,10 @@ void api(int api_thr_id) CLOSESOCKET(c); } die: + /* Blank line fix for older compilers since pthread_cleanup_pop is a + * macro that gets confused by a label existing immediately before it + */ + ; pthread_cleanup_pop(true); if (opt_debug) From 068018225c94550f6590d5494262ee5a92883fee Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Wed, 2 May 2012 23:09:39 -0400 Subject: [PATCH 18/25] Bugfix: Use a 64-bit type for hashes_done (miner_thread) since it can overflow 32-bit on some FPGAs --- cgminer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cgminer.c b/cgminer.c index f5e987eb..dad5e9bd 100644 --- a/cgminer.c +++ b/cgminer.c @@ -3141,7 +3141,7 @@ static inline void thread_reportout(struct thr_info *thr) } static void hashmeter(int thr_id, struct timeval *diff, - unsigned long hashes_done) + unsigned long long hashes_done) { struct timeval temp_tv_end, total_diff; double secs; @@ -3171,7 +3171,7 @@ static void hashmeter(int thr_id, struct timeval *diff, double thread_rolling = 0.0; int i; - applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]", + applog(LOG_DEBUG, "[thread %d: %llu hashes, %.0f khash/sec]", thr_id, hashes_done, hashes_done / secs); /* Rolling average for each thread and each device */ @@ -3672,7 +3672,7 @@ void *miner_thread(void *userdata) struct timeval tv_start, tv_end, tv_workstart, tv_lastupdate; struct timeval diff, sdiff, wdiff; uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff; - uint32_t hashes_done = 0; + unsigned long long hashes_done = 0; uint32_t hashes; struct work *work = make_work(); unsigned const int request_interval = opt_scantime * 2 / 3 ? 
: 1; From a4f1af1733db08145c59d2e8acf2986d06b11094 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 15:35:13 +1000 Subject: [PATCH 19/25] Detect pools that have issues represented by endless rejected shares and disable them, with a parameter to optionally disable this feature. --- cgminer.c | 22 ++++++++++++++++++++++ miner.h | 1 + 2 files changed, 23 insertions(+) diff --git a/cgminer.c b/cgminer.c index dad5e9bd..ddf0ebb5 100644 --- a/cgminer.c +++ b/cgminer.c @@ -135,6 +135,7 @@ int opt_api_port = 4028; bool opt_api_listen = false; bool opt_api_network = false; bool opt_delaynet = false; +bool opt_disable_pool = true; char *opt_kernel_path; char *cgminer_path; @@ -835,6 +836,9 @@ static struct opt_table opt_config_table[] = { opt_hidden #endif ), + OPT_WITHOUT_ARG("--no-pool-disable", + opt_set_invbool, &opt_disable_pool, + "Do not automatically disable pools that continually reject shares"), OPT_WITHOUT_ARG("--no-restart", opt_set_invbool, &opt_restart, #ifdef HAVE_OPENCL @@ -1653,6 +1657,7 @@ static bool submit_upstream_work(const struct work *work, CURL *curl) cgpu->accepted++; total_accepted++; pool->accepted++; + pool->seq_rejects = 0; cgpu->last_share_pool = pool->pool_no; cgpu->last_share_pool_time = time(NULL); applog(LOG_DEBUG, "PROOF OF WORK RESULT: true (yay!!!)"); @@ -1674,6 +1679,7 @@ static bool submit_upstream_work(const struct work *work, CURL *curl) cgpu->rejected++; total_rejected++; pool->rejected++; + pool->seq_rejects++; applog(LOG_DEBUG, "PROOF OF WORK RESULT: false (booooo)"); if (!QUIET) { char where[17]; @@ -1704,6 +1710,22 @@ static bool submit_upstream_work(const struct work *work, CURL *curl) hashshow, cgpu->api->name, cgpu->device_id, where, reason); sharelog(disposition, work); } + + /* Once we have more than a nominal amount of sequential rejects, + * at least 10 and more than the current utility rate per minute, + * disable the pool because some pool error is likely to have + * ensued. 
*/ + if (pool->seq_rejects > 10 && opt_disable_pool && total_pools > 1) { + double utility = total_accepted / ( total_secs ? total_secs : 1 ) * 60; + + if (pool->seq_rejects > utility) { + applog(LOG_WARNING, "Pool %d rejected %d sequential shares, disabling!", + pool->pool_no, pool->seq_rejects); + pool->enabled = false; + if (pool == current_pool()) + switch_pools(NULL); + } + } } cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60; diff --git a/miner.h b/miner.h index 85d4e7a2..27decf2d 100644 --- a/miner.h +++ b/miner.h @@ -608,6 +608,7 @@ struct pool { int pool_no; int prio; int accepted, rejected; + int seq_rejects; bool submit_fail; bool idle; From 41d5813d8f169e88057b747fac7cebd2a6a1022a Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 15:37:29 +1000 Subject: [PATCH 20/25] Convert hashes to an unsigned long long as well. --- cgminer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgminer.c b/cgminer.c index ddf0ebb5..6ebfbc41 100644 --- a/cgminer.c +++ b/cgminer.c @@ -3695,7 +3695,7 @@ void *miner_thread(void *userdata) struct timeval diff, sdiff, wdiff; uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff; unsigned long long hashes_done = 0; - uint32_t hashes; + unsigned long long hashes; struct work *work = make_work(); unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1; unsigned const long request_nonce = MAXTHREADS / 3 * 2; From e3a3c68aea4f1aadcb8588085c05c0e9ec1e1665 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 15:57:04 +1000 Subject: [PATCH 21/25] Readme updates. --- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index c5155eaf..4769e9dd 100644 --- a/README +++ b/README @@ -26,7 +26,7 @@ IRC Channel: irc://irc.freenode.net/cgminer -License: GPLv2. See COPYING for details. +License: GPLv3. See COPYING for details. READ EXECUTIVE SUMMARY BELOW FOR FIRST TIME USERS! 
@@ -133,6 +133,7 @@ Options for both config file and command line: --log|-l Interval in seconds between log output (default: 5) --monitor|-m Use custom pipe cmd for output messages --net-delay Impose small delays in networking to not overload slow routers +--no-pool-disable Do not automatically disable pools that continually reject shares --no-submit-stale Don't submit shares if they are detected as stale --pass|-p Password for bitcoin JSON-RPC server --per-device-stats Force verbose mode and output per-device statistics @@ -285,7 +286,6 @@ Current pool management strategy: Failover S gives you: -[L]ongpoll: On [Q]ueue: 1 [S]cantime: 60 [E]xpiry: 120 @@ -1008,7 +1008,7 @@ no longer under active development and will not be supported unless someone steps up to help maintain it. No binary builds supporting CPU mining will be released but CPU mining can be built into cgminer when it is compiled. -Q: I upgraded cgminer version and mu hashrate suddenly dropped! +Q: I upgraded cgminer version and my hashrate suddenly dropped! A: No, you upgraded your SDK version unwittingly between upgrades of cgminer and that caused your hashrate to drop. See the next question. From eb1521a4b69f85359dda1accbf63d5b0d1df6799 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 16:10:27 +1000 Subject: [PATCH 22/25] NEWS updates. --- NEWS | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/NEWS b/NEWS index af9b8bb9..e5664bf9 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,44 @@ +Version 2.4.0 - May 3, 2012 + +- Convert hashes to an unsigned long long as well. +- Detect pools that have issues represented by endless rejected shares and +disable them, with a parameter to optionally disable this feature. +- Bugfix: Use a 64-bit type for hashes_done (miner_thread) since it can overflow +32-bit on some FPGAs +- Implement an older header fix for a label existing before the pthread_cleanup +macro. 
+- Limit the number of curls we recruit on communication failures and with +delaynet enabled to 5 by maintaining a per-pool curl count, and using a pthread +conditional that wakes up when one is returned to the ring buffer. +- Generalise add_pool() functions since they're repeated in add_pool_details. +- Bugfix: Return failure, rather than quit, if BFwrite fails +- Disable failing devices such that the user can attempt to re-enable them +- Bugfix: thread_shutdown shouldn't try to free the device, since it's needed +afterward +- API bool's and 1TBS fixes +- Icarus - minimise code delays and name timer variables +- api.c V1.9 add 'restart' + redesign 'quit' so thread exits cleanly +- api.c bug - remove extra ']'s in notify command +- Increase pool watch interval to 30 seconds. +- Reap curls that are unused for over a minute. This allows connections to be +closed, thereby allowing the number of curl handles to always be the minimum +necessary to not delay networking. +- Use the ringbuffer of curls from the same pool for submit as well as getwork +threads. Since the curl handles were already connected to the same pool and are +immediately available, share submission will not be delayed by getworks. +- Implement a scalable networking framework designed to cope with any sized +network requirements, yet minimise the number of connections being reopened. Do +this by creating a ring buffer linked list of curl handles to be used by getwork, +recruiting extra handles when none is immediately available. +- There is no need for the submit and getwork curls to be tied to the pool +struct. +- Do not recruit extra connection threads if there have been connection errors +to the pool in question. +- We should not retry submitting shares indefinitely or we may end up with a +huge backlog during network outages, so discard stale shares if we failed to +submit them and they've become stale in the interim. + + Version 2.3.6 - April 29, 2012 - Shorten stale share messages slightly.
From 81286a9073812af9fd0617db83eedd0a82f2fdfe Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 16:19:19 +1000 Subject: [PATCH 23/25] Bump version to 2.4.0 --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 57058f67..a22baedc 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [2]) -m4_define([v_min], [3]) -m4_define([v_mic], [6]) +m4_define([v_min], [4]) +m4_define([v_mic], [0]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_ver], [v_maj.v_min.v_mic]) m4_define([lt_rev], m4_eval(v_maj + v_min)) From f0985f997889a06200757cbd02ffe04bf67cd85f Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 16:25:07 +1000 Subject: [PATCH 24/25] Only show longpoll warning once when it has failed. --- cgminer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cgminer.c b/cgminer.c index 6ebfbc41..26259c6e 100644 --- a/cgminer.c +++ b/cgminer.c @@ -3990,8 +3990,9 @@ retry_pool: if (end.tv_sec - start.tv_sec > 30) continue; if (opt_retries == -1 || failures++ < opt_retries) { - applog(LOG_WARNING, - "longpoll failed for %s, sleeping for 30s", pool->lp_url); + if (failures == 1) + applog(LOG_WARNING, + "longpoll failed for %s, retrying every 30s", pool->lp_url); sleep(30); } else { applog(LOG_ERR, From 614328352cf22671401f3f6de14a804b0aa4fdb6 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 3 May 2012 16:25:53 +1000 Subject: [PATCH 25/25] More NEWS. --- NEWS | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS b/NEWS index e5664bf9..6e56809e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ Version 2.4.0 - May 3, 2012 +- Only show longpoll warning once when it has failed. - Convert hashes to an unsigned long long as well. 
- Detect pools that have issues represented by endless rejected shares and disable them, with a parameter to optionally disable this feature.