diff --git a/AUTHORS b/AUTHORS index c95f6a9f..b58f3588 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ Original CPU mining software: Jeff Garzik GPU mining and rewrite: Con Kolivas 15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ BitFORCE FPGA mining and refactor: Luke Dashjr 1NbRmS6a4dniwHHoSS9v3tEYUpP1Z5VVdL -API+: Andrew Smith 1Jjk2LmktEQKnv8r2cZ9MvLiZwZ9gxabKm +API+: Andrew Smith 1Jjk2LmktEQKnv8r2cZ9MvLiZwZ9gxabKm diff --git a/README b/README index 6393c0b6..7c411d90 100644 --- a/README +++ b/README @@ -596,7 +596,7 @@ An example request in both formats to set GPU 0 fan to 80%: The format of each reply (unless stated otherwise) is a STATUS section followed by an optional detail section -For API version 1.4: +For API version 1.4 and later: The STATUS section is: @@ -622,7 +622,7 @@ The STATUS section is: This defaults to the cgminer version but is the value of --api-description if it was specified at runtime. -For API version 1.4: +For API version 1.6: The list of requests - a (*) means it requires privileged access - and replies are: @@ -633,12 +633,14 @@ The list of requests - a (*) means it requires privileged access - and replies a config CONFIG Some miner configuration information: GPU Count=N, <- the number of GPUs + PGA Count=N, <- the number of PGAs CPU Count=N, <- the number of CPUs Pool Count=N, <- the number of Pools ADL=X, <- Y or N if ADL is compiled in the code ADL in use=X, <- Y or N if any GPU has ADL Strategy=Name, <- the current pool strategy - Log Interval=N| <- log interval (--log N) + Log Interval=N, <- log interval (--log N) + Device Code=GPU ICA | <- spaced list of compiled devices summary SUMMARY The status summary of the miner e.g. Elapsed=NNN,Found Blocks=N,Getworks=N,...| @@ -646,16 +648,22 @@ The list of requests - a (*) means it requires privileged access - and replies a pools POOLS The status of each pool e.g. 
Pool=0,URL=http://pool.com:6311,Status=Alive,...| - devs DEVS Each available CPU and GPU with their details + devs DEVS Each available GPU, PGA and CPU with their details e.g. GPU=0,Accepted=NN,MHS av=NNN,...,Intensity=D| Last Share Time=NNN, <- standand long time in seconds (or 0 if none) of last accepted share Last Share Pool=N, <- pool number (or -1 if none) + Will not report PGAs if PGA mining is disabled Will not report CPUs if CPU mining is disabled gpu|N GPU The details of a single GPU number N in the same format and details as for DEVS + pga|N PGA The details of a single PGA number N in the same + format and details as for DEVS + This is only available if PGA mining is enabled + Use 'pgacount' or 'config' first to see if there are any + cpu|N CPU The details of a single CPU number N in the same format and details as for DEVS This is only available if CPU mining is enabled @@ -663,6 +671,9 @@ The list of requests - a (*) means it requires privileged access - and replies a gpucount GPUS Count=N| <- the number of GPUs + pgacount PGAS Count=N| <- the number of PGAs + Always returns 0 if PGA mining is disabled + cpucount CPUS Count=N| <- the number of CPUs Always returns 0 if CPU mining is disabled @@ -714,7 +725,7 @@ The list of requests - a (*) means it requires privileged access - and replies a stating the results of setting GPU N clock to V MHz gpufan|N,V (*) - none There is no reply section just the STATUS section + none There is no reply section just the STATUS section stating the results of setting GPU N fan speed to V% gpuvddc|N,V (*) @@ -729,6 +740,9 @@ The list of requests - a (*) means it requires privileged access - and replies a quit (*) none There is no status section but just a single "BYE|" reply before cgminer quits + notify NOTIFY The last status and history count of each device's problems + e.g. 
NOTIFY=0,Name=GPU,ID=0,Last Well=1332432290,...| + privileged (*) none There is no reply section just the STATUS section stating an error if you do not have privileged access @@ -923,6 +937,12 @@ it fail when php is installed properly but I only get errors about Sockets not working in the logs? A: http://us.php.net/manual/en/sockets.installation.php +Q: What is a PGA? +A: At the moment, cgminer supports 2 FPGAs: Icarus and BitForce. +They are Field-Programmable Gate Arrays that have been programmed to do Bitcoin +mining. Since the acronym needs to be only 3 characters, the "Field-" part has +been skipped. + --- This code is provided entirely free of charge by the programmer in his spare diff --git a/adl.c b/adl.c index 7a4707e0..ed172163 100644 --- a/adl.c +++ b/adl.c @@ -992,6 +992,10 @@ static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp) if (temp > ga->overtemp && fanpercent < iMax) { applog(LOG_WARNING, "Overheat detected on GPU %d, increasing fan to 100%", gpu); newpercent = iMax; + + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_DEV_OVER_HEAT; + cgpu->dev_over_heat_count++; } else if (temp > ga->targettemp && fanpercent < top && temp >= lasttemp) { applog(LOG_DEBUG, "Temperature over target, increasing fanspeed"); if (temp > ga->targettemp + opt_hysteresis) @@ -1076,9 +1080,17 @@ void gpu_autotune(int gpu, enum dev_enable *denable) applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu); *denable = DEV_RECOVER; newengine = ga->minspeed; + + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF; + cgpu->dev_thermal_cutoff_count++; } else if (temp > ga->overtemp && engine > ga->minspeed) { applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu); newengine = ga->minspeed; + + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_DEV_OVER_HEAT; + cgpu->dev_over_heat_count++; } else if (temp > 
ga->targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) { applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis); newengine = engine - ga->lpOdParameters.sEngineClock.iStep; diff --git a/api.c b/api.c index dd2af0b7..1879c38f 100644 --- a/api.c +++ b/api.c @@ -6,6 +6,10 @@ * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 3 of the License, or (at your option) * any later version. See COPYING for more details. + * + * Note: the code always includes GPU support even if there are no GPUs + * this simplifies handling multiple other device code being included + * depending on compile options */ #include "config.h" @@ -134,12 +138,13 @@ #endif // Big enough for largest API request -// though a PC with 100s of CPUs may exceed the size ... +// though a PC with 100s of PGAs/CPUs may exceed the size ... // Current code assumes it can socket send this size also -#define MYBUFSIZ 32768 +#define MYBUFSIZ 65432 // TODO: intercept before it's exceeded // Number of requests to queue - normally would be small -#define QUEUE 10 +// However lots of PGA's may mean more +#define QUEUE 100 static char *io_buffer = NULL; static char *msg_buffer = NULL; @@ -152,7 +157,7 @@ static const char *COMMA = ","; static const char SEPARATOR = '|'; static const char GPUSEP = ','; -static const char *APIVERSION = "1.4"; +static const char *APIVERSION = "1.6"; static const char *DEAD = "Dead"; static const char *SICK = "Sick"; static const char *NOSTART = "NoStart"; @@ -164,20 +169,60 @@ static const char *DYNAMIC = _DYNAMIC; static const char *YES = "Y"; static const char *NO = "N"; +static const char *DEVICECODE = "" +#ifdef HAVE_OPENCL + "GPU " +#endif +#ifdef USE_BITFORCE + "BFL " +#endif +#ifdef USE_ICARUS + "ICA " +#endif +#ifdef WANT_CPUMINE + "CPU " +#endif + ""; + +static const char *OSINFO = +#if defined(__linux) + "Linux"; +#else +#if defined(__APPLE__) + "Apple"; 
+#else +#if defined (WIN32) + "Windows"; +#else +#if defined(unix) + "Unix"; +#else + "Unknown"; +#endif +#endif +#endif +#endif + #define _DEVS "DEVS" #define _POOLS "POOLS" #define _SUMMARY "SUMMARY" #define _STATUS "STATUS" #define _VERSION "VERSION" #define _MINECON "CONFIG" +#define _GPU "GPU" + +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +#define _PGA "PGA" +#endif #ifdef WANT_CPUMINE #define _CPU "CPU" #endif -#define _GPU "GPU" -#define _CPUS "CPUS" #define _GPUS "GPUS" +#define _PGAS "PGAS" +#define _CPUS "CPUS" +#define _NOTIFY "NOTIFY" #define _BYE "BYE" static const char ISJSON = '{'; @@ -196,12 +241,18 @@ static const char ISJSON = '{'; #define JSON_MINECON JSON1 _MINECON JSON2 #define JSON_GPU JSON1 _GPU JSON2 +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +#define JSON_PGA JSON1 _PGA JSON2 +#endif + #ifdef WANT_CPUMINE #define JSON_CPU JSON1 _CPU JSON2 #endif #define JSON_GPUS JSON1 _GPUS JSON2 +#define JSON_PGAS JSON1 _PGAS JSON2 #define JSON_CPUS JSON1 _CPUS JSON2 +#define JSON_NOTIFY JSON1 _NOTIFY JSON2 #define JSON_BYE JSON1 _BYE JSON1 #define JSON_CLOSE JSON3 #define JSON_END JSON4 @@ -269,6 +320,15 @@ static const char *JSON_PARAMETER = "parameter"; #define MSG_TOOMANYP 54 #define MSG_ADDPOOL 55 +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +#define MSG_PGANON 56 +#define MSG_PGADEV 57 +#define MSG_INVPGA 58 +#endif + +#define MSG_NUMPGA 59 +#define MSG_NOTIFY 60 + enum code_severity { SEVERITY_ERR, SEVERITY_WARN, @@ -279,16 +339,17 @@ enum code_severity { enum code_parameters { PARAM_GPU, + PARAM_PGA, PARAM_CPU, PARAM_GPUMAX, + PARAM_PGAMAX, PARAM_CPUMAX, PARAM_PMAX, PARAM_POOLMAX, -#ifdef WANT_CPUMINE - PARAM_GCMAX, -#else - PARAM_GMAX, -#endif + +// Single generic case: have the code resolve it - see below + PARAM_DMAX, + PARAM_CMD, PARAM_POOL, PARAM_STR, @@ -310,25 +371,43 @@ struct CODES { { SEVERITY_ERR, MSG_GPUNON, PARAM_NONE, "No GPUs" }, { SEVERITY_SUCC, MSG_POOL, PARAM_PMAX, "%d Pool(s)" }, { SEVERITY_ERR, MSG_NOPOOL, 
PARAM_NONE, "No pools" }, + + { SEVERITY_SUCC, MSG_DEVS, PARAM_DMAX, "%d GPU(s)" +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + " - %d PGA(s)" +#endif #ifdef WANT_CPUMINE - { SEVERITY_SUCC, MSG_DEVS, PARAM_GCMAX, "%d GPU(s) - %d CPU(s)" }, - { SEVERITY_ERR, MSG_NODEVS, PARAM_NONE, "No GPUs/CPUs" }, -#else - { SEVERITY_SUCC, MSG_DEVS, PARAM_GMAX, "%d GPU(s)" }, - { SEVERITY_ERR, MSG_NODEVS, PARAM_NONE, "No GPUs" }, + " - %d CPU(s)" +#endif + }, + + { SEVERITY_ERR, MSG_NODEVS, PARAM_NONE, "No GPUs" +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + "/PGAs" #endif +#ifdef WANT_CPUMINE + "/CPUs" +#endif + }, + { SEVERITY_SUCC, MSG_SUMM, PARAM_NONE, "Summary" }, { SEVERITY_INFO, MSG_GPUDIS, PARAM_GPU, "GPU %d set disable flag" }, { SEVERITY_INFO, MSG_GPUREI, PARAM_GPU, "GPU %d restart attempted" }, { SEVERITY_ERR, MSG_INVCMD, PARAM_NONE, "Invalid command" }, { SEVERITY_ERR, MSG_MISID, PARAM_NONE, "Missing device id parameter" }, { SEVERITY_SUCC, MSG_GPUDEV, PARAM_GPU, "GPU%d" }, +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + { SEVERITY_ERR, MSG_PGANON, PARAM_NONE, "No PGAs" }, + { SEVERITY_SUCC, MSG_PGADEV, PARAM_PGA, "PGA%d" }, + { SEVERITY_ERR, MSG_INVPGA, PARAM_PGAMAX, "Invalid PGA id %d - range is 0 - %d" }, +#endif #ifdef WANT_CPUMINE { SEVERITY_ERR, MSG_CPUNON, PARAM_NONE, "No CPUs" }, { SEVERITY_SUCC, MSG_CPUDEV, PARAM_CPU, "CPU%d" }, { SEVERITY_ERR, MSG_INVCPU, PARAM_CPUMAX, "Invalid CPU id %d - range is 0 - %d" }, #endif { SEVERITY_SUCC, MSG_NUMGPU, PARAM_NONE, "GPU count" }, + { SEVERITY_SUCC, MSG_NUMPGA, PARAM_NONE, "PGA count" }, { SEVERITY_SUCC, MSG_NUMCPU, PARAM_NONE, "CPU count" }, { SEVERITY_SUCC, MSG_VERSION, PARAM_NONE, "CGMiner versions" }, { SEVERITY_ERR, MSG_INVJSON, PARAM_NONE, "Invalid JSON" }, @@ -364,6 +443,7 @@ struct CODES { { SEVERITY_ERR, MSG_INVPDP, PARAM_STR, "Invalid addpool details '%s'" }, { SEVERITY_ERR, MSG_TOOMANYP,PARAM_NONE, "Reached maximum number of pools (%d)" }, { SEVERITY_SUCC, MSG_ADDPOOL, PARAM_STR, "Added pool 
'%s'" }, + { SEVERITY_SUCC, MSG_NOTIFY, PARAM_NONE, "Notify" }, { SEVERITY_FAIL, 0, 0, NULL } }; @@ -382,6 +462,54 @@ struct IP4ACCESS { static struct IP4ACCESS *ipaccess = NULL; static int ips = 0; +#ifdef USE_BITFORCE +extern struct device_api bitforce_api; +#endif + +#ifdef USE_ICARUS +extern struct device_api icarus_api; +#endif + +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +static int numpgas() +{ + int count = 0; + int i; + + for (i = 0; i < total_devices; i++) { +#ifdef USE_BITFORCE + if (devices[i]->api == &bitforce_api) + count++; +#endif +#ifdef USE_ICARUS + if (devices[i]->api == &icarus_api) + count++; +#endif + } + return count; +} + +static int pgadevice(int pgaid) +{ + int count = 0; + int i; + + for (i = 0; i < total_devices; i++) { +#ifdef USE_BITFORCE + if (devices[i]->api == &bitforce_api) + count++; +#endif +#ifdef USE_ICARUS + if (devices[i]->api == &icarus_api) + count++; +#endif + if (count == (pgaid + 1)) + return i; + } + return -1; +} +#endif + // All replies (except BYE) start with a message // thus for JSON, message() inserts JSON_START at the front // and send_result() adds JSON_END at the end @@ -389,6 +517,9 @@ static char *message(int messageid, int paramid, char *param2, bool isjson) { char severity; char *ptr; +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + int pga; +#endif #ifdef WANT_CPUMINE int cpu; #endif @@ -421,6 +552,7 @@ static char *message(int messageid, int paramid, char *param2, bool isjson) switch(codes[i].params) { case PARAM_GPU: + case PARAM_PGA: case PARAM_CPU: sprintf(ptr, codes[i].description, paramid); break; @@ -430,26 +562,47 @@ static char *message(int messageid, int paramid, char *param2, bool isjson) case PARAM_GPUMAX: sprintf(ptr, codes[i].description, paramid, nDevs - 1); break; +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + case PARAM_PGAMAX: + pga = numpgas(); + sprintf(ptr, codes[i].description, paramid, pga - 1); + break; +#endif +#ifdef WANT_CPUMINE + case PARAM_CPUMAX: + if 
(opt_n_threads > 0) + cpu = num_processors; + else + cpu = 0; + sprintf(ptr, codes[i].description, paramid, cpu - 1); + break; +#endif case PARAM_PMAX: sprintf(ptr, codes[i].description, total_pools); break; case PARAM_POOLMAX: sprintf(ptr, codes[i].description, paramid, total_pools - 1); break; + case PARAM_DMAX: +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + pga = numpgas(); +#endif #ifdef WANT_CPUMINE - case PARAM_GCMAX: if (opt_n_threads > 0) cpu = num_processors; else cpu = 0; +#endif - sprintf(ptr, codes[i].description, nDevs, cpu); - break; -#else - case PARAM_GMAX: - sprintf(ptr, codes[i].description, nDevs); - break; + sprintf(ptr, codes[i].description, nDevs +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + , pga #endif +#ifdef WANT_CPUMINE + , cpu +#endif + ); + break; case PARAM_CMD: sprintf(ptr, codes[i].description, JSON_COMMAND); break; @@ -500,6 +653,7 @@ static void apiversion(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, static void minerconfig(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) { char buf[BUFSIZ]; + int pgacount = 0; int cpucount = 0; char *adlinuse = (char *)NO; #ifdef HAVE_ADL @@ -516,6 +670,10 @@ static void minerconfig(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, const char *adl = NO; #endif +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + pgacount = numpgas(); +#endif + #ifdef WANT_CPUMINE cpucount = opt_n_threads > 0 ? 
num_processors : 0; #endif @@ -523,9 +681,9 @@ static void minerconfig(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, strcpy(io_buffer, message(MSG_MINECON, 0, NULL, isjson)); if (isjson) - sprintf(buf, "," JSON_MINECON "{\"GPU Count\":%d,\"CPU Count\":%d,\"Pool Count\":%d,\"ADL\":\"%s\",\"ADL in use\":\"%s\",\"Strategy\":\"%s\",\"Log Interval\":\"%d\"}" JSON_CLOSE, nDevs, cpucount, total_pools, adl, adlinuse, strategies[pool_strategy].s, opt_log_interval); + sprintf(buf, "," JSON_MINECON "{\"GPU Count\":%d,\"PGA Count\":%d,\"CPU Count\":%d,\"Pool Count\":%d,\"ADL\":\"%s\",\"ADL in use\":\"%s\",\"Strategy\":\"%s\",\"Log Interval\":%d,\"Device Code\":\"%s\",\"OS\":\"%s\"}" JSON_CLOSE, nDevs, pgacount, cpucount, total_pools, adl, adlinuse, strategies[pool_strategy].s, opt_log_interval, DEVICECODE, OSINFO); else - sprintf(buf, _MINECON ",GPU Count=%d,CPU Count=%d,Pool Count=%d,ADL=%s,ADL in use=%s,Strategy=%s,Log Interval=%d%c", nDevs, cpucount, total_pools, adl, adlinuse, strategies[pool_strategy].s, opt_log_interval, SEPARATOR); + sprintf(buf, _MINECON ",GPU Count=%d,PGA Count=%d,CPU Count=%d,Pool Count=%d,ADL=%s,ADL in use=%s,Strategy=%s,Log Interval=%d,Device Code=%s,OS=%s%c", nDevs, pgacount, cpucount, total_pools, adl, adlinuse, strategies[pool_strategy].s, opt_log_interval, DEVICECODE, OSINFO, SEPARATOR); strcat(io_buffer, buf); } @@ -589,6 +747,59 @@ static void gpustatus(int gpu, bool isjson) } } +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +static void pgastatus(int pga, bool isjson) +{ + char buf[BUFSIZ]; + char *enabled; + char *status; + int numpga = numpgas(); + + if (numpga > 0 && pga >= 0 && pga < numpga) { + int dev = pgadevice(pga); + if (dev < 0) // Should never happen + return; + + struct cgpu_info *cgpu = devices[dev]; + + cgpu->utility = cgpu->accepted / ( total_secs ? 
total_secs : 1 ) * 60; + + if (cgpu->deven != DEV_DISABLED) + enabled = (char *)YES; + else + enabled = (char *)NO; + + if (cgpu->status == LIFE_DEAD) + status = (char *)DEAD; + else if (cgpu->status == LIFE_SICK) + status = (char *)SICK; + else if (cgpu->status == LIFE_NOSTART) + status = (char *)NOSTART; + else + status = (char *)ALIVE; + + if (isjson) + sprintf(buf, "{\"PGA\":%d,\"Name\":\"%s\",\"ID\":%d,\"Enabled\":\"%s\",\"Status\":\"%s\",\"Temperature\":%.2f,\"MHS av\":%.2f,\"MHS %ds\":%.2f,\"Accepted\":%d,\"Rejected\":%d,\"Hardware Errors\":%d,\"Utility\":%.2f,\"Last Share Pool\":%d,\"Last Share Time\":%lu,\"Total MH\":%.4f}", + pga, cgpu->api->name, cgpu->device_id, + enabled, status, cgpu->temp, + cgpu->total_mhashes / total_secs, opt_log_interval, cgpu->rolling, + cgpu->accepted, cgpu->rejected, cgpu->hw_errors, cgpu->utility, + ((unsigned long)(cgpu->last_share_pool_time) > 0) ? cgpu->last_share_pool : -1, + (unsigned long)(cgpu->last_share_pool_time), cgpu->total_mhashes); + else + sprintf(buf, "PGA=%d,Name=%s,ID=%d,Enabled=%s,Status=%s,Temperature=%.2f,MHS av=%.2f,MHS %ds=%.2f,Accepted=%d,Rejected=%d,Hardware Errors=%d,Utility=%.2f,Last Share Pool=%d,Last Share Time=%lu,Total MH=%.4f%c", + pga, cgpu->api->name, cgpu->device_id, + enabled, status, cgpu->temp, + cgpu->total_mhashes / total_secs, opt_log_interval, cgpu->rolling, + cgpu->accepted, cgpu->rejected, cgpu->hw_errors, cgpu->utility, + ((unsigned long)(cgpu->last_share_pool_time) > 0) ? 
cgpu->last_share_pool : -1, + (unsigned long)(cgpu->last_share_pool_time), cgpu->total_mhashes, SEPARATOR); + + strcat(io_buffer, buf); + } +} +#endif + #ifdef WANT_CPUMINE static void cpustatus(int cpu, bool isjson) { @@ -623,6 +834,7 @@ static void cpustatus(int cpu, bool isjson) static void devstatus(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) { + int devcount = 0; int i; if (nDevs == 0 && opt_n_threads == 0) { @@ -638,19 +850,37 @@ static void devstatus(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, b } for (i = 0; i < nDevs; i++) { - if (isjson && i > 0) + if (isjson && devcount > 0) strcat(io_buffer, COMMA); gpustatus(i, isjson); + + devcount++; } +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + int numpga = numpgas(); + + if (numpga > 0) + for (i = 0; i < numpga; i++) { + if (isjson && devcount > 0) + strcat(io_buffer, COMMA); + + pgastatus(i, isjson); + + devcount++; + } +#endif + #ifdef WANT_CPUMINE if (opt_n_threads > 0) for (i = 0; i < num_processors; i++) { - if (isjson && (i > 0 || nDevs > 0)) + if (isjson && devcount > 0) strcat(io_buffer, COMMA); cpustatus(i, isjson); + + devcount++; } #endif @@ -691,6 +921,42 @@ static void gpudev(__maybe_unused SOCKETTYPE c, char *param, bool isjson) strcat(io_buffer, JSON_CLOSE); } +#if defined(USE_BITFORCE) || defined(USE_ICARUS) +static void pgadev(__maybe_unused SOCKETTYPE c, char *param, bool isjson) +{ + int numpga = numpgas(); + int id; + + if (numpga == 0) { + strcpy(io_buffer, message(MSG_PGANON, 0, NULL, isjson)); + return; + } + + if (param == NULL || *param == '\0') { + strcpy(io_buffer, message(MSG_MISID, 0, NULL, isjson)); + return; + } + + id = atoi(param); + if (id < 0 || id >= numpga) { + strcpy(io_buffer, message(MSG_INVPGA, id, NULL, isjson)); + return; + } + + strcpy(io_buffer, message(MSG_PGADEV, id, NULL, isjson)); + + if (isjson) { + strcat(io_buffer, COMMA); + strcat(io_buffer, JSON_PGA); + } + + pgastatus(id, isjson); + + if (isjson) + 
strcat(io_buffer, JSON_CLOSE); +} +#endif + #ifdef WANT_CPUMINE static void cpudev(__maybe_unused SOCKETTYPE c, char *param, bool isjson) { @@ -949,6 +1215,25 @@ static void gpucount(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bo strcat(io_buffer, buf); } +static void pgacount(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) +{ + char buf[BUFSIZ]; + int count = 0; + +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + count = numpgas(); +#endif + + strcpy(io_buffer, message(MSG_NUMPGA, 0, NULL, isjson)); + + if (isjson) + sprintf(buf, "," JSON_PGAS "{\"Count\":%d}" JSON_CLOSE, count); + else + sprintf(buf, _PGAS ",Count=%d%c", count, SEPARATOR); + + strcat(io_buffer, buf); +} + static void cpucount(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) { char buf[BUFSIZ]; @@ -1315,6 +1600,89 @@ void privileged(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool is strcpy(io_buffer, message(MSG_ACCOK, 0, NULL, isjson)); } +void notifystatus(int device, struct cgpu_info *cgpu, bool isjson) +{ + char buf[BUFSIZ]; + char *reason; + + if (cgpu->device_last_not_well == 0) + reason = REASON_NONE; + else + switch(cgpu->device_not_well_reason) { + case REASON_THREAD_FAIL_INIT: + reason = REASON_THREAD_FAIL_INIT_STR; + break; + case REASON_THREAD_ZERO_HASH: + reason = REASON_THREAD_ZERO_HASH_STR; + break; + case REASON_THREAD_FAIL_QUEUE: + reason = REASON_THREAD_FAIL_QUEUE_STR; + break; + case REASON_DEV_SICK_IDLE_60: + reason = REASON_DEV_SICK_IDLE_60_STR; + break; + case REASON_DEV_DEAD_IDLE_600: + reason = REASON_DEV_DEAD_IDLE_600_STR; + break; + case REASON_DEV_NOSTART: + reason = REASON_DEV_NOSTART_STR; + break; + case REASON_DEV_OVER_HEAT: + reason = REASON_DEV_OVER_HEAT_STR; + break; + case REASON_DEV_THERMAL_CUTOFF: + reason = REASON_DEV_THERMAL_CUTOFF_STR; + break; + default: + reason = REASON_UNKNOWN_STR; + break; + } + + // ALL counters (and only counters) must start the name with a '*' + // Simplifies 
future external support for adding new counters + if (isjson) + sprintf(buf, "%s{\"NOTIFY\":%d,\"Name\":\"%s\",\"ID\":%d,\"Last Well\":%lu,\"Last Not Well\":%lu,\"Reason Not Well\":\"%s\",\"*Thread Fail Init\":%d,\"*Thread Zero Hash\":%d,\"*Thread Fail Queue\":%d,\"*Dev Sick Idle 60s\":%d,\"*Dev Dead Idle 600s\":%d,\"*Dev Nostart\":%d,\"*Dev Over Heat\":%d,\"*Dev Thermal Cutoff\":%d}" JSON_CLOSE, + device > 0 ? "," : "", device, cgpu->api->name, cgpu->device_id, + cgpu->device_last_well, cgpu->device_last_not_well, reason, + cgpu->thread_fail_init_count, cgpu->thread_zero_hash_count, + cgpu->thread_fail_queue_count, cgpu->dev_sick_idle_60_count, + cgpu->dev_dead_idle_600_count, cgpu->dev_nostart_count, + cgpu->dev_over_heat_count, cgpu->dev_thermal_cutoff_count); + else + sprintf(buf, "NOTIFY=%d,Name=%s,ID=%d,Last Well=%lu,Last Not Well=%lu,Reason Not Well=%s,*Thread Fail Init=%d,*Thread Zero Hash=%d,*Thread Fail Queue=%d,*Dev Sick Idle 60s=%d,*Dev Dead Idle 600s=%d,*Dev Nostart=%d,*Dev Over Heat=%d,*Dev Thermal Cutoff=%d%c", + device, cgpu->api->name, cgpu->device_id, + cgpu->device_last_well, cgpu->device_last_not_well, reason, + cgpu->thread_fail_init_count, cgpu->thread_zero_hash_count, + cgpu->thread_fail_queue_count, cgpu->dev_sick_idle_60_count, + cgpu->dev_dead_idle_600_count, cgpu->dev_nostart_count, + cgpu->dev_over_heat_count, cgpu->dev_thermal_cutoff_count, SEPARATOR); + + strcat(io_buffer, buf); +} + +static void notify(__maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson) +{ + int i; + + if (total_devices == 0) { + strcpy(io_buffer, message(MSG_NODEVS, 0, NULL, isjson)); + return; + } + + strcpy(io_buffer, message(MSG_NOTIFY, 0, NULL, isjson)); + + if (isjson) { + strcat(io_buffer, COMMA); + strcat(io_buffer, JSON_NOTIFY); + } + + for (i = 0; i < total_devices; i++) + notifystatus(i, devices[i], isjson); + + if (isjson) + strcat(io_buffer, JSON_CLOSE); +} + void dosave(__maybe_unused SOCKETTYPE c, char *param, bool isjson) { FILE 
*fcfg; @@ -1350,10 +1718,14 @@ struct CMDS { { "gpudisable", gpudisable, true }, { "gpurestart", gpurestart, true }, { "gpu", gpudev, false }, +#if defined(USE_BITFORCE) || defined(USE_ICARUS) + { "pga", pgadev, false }, +#endif #ifdef WANT_CPUMINE { "cpu", cpudev, false }, #endif { "gpucount", gpucount, false }, + { "pgacount", pgacount, false }, { "cpucount", cpucount, false }, { "switchpool", switchpool, true }, { "addpool", addpool, true }, @@ -1367,6 +1739,7 @@ struct CMDS { { "save", dosave, true }, { "quit", doquit, true }, { "privileged", privileged, true }, + { "notify", notify, false }, { NULL, NULL, false } }; diff --git a/bitforce.c b/bitforce.c index 556354f8..1118ee16 100644 --- a/bitforce.c +++ b/bitforce.c @@ -309,6 +309,10 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6 if (temp > bitforce->cutofftemp) { applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id); bitforce->deven = DEV_RECOVER; + + bitforce->device_last_not_well = time(NULL); + bitforce->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF; + bitforce->dev_thermal_cutoff_count++; } } } diff --git a/cgminer.c b/cgminer.c index ab862d99..5efd380e 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2889,6 +2889,7 @@ void thread_reportin(struct thr_info *thr) gettimeofday(&thr->last, NULL); thr->cgpu->status = LIFE_WELL; thr->getwork = false; + thr->cgpu->device_last_well = time(NULL); } static inline void thread_reportout(struct thr_info *thr) @@ -2909,8 +2910,10 @@ static void hashmeter(int thr_id, struct timeval *diff, bool showlog = false; /* Update the last time this thread reported in */ - if (thr_id >= 0) + if (thr_id >= 0) { gettimeofday(&thr_info[thr_id].last, NULL); + thr_info[thr_id].cgpu->device_last_well = time(NULL); + } /* Don't bother calculating anything if we're not displaying it */ if (opt_realquiet || !opt_log_interval) @@ -3426,8 +3429,13 @@ void *miner_thread(void *userdata) bool requested 
= false; pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - if (api->thread_init && !api->thread_init(mythr)) + if (api->thread_init && !api->thread_init(mythr)) { + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_THREAD_FAIL_INIT; + cgpu->thread_fail_init_count++; + goto out; + } thread_reportout(mythr); applog(LOG_DEBUG, "Popping ping in miner thread"); @@ -3476,8 +3484,14 @@ void *miner_thread(void *userdata) break; } - if (unlikely(!hashes)) + if (unlikely(!hashes)) { + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_THREAD_ZERO_HASH; + cgpu->thread_zero_hash_count++; + goto out; + } + hashes_done += hashes; if (hashes > cgpu->max_hashes) cgpu->max_hashes = hashes; @@ -3497,6 +3511,11 @@ void *miner_thread(void *userdata) thread_reportout(mythr); if (unlikely(!queue_request(mythr, false))) { applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id); + + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_THREAD_FAIL_QUEUE; + cgpu->thread_fail_queue_count++; + goto out; } thread_reportin(mythr); @@ -3882,11 +3901,16 @@ static void *watchdog_thread(void __maybe_unused *userdata) if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) { applog(LOG_ERR, "Device %d recovered, GPU %d declared WELL!", i, gpu); gpus[gpu].status = LIFE_WELL; + gpus[gpu].device_last_well = time(NULL); } else if (now.tv_sec - thr->last.tv_sec > 60 && gpus[gpu].status == LIFE_WELL) { thr->rolling = thr->cgpu->rolling = 0; gpus[gpu].status = LIFE_SICK; applog(LOG_ERR, "Device %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu); gettimeofday(&thr->sick, NULL); + + gpus[gpu].device_last_not_well = time(NULL); + gpus[gpu].device_not_well_reason = REASON_DEV_SICK_IDLE_60; + gpus[gpu].dev_sick_idle_60_count++; #ifdef HAVE_ADL if (adl_active && gpus[gpu].has_adl && gpu_activity(gpu) > 50) { applog(LOG_ERR, "GPU still showing activity suggesting a hard 
hang."); @@ -3901,6 +3925,10 @@ static void *watchdog_thread(void __maybe_unused *userdata) gpus[gpu].status = LIFE_DEAD; applog(LOG_ERR, "Device %d not responding for more than 10 minutes, GPU %d declared DEAD!", i, gpu); gettimeofday(&thr->sick, NULL); + + gpus[gpu].device_last_not_well = time(NULL); + gpus[gpu].device_not_well_reason = REASON_DEV_DEAD_IDLE_600; + gpus[gpu].dev_dead_idle_600_count++; } else if (now.tv_sec - thr->sick.tv_sec > 60 && (gpus[i].status == LIFE_SICK || gpus[i].status == LIFE_DEAD)) { /* Attempt to restart a GPU that's sick or dead once every minute */ diff --git a/device-gpu.c b/device-gpu.c index 34c115d0..ec72c976 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -1191,6 +1191,11 @@ static bool opencl_thread_prepare(struct thr_info *thr) } cgpu->deven = DEV_DISABLED; cgpu->status = LIFE_NOSTART; + + cgpu->device_last_not_well = time(NULL); + cgpu->device_not_well_reason = REASON_DEV_NOSTART; + cgpu->dev_nostart_count++; + return false; } applog(LOG_INFO, "initCl() finished. Found %s", name); @@ -1250,6 +1255,8 @@ static bool opencl_thread_init(struct thr_info *thr) gpu->status = LIFE_WELL; + gpu->device_last_well = time(NULL); + return true; } diff --git a/icarus.c b/icarus.c index f5e20559..276cd8c7 100644 --- a/icarus.c +++ b/icarus.c @@ -100,7 +100,10 @@ static int icarus_open(const char *devpath) NULL, OPEN_EXISTING, 0, NULL); if (unlikely(hSerial == INVALID_HANDLE_VALUE)) return -1; - /* TODO: Needs setup read block time. 
just like VTIME = 10 */ + + COMMTIMEOUTS cto = {1000, 0, 1000, 0, 1000}; + SetCommTimeouts(hSerial, &cto); + return _open_osfhandle((LONG)hSerial, 0); #endif } @@ -120,7 +123,7 @@ static int icarus_gets(unsigned char *buf, size_t bufLen, int fd) rc++; if (rc == ICARUS_READ_FAULT_COUNT) { - applog(LOG_WARNING, + applog(LOG_DEBUG, "Icarus Read: No data in %d seconds", rc); return 1; } diff --git a/miner.h b/miner.h index 5f5fd68d..f244e819 100644 --- a/miner.h +++ b/miner.h @@ -221,6 +221,28 @@ enum cl_kernels { KL_DIABLO, }; +enum dev_reason { + REASON_THREAD_FAIL_INIT, + REASON_THREAD_ZERO_HASH, + REASON_THREAD_FAIL_QUEUE, + REASON_DEV_SICK_IDLE_60, + REASON_DEV_DEAD_IDLE_600, + REASON_DEV_NOSTART, + REASON_DEV_OVER_HEAT, + REASON_DEV_THERMAL_CUTOFF, +}; + +#define REASON_NONE "None" +#define REASON_THREAD_FAIL_INIT_STR "Thread failed to init" +#define REASON_THREAD_ZERO_HASH_STR "Thread got zero hashes" +#define REASON_THREAD_FAIL_QUEUE_STR "Thread failed to queue work" +#define REASON_DEV_SICK_IDLE_60_STR "Device idle for 60s" +#define REASON_DEV_DEAD_IDLE_600_STR "Device dead - idle for 600s" +#define REASON_DEV_NOSTART_STR "Device failed to start" +#define REASON_DEV_OVER_HEAT_STR "Device over heated" +#define REASON_DEV_THERMAL_CUTOFF_STR "Device reached thermal cutoff" +#define REASON_UNKNOWN_STR "Unknown reason - code bug" + struct cgpu_info { int cgminer_id; struct device_api *api; @@ -272,6 +294,18 @@ struct cgpu_info { #endif int last_share_pool; time_t last_share_pool_time; + + time_t device_last_well; + time_t device_last_not_well; + enum dev_reason device_not_well_reason; + int thread_fail_init_count; + int thread_zero_hash_count; + int thread_fail_queue_count; + int dev_sick_idle_60_count; + int dev_dead_idle_600_count; + int dev_nostart_count; + int dev_over_heat_count; // It's a warning but worth knowing + int dev_thermal_cutoff_count; }; struct thread_q { diff --git a/miner.php b/miner.php index d385a8ff..5cdcc7e4 100644 --- a/miner.php +++ 
b/miner.php @@ -1,30 +1,61 @@ Mine
'switchpool', + 'Enable' => 'enablepool', + 'Disable' => 'disablepool' ); +# +function showhead($cmd, $item, $values) +{ + global $poolcmd, $readonly; + + echo ''; + + foreach ($values as $name => $value) + { + if ($name == '0') + $name = ' '; + echo ""; + } + + if ($cmd == 'pools' && $readonly === false) + foreach ($poolcmd as $name => $pcmd) + echo ""; + + echo ''; } # function details($cmd, $list) { + global $poolcmd, $readonly; + + $dfmt = 'H:i:s j-M-Y \U\T\CP'; + $stas = array('S' => 'Success', 'W' => 'Warning', 'I' => 'Informational', 'E' => 'Error', 'F' => 'Fatal'); $tb = '
$name$name
'; @@ -211,7 +300,7 @@ function details($cmd, $list) echo $tb; - echo ''; + echo ''; echo $te.$tb; @@ -219,56 +308,40 @@ function details($cmd, $list) { echo ''; echo ''; + if (isset($list['STATUS']['When'])) + echo ''; $sta = $list['STATUS']['STATUS']; echo ''; echo ''; echo ''; } - echo $te.$tb; $section = ''; - $poolcmd = array( 'Switch to' => 'switchpool', - 'Enable' => 'enablepool', - 'Disable' => 'disablepool' ); - foreach ($list as $item => $values) { - if ($item != 'STATUS') - { - $section = $item; - - echo ''; - - foreach ($values as $name => $value) - { - if ($name == '0') - $name = ' '; - echo ""; - } - - if ($cmd == 'pools') - foreach ($poolcmd as $name => $pcmd) - echo ""; + if ($item == 'STATUS') + continue; - echo ''; + $sectionname = preg_replace('/\d/', '', $item); - break; + if ($sectionname != $section) + { + echo $te.$tb; + showhead($cmd, $item, $values); + $section = $sectionname; } - } - - foreach ($list as $item => $values) - { - if ($item == 'STATUS') - continue; echo ''; foreach ($values as $name => $value) - echo ''; + { + list($showvalue, $class) = fmt($section, $name, $value); + echo "$showvalue"; + } - if ($cmd == 'pools') + if ($cmd == 'pools' && $readonly === false) { reset($values); $pool = current($values); @@ -294,7 +367,7 @@ function details($cmd, $list) global $devs; $devs = null; # -function gpubuttons($count, $info) +function gpubuttons($count) { global $devs; @@ -408,7 +481,7 @@ function process($cmds, $rd, $ro) # function display() { - global $error; + global $error, $readonly, $notify; $error = null; @@ -418,7 +491,8 @@ function display() echo ""; $arg = trim(getparam('arg', true)); @@ -427,15 +501,20 @@ function display() $cmds = array( 'devs' => 'device list', 'summary' => 'summary information', - 'pools' => 'pool list', - 'config' => 'cgminer config'); + 'pools' => 'pool list'); + + if ($notify) + $cmds['notify'] = 'device status'; + + $cmds['config'] = 'cgminer config'; process($cmds, $rd, $ro); - if ($error == null) + 
if ($error == null && $readonly === false) processgpus($rd, $ro); } # +htmlhead(); display(); # ?>
Date: '.date('H:i:s j-M-Y \U\T\CP').'
Date: '.date($dfmt).'
Computer: '.$list['STATUS']['Description'].'When: '.date($dfmt, $list['STATUS']['When']).'Status: '.$stas[$sta].'Message: '.$list['STATUS']['Msg'].'
$name$name
'.fmt($section, $name, $value).'
"; echo ""; echo " "; - echo ""; + if ($readonly === false) + echo ""; echo "