@ -4567,7 +4567,6 @@ static void age_work(void)
# define WATCHDOG_DEAD_TIME 600
# define WATCHDOG_DEAD_TIME 600
# define WATCHDOG_SICK_COUNT (WATCHDOG_SICK_TIME / WATCHDOG_INTERVAL)
# define WATCHDOG_SICK_COUNT (WATCHDOG_SICK_TIME / WATCHDOG_INTERVAL)
# define WATCHDOG_DEAD_COUNT (WATCHDOG_DEAD_TIME / WATCHDOG_INTERVAL)
# define WATCHDOG_DEAD_COUNT (WATCHDOG_DEAD_TIME / WATCHDOG_INTERVAL)
# define WATCHDOG_LOW_HASH 1.0 /* consider < 1MH too low for any device */
static void * watchdog_thread ( void __maybe_unused * userdata )
static void * watchdog_thread ( void __maybe_unused * userdata )
{
{
@ -4648,9 +4647,6 @@ static void *watchdog_thread(void __maybe_unused *userdata)
struct cgpu_info * cgpu = devices [ i ] ;
struct cgpu_info * cgpu = devices [ i ] ;
struct thr_info * thr = cgpu - > thr [ 0 ] ;
struct thr_info * thr = cgpu - > thr [ 0 ] ;
enum dev_enable * denable ;
enum dev_enable * denable ;
bool dev_count_well ;
bool dev_count_sick ;
bool dev_count_dead ;
char dev_str [ 8 ] ;
char dev_str [ 8 ] ;
int gpu ;
int gpu ;
@ -4682,21 +4678,12 @@ static void *watchdog_thread(void __maybe_unused *userdata)
if ( ! strcmp ( cgpu - > api - > dname , " cpu " ) )
if ( ! strcmp ( cgpu - > api - > dname , " cpu " ) )
continue ;
continue ;
# endif
# endif
if ( cgpu - > rolling < WATCHDOG_LOW_HASH )
if ( cgpu - > status ! = LIFE_WELL & & ( now . tv_sec - thr - > last . tv_sec < WATCHDOG_SICK_TIME ) ) {
cgpu - > low_count + + ;
else
cgpu - > low_count = 0 ;
dev_count_well = ( cgpu - > low_count < WATCHDOG_SICK_COUNT ) ;
dev_count_sick = ( cgpu - > low_count > WATCHDOG_SICK_COUNT ) ;
dev_count_dead = ( cgpu - > low_count > WATCHDOG_DEAD_COUNT ) ;
if ( cgpu - > status ! = LIFE_WELL & & ( now . tv_sec - thr - > last . tv_sec < WATCHDOG_SICK_TIME ) & & dev_count_well ) {
if ( cgpu - > status ! = LIFE_INIT )
if ( cgpu - > status ! = LIFE_INIT )
applog ( LOG_ERR , " %s: Recovered, declaring WELL! " , dev_str ) ;
applog ( LOG_ERR , " %s: Recovered, declaring WELL! " , dev_str ) ;
cgpu - > status = LIFE_WELL ;
cgpu - > status = LIFE_WELL ;
cgpu - > device_last_well = time ( NULL ) ;
cgpu - > device_last_well = time ( NULL ) ;
} else if ( cgpu - > status = = LIFE_WELL & & ( ( now . tv_sec - thr - > last . tv_sec > WATCHDOG_SICK_TIME ) | | dev_count_sick ) ) {
} else if ( cgpu - > status = = LIFE_WELL & & ( now . tv_sec - thr - > last . tv_sec > WATCHDOG_SICK_TIME ) ) {
thr - > rolling = cgpu - > rolling = 0 ;
thr - > rolling = cgpu - > rolling = 0 ;
cgpu - > status = LIFE_SICK ;
cgpu - > status = LIFE_SICK ;
applog ( LOG_ERR , " %s: Idle for more than 60 seconds, declaring SICK! " , dev_str ) ;
applog ( LOG_ERR , " %s: Idle for more than 60 seconds, declaring SICK! " , dev_str ) ;
@ -4715,7 +4702,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
applog ( LOG_ERR , " %s: Attempting to restart " , dev_str ) ;
applog ( LOG_ERR , " %s: Attempting to restart " , dev_str ) ;
reinit_device ( cgpu ) ;
reinit_device ( cgpu ) ;
}
}
} else if ( cgpu - > status = = LIFE_SICK & & ( ( now . tv_sec - thr - > last . tv_sec > WATCHDOG_DEAD_TIME ) | | dev_count_dead ) ) {
} else if ( cgpu - > status = = LIFE_SICK & & ( now . tv_sec - thr - > last . tv_sec > WATCHDOG_DEAD_TIME ) ) {
cgpu - > status = LIFE_DEAD ;
cgpu - > status = LIFE_DEAD ;
applog ( LOG_ERR , " %s: Not responded for more than 10 minutes, declaring DEAD! " , dev_str ) ;
applog ( LOG_ERR , " %s: Not responded for more than 10 minutes, declaring DEAD! " , dev_str ) ;
gettimeofday ( & thr - > sick , NULL ) ;
gettimeofday ( & thr - > sick , NULL ) ;