From 27b05db4a5044005895019c6965a7df9691a234b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sun, 9 Oct 2011 12:59:45 +1100 Subject: [PATCH] Use ADL activity report to tell us if a sick GPU is still busy, suggesting it is hard hung, and do not attempt to restart it. --- main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/main.c b/main.c index cee360bc..64e2eaf1 100644 --- a/main.c +++ b/main.c @@ -5017,6 +5017,12 @@ static void *watchdog_thread(void *userdata) gpus[gpu].status = LIFE_SICK; applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu); gettimeofday(&thr->sick, NULL); +#ifdef HAVE_ADL + if (adl_active && gpus[gpu].has_adl && gpu_activity(gpu) > 50) { + applog(LOG_ERR, "GPU still showing activity suggesting a hard hang."); + applog(LOG_ERR, "Will not attempt to auto-restart it."); + } else +#endif if (opt_restart) { applog(LOG_ERR, "Attempting to restart GPU"); reinit_device(thr->cgpu);