Browse Source

Use the ADL activity report to tell us whether a sick GPU is still busy, which suggests it is hard hung; if so, do not attempt to restart it.

nfactor-troky
Con Kolivas 13 years ago
parent
commit
27b05db4a5
  1. 6
      main.c

6
main.c

@ -5017,6 +5017,12 @@ static void *watchdog_thread(void *userdata)
gpus[gpu].status = LIFE_SICK; gpus[gpu].status = LIFE_SICK;
applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu); applog(LOG_ERR, "Thread %d idle for more than 60 seconds, GPU %d declared SICK!", i, gpu);
gettimeofday(&thr->sick, NULL); gettimeofday(&thr->sick, NULL);
#ifdef HAVE_ADL
if (adl_active && gpus[gpu].has_adl && gpu_activity(gpu) > 50) {
applog(LOG_ERR, "GPU still showing activity suggesting a hard hang.");
applog(LOG_ERR, "Will not attempt to auto-restart it.");
} else
#endif
if (opt_restart) { if (opt_restart) {
applog(LOG_ERR, "Attempting to restart GPU"); applog(LOG_ERR, "Attempting to restart GPU");
reinit_device(thr->cgpu); reinit_device(thr->cgpu);

Loading…
Cancel
Save