Only use one thread to determine if a GPU is sick or well, and make sure to reset the sick restart attempt time.

2025-09-03 09:42:17 +00:00 · 2011-08-23 10:47:47 +10:00 · 2011-08-23 10:47:47 +10:00 · 088ee2fa29
commit 088ee2fa29
parent 02e126f42d
1 changed file with 9 additions and 2 deletions
--- a/main.c
+++ b/main.c
@@ -4128,8 +4128,14 @@ static void *watchdog_thread(void *userdata)

 		//for (i = 0; i < mining_threads; i++) {
 		for (i = 0; i < gpu_threads; i++) {
-			struct thr_info *thr = &thr_info[i];
-			int gpu = thr->cgpu->cpu_gpu;
+			struct thr_info *thr;
+			int gpu;
+
+			/* Use only one thread per device to determine if the GPU is healthy */
+			if (i >= nDevs)
+				break;
+			thr = &thr_info[i];
+			gpu = thr->cgpu->cpu_gpu;

 			/* Thread is waiting on getwork or disabled */
 			if (thr->getwork || !gpu_devices[gpu])
@@ -4150,6 +4156,7 @@ static void *watchdog_thread(void *userdata)
 				applog(LOG_ERR, "Thread %d not responding for more than 10 minutes, GPU %d declared DEAD!", i, gpu);
 			} else if (now.tv_sec - thr->sick.tv_sec > 60 && gpus[i].status == LIFE_SICK) {
 				/* Attempt to restart a GPU once every minute */
+				gettimeofday(&thr->sick, NULL);
 				reinit_device(thr->cgpu);
 			}
 		}