cuda: reduce possible segfaults on exit

not perfect but helps...
10 years ago · 85394cf22b
5 changed files with 26 additions and 12 deletions
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -58,6 +58,7 @@ BOOL WINAPI ConsoleHandler(DWORD);
 // from cuda.cpp
 int cuda_num_devices();
 void cuda_devicenames();
 void cuda_reset_device(int thr_id, bool *init);
 void cuda_shutdown();
 int cuda_finddevice(char *name);
 void cuda_print_devices();
@ -211,7 +212,6 @@ int device_lookup_gap[MAX_GPUS] = { 0 };
 int device_interactive[MAX_GPUS] = { 0 };
 int opt_nfactor = 0;
 bool opt_autotune = true;
 bool abort_flag = false;
 char *jane_params = NULL;
 // pools (failover/getwork infos)
@ -243,6 +243,7 @@ int longpoll_thr_id = -1;
 int stratum_thr_id = -1;
 int api_thr_id = -1;
 bool stratum_need_reset = false;
 volatile bool abort_flag = false;
 struct work_restart *work_restart = NULL;
 static int app_exit_code = EXIT_CODE_OK;
 uint32_t zr5_pok = 0;
@ -1947,7 +1948,7 @@ static void *miner_thread(void *userdata)
 		/* record scanhash elapsed time */
 		gettimeofday(&tv_end, NULL);
-		if (rc && opt_debug)
+		if (rc > 0 && opt_debug)
 			applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", nonceptr[0], swab32(nonceptr[0])); // data[19]
 		if (rc > 1 && opt_debug)
 			applog(LOG_NOTICE, CL_CYN "found => %08x" CL_GRN " %08x", nonceptr[2], swab32(nonceptr[2])); // data[21]
@ -1979,7 +1980,7 @@ static void *miner_thread(void *userdata)
 		if (rc > 1)
 			work.scanned_to = nonceptr[2];
-		else if (rc)
+		else if (rc > 0)
 			work.scanned_to = nonceptr[0];
 		else {
 			work.scanned_to = max_nonce;
@ -1990,6 +1991,9 @@ static void *miner_thread(void *userdata)
 			}
 		}
 		if (abort_flag)
 			break; // time to leave the mining loop...
 		if (check_dups)
 			hashlog_remember_scan_range(&work);
@ -2023,7 +2027,7 @@ static void *miner_thread(void *userdata)
 			firstwork_time = time(NULL);
 		/* if nonce found, submit work */
-		if (rc && !opt_benchmark) {
+		if (rc > 0 && !opt_benchmark) {
 			if (!submit_work(mythr, &work))
 				break;
@ -3486,7 +3490,8 @@ int main(int argc, char *argv[])
 	if (hnvml) {
 		applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for (int n=0; n < opt_n_threads; n++) {
-			nvml_set_clocks(hnvml, device_map[n]);
+			if (nvml_set_clocks(hnvml, device_map[n]) == 1)
 				cuda_reset_device(n, NULL);
 		}
 	}
 #else
--- a/cuda.cpp
+++ b/cuda.cpp
@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <memory.h>
 #include <string.h>
 #include <unistd.h>
 #include <map>
 #ifndef _WIN32
@ -60,6 +61,7 @@ void cuda_devicenames()
 		exit(1);
 	}
 	GPU_N = min(MAX_GPUS, GPU_N);
 	for (int i=0; i < GPU_N; i++)
 	{
 		cudaDeviceProp props;
@ -177,13 +179,20 @@ int cuda_gpu_clocks(struct cgpu_info *gpu)
 void cuda_reset_device(int thr_id, bool *init)
 {
 	int dev_id = device_map[thr_id];
-	for (int i=0; i < MAX_GPUS; i++) {
+	cudaSetDevice(dev_id);
-		if (device_map[i] == dev_id) {
+	if (init != NULL) {
-			init[i] = false;
+		// with init array, it's meant to be used in the algo's scan code...
 		for (int i=0; i < MAX_GPUS; i++) {
 			if (device_map[i] == dev_id) {
 				init[i] = false;
 			}
 		}
 		// force exit from algo's scan loops/function
 		restart_threads();
 		cudaDeviceSynchronize();
 		while (cudaStreamQuery(NULL) == cudaErrorNotReady)
 			usleep(1000);
 	}
 	restart_threads();
 	cudaDeviceSynchronize();
 	cudaDeviceReset();
 }
--- a/miner.h
+++ b/miner.h
@ -490,6 +490,7 @@ extern struct thr_info *thr_info;
 extern int longpoll_thr_id;
 extern int stratum_thr_id;
 extern int api_thr_id;
 extern volatile bool abort_flag;
 extern struct work_restart *work_restart;
 extern bool opt_trust_pool;
 extern uint16_t opt_vote;
--- a/nvml.cpp
+++ b/nvml.cpp
@ -342,7 +342,7 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	}
 	gpu_clocks_changed[dev_id] = 1;
-	return 0;
+	return 1;
 }
 /* reset default app clocks on a used device */
--- a/scrypt/salsa_kernel.h
+++ b/scrypt/salsa_kernel.h
@ -23,7 +23,6 @@ extern char *device_name[MAX_GPUS];
 extern bool opt_autotune;
 extern int opt_nfactor;
 extern char *jane_params;
 extern bool abort_flag;
 extern int parallel;
 extern void get_currentalgo(char* buf, int sz);