reset: take care of multi-threaded gpus (-d 0,0)

Still to be tested: this could cause problems when the reset happens inside a chained algorithm such as x11.
2015-04-21 09:11:04 +02:00 · 2015-04-21 09:11:04 +02:00 · 1ad34dc13d
commit 1ad34dc13d
parent c7698afb4a
6 changed files with 23 additions and 11 deletions
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -58,7 +58,7 @@ BOOL WINAPI ConsoleHandler(DWORD);
 // from cuda.cpp
 int cuda_num_devices();
 void cuda_devicenames();
-void cuda_devicereset();
+void cuda_shutdown();
 int cuda_finddevice(char *name);
 void cuda_print_devices();

@ -485,7 +485,7 @@ void proper_exit(int reason)
 {
 	abort_flag = true;
 	usleep(200 * 1000);
-	cuda_devicereset();
+	cuda_shutdown();

 	if (reason == EXIT_CODE_OK && app_exit_code != EXIT_CODE_OK) {
 		reason = app_exit_code;
--- a/cuda.cpp
+++ b/cuda.cpp
@ -82,9 +82,9 @@ void cuda_print_devices()
 	}
 }

-// Can't be called directly in cpu-miner.c
-void cuda_devicereset()
+void cuda_shutdown() // final CUDA teardown; invoked from proper_exit() in ccminer.cpp
 {
+	cudaDeviceSynchronize(); // let pending work on the current device finish before the reset (return code ignored)
 	cudaDeviceReset();
 }

@ -173,6 +173,20 @@ int cuda_gpu_clocks(struct cgpu_info *gpu)
 	return -1;
 }

+// Reset the gpu used by thread thr_id. If 2 threads share the same gpu (e.g. -d 0,0), every thread mapped to it gets its init[] flag cleared so it reinits. init is the calling algo's per-thread flag array (indexed up to MAX_GPUS).
+void cuda_reset_device(int thr_id, bool *init)
+{
+	int dev_id = device_map[thr_id]; // device index assigned to the calling thread
+	for (int i=0; i < MAX_GPUS; i++) { // scan the whole map, not only the caller's slot
+		if (device_map[i] == dev_id) {
+			init[i] = false; // the scanhash functions redo their `if (!init[thr_id])` setup when this is false
+		}
+	}
+	restart_threads(); // defined elsewhere; presumably makes the miner threads restart their work - TODO confirm
+	cudaDeviceSynchronize(); // wait for pending work on the current device (return code ignored)
+	cudaDeviceReset(); // NOTE(review): acts on the calling thread's current device, assumed to be dev_id - confirm; return code ignored
+}
+
 void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
 {
 	struct cgpu_info *gpu = &thr_info[thr_id].gpu;
--- a/cuda_helper.h
+++ b/cuda_helper.h
@ -24,6 +24,7 @@ extern int cuda_arch[MAX_GPUS];

 // common functions
 extern int cuda_get_arch(int thr_id);
+extern void cuda_reset_device(int thr_id, bool *init);
 extern void cuda_check_cpu_init(int thr_id, uint32_t threads);
 extern void cuda_check_cpu_setTarget(const void *ptarget);
 extern uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash);
--- a/skein.cu
+++ b/skein.cu
@ -359,7 +359,6 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p

 	if (!init[thr_id])
 	{
-		cudaDeviceReset();
 		cudaSetDevice(device_map[thr_id]);

 		cudaMalloc(&d_hash[thr_id], throughput * 64U);
--- a/skein2.cu
+++ b/skein2.cu
@ -48,15 +48,14 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptar

 	if (!init[thr_id])
 	{
-		cudaDeviceReset();
 		cudaSetDevice(device_map[thr_id]);

 		cudaMalloc(&d_hash[thr_id], throughput * 64U);

-                quark_skein512_cpu_init(thr_id, throughput);
-                cuda_check_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		cuda_check_cpu_init(thr_id, throughput);

-                CUDA_SAFE_CALL(cudaDeviceSynchronize());
+		CUDA_SAFE_CALL(cudaDeviceSynchronize());

 		init[thr_id] = true;
 	}
--- a/zr5.cu
+++ b/zr5.cu
@ -302,7 +302,6 @@ extern "C" int scanhash_zr5(int thr_id, uint32_t *pdata, const uint32_t *ptarget

 	if (!init[thr_id])
 	{
-		cudaDeviceReset();
 		cudaSetDevice(device_map[thr_id]);

 		// constants
@ -408,7 +407,7 @@ extern "C" int scanhash_zr5(int thr_id, uint32_t *pdata, const uint32_t *ptarget
 				applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], foundNonce);

 				// reinit the card..
-				init[thr_id] = false;
+				cuda_reset_device(thr_id, init);

 				pdata[19]++;
 				pdata[0] = oldp0;