Add missing real cuda arch checks

8 years ago · 2cdf2ddd43
6 changed files with 11 additions and 0 deletions
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -506,6 +506,8 @@ extern "C" int scanhash_blake256(int thr_id, struct work* work, uint32_t max_non
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);
+
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		init[thr_id] = true;
--- a/Algo256/blake2s.cu
+++ b/Algo256/blake2s.cu
@@ -458,6 +458,8 @@ extern "C" int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonc
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);
+
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], maxResults * sizeof(uint32_t)), -1);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], maxResults * sizeof(uint32_t)), -1);
 		init[thr_id] = true;
--- a/Algo256/decred.cu
+++ b/Algo256/decred.cu
@@ -374,6 +374,8 @@ extern "C" int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);
+
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], MAX_RESULTS*sizeof(uint32_t)), -1);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], MAX_RESULTS*sizeof(uint32_t)), -1);
 		init[thr_id] = true;
--- a/Algo256/vanilla.cu
+++ b/Algo256/vanilla.cu
@@ -394,6 +394,8 @@ extern "C" int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonc
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);
+
 		CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), -1);
 		cudaStreamCreate(&streams[thr_id]);
--- a/lbry/lbry.cu
+++ b/lbry/lbry.cu
@@ -119,6 +119,7 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);

 		if (CUDART_VERSION == 6050) {
 			applog(LOG_ERR, "This lbry kernel is not compatible with CUDA 6.5!");
--- a/skein.cu
+++ b/skein.cu
@@ -374,6 +374,8 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
 		}
 		gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

+		cuda_get_arch(thr_id);
+
 		if (sm5) {
 			skeincoin_init(thr_id);
 		} else {