cuda: check for errors on cuda mem alloc

2014-08-17 22:41:05 +02:00 · 2014-08-17 22:41:05 +02:00 · a9a3ad8afc
commit a9a3ad8afc
parent 6984a001d6
6 changed files with 36 additions and 67 deletions
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)"</Command>
      <PtxAsOptionV>true</PtxAsOptionV>
      <Keep>false</Keep>
      <CodeGeneration>compute_50,sm_50</CodeGeneration>
-      <Defines>--optimize 2</Defines>
+      <Defines></Defines>
    </CudaCompile>
    <CudaLink>
      <GPUDebugInfo>false</GPUDebugInfo>
--- a/cuda_helper.h
+++ b/cuda_helper.h
@ -1,6 +1,8 @@
 #ifndef CUDA_HELPER_H
 #define CUDA_HELPER_H

+#include <cuda_runtime.h>
+
 static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 {
 #if __CUDA_ARCH__ >= 130
@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse
 #define ROTL64(x, n)        (((x) << (n)) | ((x) >> (64 - (n))))
 #endif

+// Macro to catch CUDA errors in CUDA runtime calls: on failure, print file, line and error string to stderr, then exit.
+#define CUDA_SAFE_CALL(call)                                          \
+do {                                                                  \
+    /* (call) is parenthesized and evaluated once; the private name */ \
+    cudaError_t cuda_safe_call_err_ = (call); /* cannot shadow a caller's `err` */ \
+    if (cudaSuccess != cuda_safe_call_err_) {                         \
+        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
+                 __FILE__, __LINE__, cudaGetErrorString(cuda_safe_call_err_) ); \
+        exit(EXIT_FAILURE); /* terminate the process on a failed call */ \
+    }                                                                 \
+
 #endif // #ifndef CUDA_HELPER_H
--- a/x11/x11.cu
+++ b/x11/x11.cu
@ -18,7 +18,7 @@ extern "C"
 }

 #include <stdint.h>
-#include <cuda_runtime.h>
+#include <cuda_helper.h>

 // aus cpu-miner.c
 extern int device_map[8];
@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	if (!init[thr_id])
 	{
-		cudaSetDevice(device_map[thr_id]);
-
+		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
 		// Konstanten kopieren, Speicher belegen
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
+
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 		x11_simd512_cpu_init(thr_id, throughput);
 		x11_echo512_cpu_init(thr_id, throughput);
 		quark_check_cpu_init(thr_id, throughput);
+
 		init[thr_id] = true;
 	}

@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
 	quark_check_cpu_setTarget(ptarget);

 	do {
+		uint32_t foundNonce;
 		int order = 0;

-		// erstes Blake512 Hash mit CUDA
+		// Hash with CUDA
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für BMW512
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Groestl512
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Skein512
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für JH512
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Keccak512
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Luffa512
 		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Cubehash512
 		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Shavite512
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für SIMD512
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für ECHO512
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

 		// Scan nach Gewinner Hashes auf der GPU
-		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		if  (foundNonce != 0xffffffff)
 		{
 			uint32_t vhash64[8];
--- a/x13/x13.cu
+++ b/x13/x13.cu
@ -23,7 +23,7 @@ extern "C"
 }

 #include <stdint.h>
-#include <cuda_runtime.h>
+#include <cuda_helper.h>

 // aus cpu-miner.c
 extern int device_map[8];
@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	if (!init[thr_id])
 	{
-		cudaSetDevice(device_map[thr_id]);
+		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));

-		// Konstanten kopieren, Speicher belegen
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
 		x13_hamsi512_cpu_init(thr_id, throughput);
 		x13_fugue512_cpu_init(thr_id, throughput);
 		quark_check_cpu_init(thr_id, throughput);
+
 		init[thr_id] = true;
 	}

-	//unsigned char echobefore[64], echoafter[64];
-
-    uint32_t endiandata[20];
+	uint32_t endiandata[20];
 	for (int k=0; k < 20; k++)
 		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
 	quark_check_cpu_setTarget(ptarget);

 	do {
+		uint32_t foundNonce;
 		int order = 0;

-        // erstes Blake512 Hash mit CUDA
 		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für BMW512
 		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Groestl512
 		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Skein512
 		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für JH512
 		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Keccak512
 		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Luffa512
 		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Cubehash512
 		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für Shavite512
 		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für SIMD512
 		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-		// das ist der unbedingte Branch für ECHO512
 		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
 		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
-        x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

 		// Scan nach Gewinner Hashes auf der GPU
-		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
 		if  (foundNonce != 0xffffffff)
 		{
 			uint32_t vhash64[8];
--- a/x15/x14.cu
+++ b/x15/x14.cu
@ -25,7 +25,7 @@ extern "C" {
 }

 #include <stdint.h>
-#include <cuda_runtime.h>
+#include <cuda_helper.h>

 // from cpu-miner.c
 extern int device_map[8];
@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,

 	if (!init[thr_id])
 	{
-		cudaSetDevice(device_map[thr_id]);
-
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));

 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
--- a/x15/x15.cu
+++ b/x15/x15.cu
@ -26,7 +26,7 @@ extern "C" {
 }

 #include <stdint.h>
-#include <cuda_runtime.h>
+#include <cuda_helper.h>

 // to test gpu hash on a null buffer
 #define NULLTEST 0
@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,

 	if (!init[thr_id])
 	{
-		cudaSetDevice(device_map[thr_id]);
-
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));

 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);