diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 8c2e08b..2d0c68e 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)"
true
false
compute_50,sm_50
- --optimize 2
+
false
diff --git a/cuda_helper.h b/cuda_helper.h
index 8b0b3f6..72f7b7e 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -1,6 +1,8 @@
#ifndef CUDA_HELPER_H
#define CUDA_HELPER_H
+#include
+
static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
{
#if __CUDA_ARCH__ >= 130
@@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
+// Run a CUDA runtime call once; on any result other than cudaSuccess, print file, line and cudaGetErrorString() to stderr, then exit(EXIT_FAILURE).
+#define CUDA_SAFE_CALL(call) \
+do { \
+ cudaError_t cuda_safe_call_err_ = (call); /* private name so an `err` used inside `call` is not captured */ \
+ if (cudaSuccess != cuda_safe_call_err_) { \
+ fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
+ __FILE__, __LINE__, cudaGetErrorString(cuda_safe_call_err_) ); \
+ exit(EXIT_FAILURE); \
+ } \
+} while (0)
+
#endif // #ifndef CUDA_HELPER_H
diff --git a/x11/x11.cu b/x11/x11.cu
index 1e4d69c..2805302 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -18,7 +18,7 @@ extern "C"
}
#include
-#include
+#include
// aus cpu-miner.c
extern int device_map[8];
@@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
- cudaSetDevice(device_map[thr_id]);
-
+ CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
// Konstanten kopieren, Speicher belegen
- cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+ CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
+
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
@@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
+
init[thr_id] = true;
}
@@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget);
do {
+ uint32_t foundNonce;
int order = 0;
- // erstes Blake512 Hash mit CUDA
+ // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
- uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+ foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
diff --git a/x13/x13.cu b/x13/x13.cu
index f48b604..ee94a6e 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -23,7 +23,7 @@ extern "C"
}
#include
-#include
+#include
// aus cpu-miner.c
extern int device_map[8];
@@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
- cudaSetDevice(device_map[thr_id]);
+ CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+ CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
- // Konstanten kopieren, Speicher belegen
- cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
@@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
+
init[thr_id] = true;
}
- //unsigned char echobefore[64], echoafter[64];
-
- uint32_t endiandata[20];
+ uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
@@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget);
do {
+ uint32_t foundNonce;
int order = 0;
- // erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- // das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
-
- x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+ x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
- uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+ foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
diff --git a/x15/x14.cu b/x15/x14.cu
index 3e600f6..31c3ef3 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -25,7 +25,7 @@ extern "C" {
}
#include
-#include
+#include
// from cpu-miner.c
extern int device_map[8];
@@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
- cudaSetDevice(device_map[thr_id]);
-
- cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+ CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+ CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
diff --git a/x15/x15.cu b/x15/x15.cu
index 8049f02..ebdb347 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -26,7 +26,7 @@ extern "C" {
}
#include
-#include
+#include
// to test gpu hash on a null buffer
#define NULLTEST 0
@@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
- cudaSetDevice(device_map[thr_id]);
-
- cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+ CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+ CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);