diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 8c2e08b..2d0c68e 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)" true false compute_50,sm_50 - --optimize 2 + false diff --git a/cuda_helper.h b/cuda_helper.h index 8b0b3f6..72f7b7e 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -1,6 +1,8 @@ #ifndef CUDA_HELPER_H #define CUDA_HELPER_H +#include + static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) { #if __CUDA_ARCH__ >= 130 @@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse #define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #endif +// Macro to catch CUDA errors in CUDA runtime calls +#define CUDA_SAFE_CALL(call) \ +do { \ + cudaError_t err = call; \ + if (cudaSuccess != err) { \ + fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\ + __FILE__, __LINE__, cudaGetErrorString(err) ); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) + #endif // #ifndef CUDA_HELPER_H diff --git a/x11/x11.cu b/x11/x11.cu index 1e4d69c..2805302 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -18,7 +18,7 @@ extern "C" } #include -#include +#include // aus cpu-miner.c extern int device_map[8]; @@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) { - cudaSetDevice(device_map[thr_id]); - + CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); // Konstanten kopieren, Speicher belegen - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); + quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput); @@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, x11_simd512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput); quark_check_cpu_init(thr_id, throughput); + init[thr_id] = true; } @@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, quark_check_cpu_setTarget(ptarget); do { + uint32_t foundNonce; int order = 0; - // erstes Blake512 Hash mit CUDA + // Hash with CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - - // das ist der unbedingte Branch für BMW512 quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Groestl512 quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Skein512 quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für JH512 quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Keccak512 quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Luffa512 x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Cubehash512 x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Shavite512 x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für SIMD512 x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für ECHO512 x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); // Scan nach Gewinner Hashes auf der GPU - uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; diff --git a/x13/x13.cu b/x13/x13.cu index f48b604..ee94a6e 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -23,7 +23,7 @@ extern "C" } #include -#include +#include // aus cpu-miner.c extern int device_map[8]; @@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) { - cudaSetDevice(device_map[thr_id]); + CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); - // Konstanten kopieren, Speicher belegen - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput); @@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, x13_hamsi512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput); quark_check_cpu_init(thr_id, throughput); + init[thr_id] = true; } - //unsigned char echobefore[64], echoafter[64]; - - uint32_t endiandata[20]; + uint32_t endiandata[20]; for (int k=0; k < 20; k++) be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); @@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, quark_check_cpu_setTarget(ptarget); do { + uint32_t foundNonce; int order = 0; - // erstes Blake512 Hash mit CUDA quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); - - // das ist der unbedingte Branch für BMW512 quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Groestl512 quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Skein512 quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für JH512 quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Keccak512 quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Luffa512 x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Cubehash512 x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für Shavite512 x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für SIMD512 x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - // das ist der unbedingte Branch für ECHO512 x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); - - x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); // Scan nach Gewinner Hashes auf der GPU - uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); if (foundNonce != 0xffffffff) { uint32_t vhash64[8]; diff --git a/x15/x14.cu b/x15/x14.cu index 3e600f6..31c3ef3 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -25,7 +25,7 @@ extern "C" { } #include -#include +#include // from cpu-miner.c extern int device_map[8]; @@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - cudaSetDevice(device_map[thr_id]); - - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/x15/x15.cu b/x15/x15.cu index 8049f02..ebdb347 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -26,7 +26,7 @@ extern "C" { } #include -#include +#include // to test gpu hash on a null buffer #define NULLTEST 0 @@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - cudaSetDevice(device_map[thr_id]); - - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);