From 73f22b237a56daa42228243c4769bbd20a16f9d5 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 20 Nov 2014 17:34:37 +0100 Subject: [PATCH] Prepare trap of hardware/mem failures --- api.cpp | 3 --- api/index.php | 1 + blake32.cu | 8 +++----- ccminer.cpp | 5 +++-- cuda.cpp | 13 ++++++++++++- cuda_checkhash.cu | 6 +++--- cuda_helper.h | 26 ++++++++++++++++++++++---- cuda_nist5.cu | 2 +- miner.h | 3 +++ pentablake.cu | 7 ++----- quark/cuda_quark_blake512.cu | 4 ++-- qubit/qubit.cu | 6 +++--- stats.cpp | 1 - x11/cuda_x11_aes.cu | 18 +++++++++--------- x11/cuda_x11_echo.cu | 2 +- x11/cuda_x11_luffa512.cu | 9 +++++---- x11/cuda_x11_shavite512.cu | 2 +- x11/cuda_x11_simd512.cu | 13 ++++++------- x11/fresh.cu | 8 ++++---- x11/s3.cu | 15 +++++++-------- x11/x11.cu | 11 +++++++---- x13/x13.cu | 12 ++++++++---- x15/x14.cu | 7 ++++--- x15/x15.cu | 7 ++++--- x17/x17.cu | 5 +++-- 25 files changed, 114 insertions(+), 80 deletions(-) diff --git a/api.cpp b/api.cpp index 0e8b5b3..482fcc4 100644 --- a/api.cpp +++ b/api.cpp @@ -101,14 +101,11 @@ static char *buffer = NULL; static time_t startup = 0; static int bye = 0; -extern int opt_intensity; -extern int opt_n_threads; extern char *opt_api_allow; extern int opt_api_listen; /* port */ extern uint64_t global_hashrate; extern uint32_t accepted_count; extern uint32_t rejected_count; -extern int num_processors; extern int device_map[8]; extern char *device_name[8]; diff --git a/api/index.php b/api/index.php index fb57b9e..de15c8d 100644 --- a/api/index.php +++ b/api/index.php @@ -52,6 +52,7 @@ function translateField($key) $intl['H'] = 'Bloc height'; $intl['I'] = 'Intensity'; + $intl['HWF'] = 'Failures'; $intl['TEMP'] = 'T°c'; $intl['FAN'] = 'Fan %'; diff --git a/blake32.cu b/blake32.cu index 65df026..87bd1ca 100644 --- a/blake32.cu +++ b/blake32.cu @@ -17,8 +17,6 @@ extern "C" { /* threads per block and throughput (intensity) */ #define TPB 128 -extern int num_processors; - /* added in sph_blake.c */ extern "C" int blake256_rounds = 14; @@ -428,9 +426,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt if (!init[thr_id]) { if (num_processors > 1) - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t))); - CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t))); + cudaSetDevice(device_map[thr_id]); + CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); init[thr_id] = true; } diff --git a/ccminer.cpp b/ccminer.cpp index 0a7f003..9ae3cb7 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -1104,9 +1104,9 @@ static void *miner_thread(void *userdata) if (max64 < minmax) { switch (opt_algo) { case ALGO_BLAKECOIN: - minmax = 0x4000000; - break; case ALGO_BLAKE: + minmax = 0x80000000U; + break; case ALGO_DOOM: case ALGO_JACKPOT: case ALGO_KECCAK: @@ -1158,6 +1158,7 @@ static void *miner_thread(void *userdata) applog(LOG_DEBUG, "job done, wait for a new one..."); work_restart[thr_id].restart = 1; hashlog_purge_old(); + stats_purge_old(); // wait a bit for a new job... usleep(500*1000); (*nonceptr) = end_nonce + 1; diff --git a/cuda.cpp b/cuda.cpp index 1a7fc38..780b648 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -1,7 +1,6 @@ #include #include #include - #include #ifndef _WIN32 @@ -22,6 +21,10 @@ #include "cuda_runtime.h" +#ifdef WIN32 +#include "compat.h" // sleep +#endif + extern char *device_name[8]; extern int device_map[8]; extern int device_sm[8]; @@ -145,3 +148,11 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) result = cudaStreamSynchronize(stream); return result; } + +void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func) +{ + struct cgpu_info *gpu = &thr_info[thr_id].gpu; + gpu->hw_errors++; + applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err)); + sleep(1); +} diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu index e129de8..e20f81d 100644 --- a/cuda_checkhash.cu +++ b/cuda_checkhash.cu @@ -43,11 +43,11 @@ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonce __host__ void cuda_check_cpu_init(int thr_id, int threads) { - cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t)); - cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t)); + CUDA_CALL_OR_RET(cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t))); + CUDA_CALL_OR_RET(cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t))); } -// Target Difficulty setzen +// Target Difficulty __host__ void cuda_check_cpu_setTarget(const void *ptarget) { diff --git a/cuda_helper.h b/cuda_helper.h index 2d5af22..249599f 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -22,6 +22,7 @@ extern void cuda_check_cpu_setTarget(const void *ptarget); extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); extern uint32_t cuda_check_hash_fast(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, int order); extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern void cudaReportHardwareFailure(int thr_id, cudaError_t error, const char* func); extern __device__ __device_builtin__ void __syncthreads(void); @@ -122,17 +123,34 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x) #endif /*********************************************************************/ -// Macro to catch CUDA errors in CUDA runtime calls +// Macros to catch CUDA errors in CUDA runtime calls + #define CUDA_SAFE_CALL(call) \ do { \ cudaError_t err = call; \ if (cudaSuccess != err) { \ - fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\ - __FILE__, __LINE__, cudaGetErrorString(err) ); \ + fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \ + __FUNCTION__, __LINE__, cudaGetErrorString(err) ); \ exit(EXIT_FAILURE); \ } \ } while (0) +#define CUDA_CALL_OR_RET(call) do { \ + cudaError_t err = call; \ + if (cudaSuccess != err) { \ + cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \ + return; \ + } \ +} while (0) + +#define CUDA_CALL_OR_RET_X(call, ret) do { \ + cudaError_t err = call; \ + if (cudaSuccess != err) { \ + cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \ + return ret; \ + } \ +} while (0) + /*********************************************************************/ #ifdef _WIN64 #define USE_XOR_ASM_OPTS 0 @@ -185,7 +203,7 @@ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t return result; } #else -#define xor8(a,b,c,d,e,f,g,h) (a^b^c^d^e^f^g^h) +#define xor8(a,b,c,d,e,f,g,h) ((a^b)^(c^d)^(e^f)^(g^h)) #endif // device asm for x17 diff --git a/cuda_nist5.cu b/cuda_nist5.cu index 9e4f9e7..3a07c38 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -75,7 +75,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, ((uint32_t*)ptarget)[7] = 0x00FF; int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 - throughput = min(throughput, max_nonce - first_nonce); + throughput = min(throughput, (int) (max_nonce - first_nonce)); static bool init[8] = {0,0,0,0,0,0,0,0}; if (!init[thr_id]) diff --git a/miner.h b/miner.h index 2280071..4c84c87 100644 --- a/miner.h +++ b/miner.h @@ -415,6 +415,9 @@ extern bool opt_debug; extern bool opt_quiet; extern bool opt_protocol; extern bool opt_tracegpu; +extern int opt_intensity; +extern int opt_n_threads; +extern int num_processors; extern int opt_timeout; extern bool want_longpoll; extern bool have_longpoll; diff --git a/pentablake.cu b/pentablake.cu index aaa6a9b..726ba5f 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -45,9 +45,6 @@ extern "C" void pentablakehash(void *output, const void *input) #define MAXU 0xffffffffU -// in cpu-miner.c -extern int opt_n_threads; - __constant__ static uint32_t __align__(32) c_Target[8]; @@ -514,8 +511,8 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * ((uint32_t*)ptarget)[7] = 0x000F; if (!init[thr_id]) { - if (opt_n_threads > 1) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); + if (num_processors > 1) { + cudaSetDevice(device_map[thr_id]); } CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput)); CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t))); diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu index 01363ee..64bd15c 100644 --- a/quark/cuda_quark_blake512.cu +++ b/quark/cuda_quark_blake512.cu @@ -239,10 +239,10 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo __host__ void quark_blake512_cpu_init(int thr_id, int threads) { // Kopiere die Hash-Tabellen in den GPU-Speicher - cudaMemcpyToSymbol( c_sigma, + CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma, host_sigma, sizeof(host_sigma), - 0, cudaMemcpyHostToDevice); + 0, cudaMemcpyHostToDevice)); } // Blake512 für 80 Byte grosse Eingangsdaten diff --git a/qubit/qubit.cu b/qubit/qubit.cu index df2f886..31df2f9 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -26,7 +26,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -89,14 +89,14 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, { cudaSetDevice(device_map[thr_id]); - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); - qubit_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/stats.cpp b/stats.cpp index fef5c30..b4673dd 100644 --- a/stats.cpp +++ b/stats.cpp @@ -18,7 +18,6 @@ static uint64_t uid = 0; #define STATS_PURGE_TIMEOUT 120*60 /* 120 mn */ extern uint64_t global_hashrate; -extern int opt_n_threads; extern int opt_statsavg; extern int device_map[8]; diff --git a/x11/cuda_x11_aes.cu b/x11/cuda_x11_aes.cu index 51f407c..43ec8c3 100644 --- a/x11/cuda_x11_aes.cu +++ b/x11/cuda_x11_aes.cu @@ -275,27 +275,27 @@ static __constant__ uint32_t d_AES1[256]; static __constant__ uint32_t d_AES2[256]; static __constant__ uint32_t d_AES3[256]; -static void aes_cpu_init() +static void aes_cpu_init(int thr_id) { - cudaMemcpyToSymbol( d_AES0, + CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES0, h_AES0, sizeof(h_AES0), - 0, cudaMemcpyHostToDevice); + 0, cudaMemcpyHostToDevice)); - cudaMemcpyToSymbol( d_AES1, + CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES1, h_AES1, sizeof(h_AES1), - 0, cudaMemcpyHostToDevice); + 0, cudaMemcpyHostToDevice)); - cudaMemcpyToSymbol( d_AES2, + CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES2, h_AES2, sizeof(h_AES2), - 0, cudaMemcpyHostToDevice); + 0, cudaMemcpyHostToDevice)); - cudaMemcpyToSymbol( d_AES3, + CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES3, h_AES3, sizeof(h_AES3), - 0, cudaMemcpyHostToDevice); + 0, cudaMemcpyHostToDevice)); } __device__ __forceinline__ diff --git a/x11/cuda_x11_echo.cu b/x11/cuda_x11_echo.cu index 5fc2a0f..c82e4da 100644 --- a/x11/cuda_x11_echo.cu +++ b/x11/cuda_x11_echo.cu @@ -303,7 +303,7 @@ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash __host__ void x11_echo512_cpu_init(int thr_id, int threads) { - aes_cpu_init(); + aes_cpu_init(thr_id); } __host__ diff --git a/x11/cuda_x11_luffa512.cu b/x11/cuda_x11_luffa512.cu index c6dd1dc..50d0bde 100644 --- a/x11/cuda_x11_luffa512.cu +++ b/x11/cuda_x11_luffa512.cu @@ -356,11 +356,12 @@ __global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint } -// Setup-Funktionen -__host__ void x11_luffa512_cpu_init(int thr_id, int threads) +// Setup Function +__host__ +void x11_luffa512_cpu_init(int thr_id, int threads) { - cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice); - cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice); + CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice)); + CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice)); } __host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) diff --git a/x11/cuda_x11_shavite512.cu b/x11/cuda_x11_shavite512.cu index dfe6a1e..0131b6c 100644 --- a/x11/cuda_x11_shavite512.cu +++ b/x11/cuda_x11_shavite512.cu @@ -1404,7 +1404,7 @@ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH __host__ void x11_shavite512_cpu_init(int thr_id, int threads) { - aes_cpu_init(); + aes_cpu_init(thr_id); } __host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu index df6e659..49bd1bc 100644 --- a/x11/cuda_x11_simd512.cu +++ b/x11/cuda_x11_simd512.cu @@ -10,9 +10,6 @@ #include "cuda_helper.h" #include -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - int *d_state[8]; uint4 *d_temp4[8]; @@ -624,10 +621,10 @@ x11_simd512_gpu_final_64(int threads, uint32_t startNounce, uint64_t *g_hash, ui } __host__ -void x11_simd512_cpu_init(int thr_id, int threads) +int x11_simd512_cpu_init(int thr_id, int threads) { - CUDA_SAFE_CALL(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads)); - CUDA_SAFE_CALL(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads)); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads), (int) err); /* todo: prevent -i 21 */ + CUDA_CALL_OR_RET_X(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads), (int) err); cudaMemcpyToSymbol(c_perm, h_perm, sizeof(h_perm), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(c_IV_512, h_IV_512, sizeof(h_IV_512), 0, cudaMemcpyHostToDevice); @@ -644,7 +641,9 @@ void x11_simd512_cpu_init(int thr_id, int threads) texRef1D_128.normalized = 0; texRef1D_128.filterMode = cudaFilterModePoint; texRef1D_128.addressMode[0] = cudaAddressModeClamp; - CUDA_SAFE_CALL(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads)); + CUDA_CALL_OR_RET_X(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads), (int) err); + + return 0; } __host__ diff --git a/x11/fresh.cu b/x11/fresh.cu index dc1a5a5..8ea3233 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -19,7 +19,7 @@ extern void x11_shavite512_setBlock_80(void *pdata); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -84,14 +84,14 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4)); + cudaSetDevice(device_map[thr_id]); x11_shavite512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/x11/s3.cu b/x11/s3.cu index 2c7ac5e..dae3d1b 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -19,7 +19,7 @@ extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_setBlock_80(void *pdata); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_skein512_cpu_init(int thr_id, int threads); @@ -56,11 +56,10 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; static bool init[8] = { 0 }; -#ifdef WIN32 - // reduce a bit the intensity on windows - int intensity = 19; // 256*256*8; -#else int intensity = 20; // 256*256*8*2; +#ifdef WIN32 + // reduce by one the intensity on windows + intensity--; #endif int throughput = opt_work_size ? opt_work_size : (1 << intensity); throughput = min(throughput, (int)(max_nonce - first_nonce)); @@ -70,14 +69,14 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput)); + cudaSetDevice(device_map[thr_id]); x11_shavite512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/x11/x11.cu b/x11/x11.cu index 121d416..3b8b059 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -51,7 +51,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -153,8 +153,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); + cudaSetDevice(device_map[thr_id]); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -165,8 +164,12 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, x11_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput); - x11_simd512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput); + if (x11_simd512_cpu_init(thr_id, throughput) != 0) { + return 0; + } + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/x13/x13.cu b/x13/x13.cu index 0dd72ce..4b59ed3 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -54,7 +54,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -162,8 +162,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 2 * 32 * throughput)); + cudaSetDevice(device_map[thr_id]); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -174,10 +173,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, x11_luffa512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput); - x11_simd512_cpu_init(thr_id, throughput); + if (x11_simd512_cpu_init(thr_id, throughput) != 0) { + return 0; + } x11_echo512_cpu_init(thr_id, throughput); x13_hamsi512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput); + + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; diff --git a/x15/x14.cu b/x15/x14.cu index b772af0..8ed930f 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -57,7 +57,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -174,8 +174,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); + cudaSetDevice(device_map[thr_id]); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -192,6 +191,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, x13_fugue512_cpu_init(thr_id, throughput); x14_shabal512_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; } diff --git a/x15/x15.cu b/x15/x15.cu index 589cc43..578daf3 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -58,7 +58,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -184,8 +184,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, if (!init[thr_id]) { - CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); - CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); + cudaSetDevice(device_map[thr_id]); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -203,6 +202,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, x14_shabal512_cpu_init(thr_id, throughput); x15_whirlpool_cpu_init(thr_id, throughput, 0); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true; } diff --git a/x17/x17.cu b/x17/x17.cu index 8ae0ceb..33b53f2 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -61,7 +61,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_simd512_cpu_init(int thr_id, int threads); +extern int x11_simd512_cpu_init(int thr_id, int threads); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_echo512_cpu_init(int thr_id, int threads); @@ -203,7 +203,6 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, if (!init[thr_id]) { cudaSetDevice(device_map[thr_id]); - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); @@ -223,6 +222,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, x17_sha512_cpu_init(thr_id, throughput); x17_haval256_cpu_init(thr_id, throughput); + CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0); + cuda_check_cpu_init(thr_id, throughput); init[thr_id] = true;