Browse Source

Prepare trap of hardware/mem failures

2upstream
Tanguy Pruvot 10 years ago
parent
commit
73f22b237a
  1. 3
      api.cpp
  2. 1
      api/index.php
  3. 8
      blake32.cu
  4. 5
      ccminer.cpp
  5. 13
      cuda.cpp
  6. 6
      cuda_checkhash.cu
  7. 26
      cuda_helper.h
  8. 2
      cuda_nist5.cu
  9. 3
      miner.h
  10. 7
      pentablake.cu
  11. 4
      quark/cuda_quark_blake512.cu
  12. 6
      qubit/qubit.cu
  13. 1
      stats.cpp
  14. 18
      x11/cuda_x11_aes.cu
  15. 2
      x11/cuda_x11_echo.cu
  16. 9
      x11/cuda_x11_luffa512.cu
  17. 2
      x11/cuda_x11_shavite512.cu
  18. 13
      x11/cuda_x11_simd512.cu
  19. 8
      x11/fresh.cu
  20. 15
      x11/s3.cu
  21. 11
      x11/x11.cu
  22. 12
      x13/x13.cu
  23. 7
      x15/x14.cu
  24. 7
      x15/x15.cu
  25. 5
      x17/x17.cu

3
api.cpp

@ -101,14 +101,11 @@ static char *buffer = NULL;
static time_t startup = 0; static time_t startup = 0;
static int bye = 0; static int bye = 0;
extern int opt_intensity;
extern int opt_n_threads;
extern char *opt_api_allow; extern char *opt_api_allow;
extern int opt_api_listen; /* port */ extern int opt_api_listen; /* port */
extern uint64_t global_hashrate; extern uint64_t global_hashrate;
extern uint32_t accepted_count; extern uint32_t accepted_count;
extern uint32_t rejected_count; extern uint32_t rejected_count;
extern int num_processors;
extern int device_map[8]; extern int device_map[8];
extern char *device_name[8]; extern char *device_name[8];

1
api/index.php

@ -52,6 +52,7 @@ function translateField($key)
$intl['H'] = 'Bloc height'; $intl['H'] = 'Bloc height';
$intl['I'] = 'Intensity'; $intl['I'] = 'Intensity';
$intl['HWF'] = 'Failures';
$intl['TEMP'] = 'T°c'; $intl['TEMP'] = 'T°c';
$intl['FAN'] = 'Fan %'; $intl['FAN'] = 'Fan %';

8
blake32.cu

@ -17,8 +17,6 @@ extern "C" {
/* threads per block and throughput (intensity) */ /* threads per block and throughput (intensity) */
#define TPB 128 #define TPB 128
extern int num_processors;
/* added in sph_blake.c */ /* added in sph_blake.c */
extern "C" int blake256_rounds = 14; extern "C" int blake256_rounds = 14;
@ -428,9 +426,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
if (!init[thr_id]) { if (!init[thr_id]) {
if (num_processors > 1) if (num_processors > 1)
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t))); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t))); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
init[thr_id] = true; init[thr_id] = true;
} }

5
ccminer.cpp

@ -1104,9 +1104,9 @@ static void *miner_thread(void *userdata)
if (max64 < minmax) { if (max64 < minmax) {
switch (opt_algo) { switch (opt_algo) {
case ALGO_BLAKECOIN: case ALGO_BLAKECOIN:
minmax = 0x4000000;
break;
case ALGO_BLAKE: case ALGO_BLAKE:
minmax = 0x80000000U;
break;
case ALGO_DOOM: case ALGO_DOOM:
case ALGO_JACKPOT: case ALGO_JACKPOT:
case ALGO_KECCAK: case ALGO_KECCAK:
@ -1158,6 +1158,7 @@ static void *miner_thread(void *userdata)
applog(LOG_DEBUG, "job done, wait for a new one..."); applog(LOG_DEBUG, "job done, wait for a new one...");
work_restart[thr_id].restart = 1; work_restart[thr_id].restart = 1;
hashlog_purge_old(); hashlog_purge_old();
stats_purge_old();
// wait a bit for a new job... // wait a bit for a new job...
usleep(500*1000); usleep(500*1000);
(*nonceptr) = end_nonce + 1; (*nonceptr) = end_nonce + 1;

13
cuda.cpp

@ -1,7 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
#include <string.h> #include <string.h>
#include <map> #include <map>
#ifndef _WIN32 #ifndef _WIN32
@ -22,6 +21,10 @@
#include "cuda_runtime.h" #include "cuda_runtime.h"
#ifdef WIN32
#include "compat.h" // sleep
#endif
extern char *device_name[8]; extern char *device_name[8];
extern int device_map[8]; extern int device_map[8];
extern int device_sm[8]; extern int device_sm[8];
@ -145,3 +148,11 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
result = cudaStreamSynchronize(stream); result = cudaStreamSynchronize(stream);
return result; return result;
} }
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
struct cgpu_info *gpu = &thr_info[thr_id].gpu;
gpu->hw_errors++;
applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err));
sleep(1);
}

6
cuda_checkhash.cu

@ -43,11 +43,11 @@ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonce
__host__ __host__
void cuda_check_cpu_init(int thr_id, int threads) void cuda_check_cpu_init(int thr_id, int threads)
{ {
cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t)); CUDA_CALL_OR_RET(cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t)));
cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t)); CUDA_CALL_OR_RET(cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t)));
} }
// Target Difficulty setzen // Target Difficulty
__host__ __host__
void cuda_check_cpu_setTarget(const void *ptarget) void cuda_check_cpu_setTarget(const void *ptarget)
{ {

26
cuda_helper.h

@ -22,6 +22,7 @@ extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern uint32_t cuda_check_hash_fast(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, int order); extern uint32_t cuda_check_hash_fast(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, int order);
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
extern void cudaReportHardwareFailure(int thr_id, cudaError_t error, const char* func);
extern __device__ __device_builtin__ void __syncthreads(void); extern __device__ __device_builtin__ void __syncthreads(void);
@ -122,17 +123,34 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x)
#endif #endif
/*********************************************************************/ /*********************************************************************/
// Macro to catch CUDA errors in CUDA runtime calls // Macros to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \ #define CUDA_SAFE_CALL(call) \
do { \ do { \
cudaError_t err = call; \ cudaError_t err = call; \
if (cudaSuccess != err) { \ if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\ fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString(err) ); \ __FUNCTION__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
} \ } \
} while (0) } while (0)
#define CUDA_CALL_OR_RET(call) do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \
return; \
} \
} while (0)
#define CUDA_CALL_OR_RET_X(call, ret) do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \
return ret; \
} \
} while (0)
/*********************************************************************/ /*********************************************************************/
#ifdef _WIN64 #ifdef _WIN64
#define USE_XOR_ASM_OPTS 0 #define USE_XOR_ASM_OPTS 0
@ -185,7 +203,7 @@ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t
return result; return result;
} }
#else #else
#define xor8(a,b,c,d,e,f,g,h) (a^b^c^d^e^f^g^h) #define xor8(a,b,c,d,e,f,g,h) ((a^b)^(c^d)^(e^f)^(g^h))
#endif #endif
// device asm for x17 // device asm for x17

2
cuda_nist5.cu

@ -75,7 +75,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
((uint32_t*)ptarget)[7] = 0x00FF; ((uint32_t*)ptarget)[7] = 0x00FF;
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096 int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
throughput = min(throughput, max_nonce - first_nonce); throughput = min(throughput, (int) (max_nonce - first_nonce));
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) if (!init[thr_id])

3
miner.h

@ -415,6 +415,9 @@ extern bool opt_debug;
extern bool opt_quiet; extern bool opt_quiet;
extern bool opt_protocol; extern bool opt_protocol;
extern bool opt_tracegpu; extern bool opt_tracegpu;
extern int opt_intensity;
extern int opt_n_threads;
extern int num_processors;
extern int opt_timeout; extern int opt_timeout;
extern bool want_longpoll; extern bool want_longpoll;
extern bool have_longpoll; extern bool have_longpoll;

7
pentablake.cu

@ -45,9 +45,6 @@ extern "C" void pentablakehash(void *output, const void *input)
#define MAXU 0xffffffffU #define MAXU 0xffffffffU
// in cpu-miner.c
extern int opt_n_threads;
__constant__ __constant__
static uint32_t __align__(32) c_Target[8]; static uint32_t __align__(32) c_Target[8];
@ -514,8 +511,8 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
((uint32_t*)ptarget)[7] = 0x000F; ((uint32_t*)ptarget)[7] = 0x000F;
if (!init[thr_id]) { if (!init[thr_id]) {
if (opt_n_threads > 1) { if (num_processors > 1) {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
} }
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));

4
quark/cuda_quark_blake512.cu

@ -239,10 +239,10 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
__host__ void quark_blake512_cpu_init(int thr_id, int threads) __host__ void quark_blake512_cpu_init(int thr_id, int threads)
{ {
// Kopiere die Hash-Tabellen in den GPU-Speicher // Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( c_sigma, CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma,
host_sigma, host_sigma,
sizeof(host_sigma), sizeof(host_sigma),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice));
} }
// Blake512 für 80 Byte grosse Eingangsdaten // Blake512 für 80 Byte grosse Eingangsdaten

6
qubit/qubit.cu

@ -26,7 +26,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -89,14 +89,14 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
qubit_luffa512_cpu_init(thr_id, throughput); qubit_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

1
stats.cpp

@ -18,7 +18,6 @@ static uint64_t uid = 0;
#define STATS_PURGE_TIMEOUT 120*60 /* 120 mn */ #define STATS_PURGE_TIMEOUT 120*60 /* 120 mn */
extern uint64_t global_hashrate; extern uint64_t global_hashrate;
extern int opt_n_threads;
extern int opt_statsavg; extern int opt_statsavg;
extern int device_map[8]; extern int device_map[8];

18
x11/cuda_x11_aes.cu

@ -275,27 +275,27 @@ static __constant__ uint32_t d_AES1[256];
static __constant__ uint32_t d_AES2[256]; static __constant__ uint32_t d_AES2[256];
static __constant__ uint32_t d_AES3[256]; static __constant__ uint32_t d_AES3[256];
static void aes_cpu_init() static void aes_cpu_init(int thr_id)
{ {
cudaMemcpyToSymbol( d_AES0, CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES0,
h_AES0, h_AES0,
sizeof(h_AES0), sizeof(h_AES0),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES1, CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES1,
h_AES1, h_AES1,
sizeof(h_AES1), sizeof(h_AES1),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES2, CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES2,
h_AES2, h_AES2,
sizeof(h_AES2), sizeof(h_AES2),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES3, CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES3,
h_AES3, h_AES3,
sizeof(h_AES3), sizeof(h_AES3),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice));
} }
__device__ __forceinline__ __device__ __forceinline__

2
x11/cuda_x11_echo.cu

@ -303,7 +303,7 @@ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash
__host__ __host__
void x11_echo512_cpu_init(int thr_id, int threads) void x11_echo512_cpu_init(int thr_id, int threads)
{ {
aes_cpu_init(); aes_cpu_init(thr_id);
} }
__host__ __host__

9
x11/cuda_x11_luffa512.cu

@ -356,11 +356,12 @@ __global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint
} }
// Setup-Funktionen // Setup Function
__host__ void x11_luffa512_cpu_init(int thr_id, int threads) __host__
void x11_luffa512_cpu_init(int thr_id, int threads)
{ {
cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice); CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice); CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice));
} }
__host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) __host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)

2
x11/cuda_x11_shavite512.cu

@ -1404,7 +1404,7 @@ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH
__host__ void x11_shavite512_cpu_init(int thr_id, int threads) __host__ void x11_shavite512_cpu_init(int thr_id, int threads)
{ {
aes_cpu_init(); aes_cpu_init(thr_id);
} }
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) __host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)

13
x11/cuda_x11_simd512.cu

@ -10,9 +10,6 @@
#include "cuda_helper.h" #include "cuda_helper.h"
#include <stdio.h> #include <stdio.h>
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
int *d_state[8]; int *d_state[8];
uint4 *d_temp4[8]; uint4 *d_temp4[8];
@ -624,10 +621,10 @@ x11_simd512_gpu_final_64(int threads, uint32_t startNounce, uint64_t *g_hash, ui
} }
__host__ __host__
void x11_simd512_cpu_init(int thr_id, int threads) int x11_simd512_cpu_init(int thr_id, int threads)
{ {
CUDA_SAFE_CALL(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads)); CUDA_CALL_OR_RET_X(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads), (int) err); /* todo: prevent -i 21 */
CUDA_SAFE_CALL(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads)); CUDA_CALL_OR_RET_X(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads), (int) err);
cudaMemcpyToSymbol(c_perm, h_perm, sizeof(h_perm), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(c_perm, h_perm, sizeof(h_perm), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(c_IV_512, h_IV_512, sizeof(h_IV_512), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(c_IV_512, h_IV_512, sizeof(h_IV_512), 0, cudaMemcpyHostToDevice);
@ -644,7 +641,9 @@ void x11_simd512_cpu_init(int thr_id, int threads)
texRef1D_128.normalized = 0; texRef1D_128.normalized = 0;
texRef1D_128.filterMode = cudaFilterModePoint; texRef1D_128.filterMode = cudaFilterModePoint;
texRef1D_128.addressMode[0] = cudaAddressModeClamp; texRef1D_128.addressMode[0] = cudaAddressModeClamp;
CUDA_SAFE_CALL(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads)); CUDA_CALL_OR_RET_X(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads), (int) err);
return 0;
} }
__host__ __host__

8
x11/fresh.cu

@ -19,7 +19,7 @@ extern void x11_shavite512_setBlock_80(void *pdata);
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -84,14 +84,14 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4));
x11_shavite512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

15
x11/s3.cu

@ -19,7 +19,7 @@ extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_shavite512_setBlock_80(void *pdata); extern void x11_shavite512_setBlock_80(void *pdata);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads); extern void quark_skein512_cpu_init(int thr_id, int threads);
@ -56,11 +56,10 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
{ {
const uint32_t first_nonce = pdata[19]; const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 }; static bool init[8] = { 0 };
#ifdef WIN32
// reduce a bit the intensity on windows
int intensity = 19; // 256*256*8;
#else
int intensity = 20; // 256*256*8*2; int intensity = 20; // 256*256*8*2;
#ifdef WIN32
// reduce by one the intensity on windows
intensity--;
#endif #endif
int throughput = opt_work_size ? opt_work_size : (1 << intensity); int throughput = opt_work_size ? opt_work_size : (1 << intensity);
throughput = min(throughput, (int)(max_nonce - first_nonce)); throughput = min(throughput, (int)(max_nonce - first_nonce));
@ -70,14 +69,14 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
x11_shavite512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

11
x11/x11.cu

@ -51,7 +51,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -153,8 +153,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -165,8 +164,12 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_luffa512_cpu_init(thr_id, throughput); x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput);
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
return 0;
}
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

12
x13/x13.cu

@ -54,7 +54,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -162,8 +162,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 2 * 32 * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -174,10 +173,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
x11_luffa512_cpu_init(thr_id, throughput); x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput); x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput); x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput); if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
return 0;
}
x11_echo512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput);
x13_hamsi512_cpu_init(thr_id, throughput); x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

7
x15/x14.cu

@ -57,7 +57,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -174,8 +174,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -192,6 +191,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
x13_fugue512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput);
x14_shabal512_cpu_init(thr_id, throughput); x14_shabal512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }

7
x15/x15.cu

@ -58,7 +58,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -184,8 +184,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id])); cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -203,6 +202,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
x14_shabal512_cpu_init(thr_id, throughput); x14_shabal512_cpu_init(thr_id, throughput);
x15_whirlpool_cpu_init(thr_id, throughput, 0); x15_whirlpool_cpu_init(thr_id, throughput, 0);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }

5
x17/x17.cu

@ -61,7 +61,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads); extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads); extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -203,7 +203,6 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
@ -223,6 +222,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
x17_sha512_cpu_init(thr_id, throughput); x17_sha512_cpu_init(thr_id, throughput);
x17_haval256_cpu_init(thr_id, throughput); x17_haval256_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput); cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;

Loading…
Cancel
Save