Browse Source

Prepare trap of hardware/mem failures

2upstream
Tanguy Pruvot 10 years ago
parent
commit
73f22b237a
  1. 3
      api.cpp
  2. 1
      api/index.php
  3. 8
      blake32.cu
  4. 5
      ccminer.cpp
  5. 13
      cuda.cpp
  6. 6
      cuda_checkhash.cu
  7. 26
      cuda_helper.h
  8. 2
      cuda_nist5.cu
  9. 3
      miner.h
  10. 7
      pentablake.cu
  11. 4
      quark/cuda_quark_blake512.cu
  12. 6
      qubit/qubit.cu
  13. 1
      stats.cpp
  14. 18
      x11/cuda_x11_aes.cu
  15. 2
      x11/cuda_x11_echo.cu
  16. 9
      x11/cuda_x11_luffa512.cu
  17. 2
      x11/cuda_x11_shavite512.cu
  18. 13
      x11/cuda_x11_simd512.cu
  19. 8
      x11/fresh.cu
  20. 15
      x11/s3.cu
  21. 11
      x11/x11.cu
  22. 12
      x13/x13.cu
  23. 7
      x15/x14.cu
  24. 7
      x15/x15.cu
  25. 5
      x17/x17.cu

3
api.cpp

@ -101,14 +101,11 @@ static char *buffer = NULL; @@ -101,14 +101,11 @@ static char *buffer = NULL;
static time_t startup = 0;
static int bye = 0;
extern int opt_intensity;
extern int opt_n_threads;
extern char *opt_api_allow;
extern int opt_api_listen; /* port */
extern uint64_t global_hashrate;
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern int num_processors;
extern int device_map[8];
extern char *device_name[8];

1
api/index.php

@ -52,6 +52,7 @@ function translateField($key) @@ -52,6 +52,7 @@ function translateField($key)
$intl['H'] = 'Bloc height';
$intl['I'] = 'Intensity';
$intl['HWF'] = 'Failures';
$intl['TEMP'] = 'T°c';
$intl['FAN'] = 'Fan %';

8
blake32.cu

@ -17,8 +17,6 @@ extern "C" { @@ -17,8 +17,6 @@ extern "C" {
/* threads per block and throughput (intensity) */
#define TPB 128
extern int num_processors;
/* added in sph_blake.c */
extern "C" int blake256_rounds = 14;
@ -428,9 +426,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -428,9 +426,9 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
if (!init[thr_id]) {
if (num_processors > 1)
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)));
cudaSetDevice(device_map[thr_id]);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
init[thr_id] = true;
}

5
ccminer.cpp

@ -1104,9 +1104,9 @@ static void *miner_thread(void *userdata) @@ -1104,9 +1104,9 @@ static void *miner_thread(void *userdata)
if (max64 < minmax) {
switch (opt_algo) {
case ALGO_BLAKECOIN:
minmax = 0x4000000;
break;
case ALGO_BLAKE:
minmax = 0x80000000U;
break;
case ALGO_DOOM:
case ALGO_JACKPOT:
case ALGO_KECCAK:
@ -1158,6 +1158,7 @@ static void *miner_thread(void *userdata) @@ -1158,6 +1158,7 @@ static void *miner_thread(void *userdata)
applog(LOG_DEBUG, "job done, wait for a new one...");
work_restart[thr_id].restart = 1;
hashlog_purge_old();
stats_purge_old();
// wait a bit for a new job...
usleep(500*1000);
(*nonceptr) = end_nonce + 1;

13
cuda.cpp

@ -1,7 +1,6 @@ @@ -1,7 +1,6 @@
#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <map>
#ifndef _WIN32
@ -22,6 +21,10 @@ @@ -22,6 +21,10 @@
#include "cuda_runtime.h"
#ifdef WIN32
#include "compat.h" // sleep
#endif
extern char *device_name[8];
extern int device_map[8];
extern int device_sm[8];
@ -145,3 +148,11 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id) @@ -145,3 +148,11 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
result = cudaStreamSynchronize(stream);
return result;
}
void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
{
struct cgpu_info *gpu = &thr_info[thr_id].gpu;
gpu->hw_errors++;
applog(LOG_ERR, "GPU #%d: %s %s", device_map[thr_id], func, cudaGetErrorString(err));
sleep(1);
}

6
cuda_checkhash.cu

@ -43,11 +43,11 @@ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonce @@ -43,11 +43,11 @@ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonce
__host__
void cuda_check_cpu_init(int thr_id, int threads)
{
cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
CUDA_CALL_OR_RET(cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t)));
CUDA_CALL_OR_RET(cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t)));
}
// Target Difficulty setzen
// Target Difficulty
__host__
void cuda_check_cpu_setTarget(const void *ptarget)
{

26
cuda_helper.h

@ -22,6 +22,7 @@ extern void cuda_check_cpu_setTarget(const void *ptarget); @@ -22,6 +22,7 @@ extern void cuda_check_cpu_setTarget(const void *ptarget);
extern uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern uint32_t cuda_check_hash_fast(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, int order);
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
extern void cudaReportHardwareFailure(int thr_id, cudaError_t error, const char* func);
extern __device__ __device_builtin__ void __syncthreads(void);
@ -122,17 +123,34 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x) @@ -122,17 +123,34 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x)
#endif
/*********************************************************************/
// Macro to catch CUDA errors in CUDA runtime calls
// Macros to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \
__FUNCTION__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
#define CUDA_CALL_OR_RET(call) do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \
return; \
} \
} while (0)
#define CUDA_CALL_OR_RET_X(call, ret) do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
cudaReportHardwareFailure(thr_id, err, __FUNCTION__); \
return ret; \
} \
} while (0)
/*********************************************************************/
#ifdef _WIN64
#define USE_XOR_ASM_OPTS 0
@ -185,7 +203,7 @@ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t @@ -185,7 +203,7 @@ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t
return result;
}
#else
#define xor8(a,b,c,d,e,f,g,h) (a^b^c^d^e^f^g^h)
#define xor8(a,b,c,d,e,f,g,h) ((a^b)^(c^d)^(e^f)^(g^h))
#endif
// device asm for x17

2
cuda_nist5.cu

@ -75,7 +75,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, @@ -75,7 +75,7 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
((uint32_t*)ptarget)[7] = 0x00FF;
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
throughput = min(throughput, max_nonce - first_nonce);
throughput = min(throughput, (int) (max_nonce - first_nonce));
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])

3
miner.h

@ -415,6 +415,9 @@ extern bool opt_debug; @@ -415,6 +415,9 @@ extern bool opt_debug;
extern bool opt_quiet;
extern bool opt_protocol;
extern bool opt_tracegpu;
extern int opt_intensity;
extern int opt_n_threads;
extern int num_processors;
extern int opt_timeout;
extern bool want_longpoll;
extern bool have_longpoll;

7
pentablake.cu

@ -45,9 +45,6 @@ extern "C" void pentablakehash(void *output, const void *input) @@ -45,9 +45,6 @@ extern "C" void pentablakehash(void *output, const void *input)
#define MAXU 0xffffffffU
// in cpu-miner.c
extern int opt_n_threads;
__constant__
static uint32_t __align__(32) c_Target[8];
@ -514,8 +511,8 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * @@ -514,8 +511,8 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
((uint32_t*)ptarget)[7] = 0x000F;
if (!init[thr_id]) {
if (opt_n_threads > 1) {
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
if (num_processors > 1) {
cudaSetDevice(device_map[thr_id]);
}
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));

4
quark/cuda_quark_blake512.cu

@ -239,10 +239,10 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo @@ -239,10 +239,10 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
__host__ void quark_blake512_cpu_init(int thr_id, int threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( c_sigma,
CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma,
host_sigma,
sizeof(host_sigma),
0, cudaMemcpyHostToDevice);
0, cudaMemcpyHostToDevice));
}
// Blake512 für 80 Byte grosse Eingangsdaten

6
qubit/qubit.cu

@ -26,7 +26,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -26,7 +26,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -89,14 +89,14 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, @@ -89,14 +89,14 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
{
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
qubit_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

1
stats.cpp

@ -18,7 +18,6 @@ static uint64_t uid = 0; @@ -18,7 +18,6 @@ static uint64_t uid = 0;
#define STATS_PURGE_TIMEOUT 120*60 /* 120 mn */
extern uint64_t global_hashrate;
extern int opt_n_threads;
extern int opt_statsavg;
extern int device_map[8];

18
x11/cuda_x11_aes.cu

@ -275,27 +275,27 @@ static __constant__ uint32_t d_AES1[256]; @@ -275,27 +275,27 @@ static __constant__ uint32_t d_AES1[256];
static __constant__ uint32_t d_AES2[256];
static __constant__ uint32_t d_AES3[256];
static void aes_cpu_init()
static void aes_cpu_init(int thr_id)
{
cudaMemcpyToSymbol( d_AES0,
CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES0,
h_AES0,
sizeof(h_AES0),
0, cudaMemcpyHostToDevice);
0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES1,
CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES1,
h_AES1,
sizeof(h_AES1),
0, cudaMemcpyHostToDevice);
0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES2,
CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES2,
h_AES2,
sizeof(h_AES2),
0, cudaMemcpyHostToDevice);
0, cudaMemcpyHostToDevice));
cudaMemcpyToSymbol( d_AES3,
CUDA_CALL_OR_RET(cudaMemcpyToSymbol( d_AES3,
h_AES3,
sizeof(h_AES3),
0, cudaMemcpyHostToDevice);
0, cudaMemcpyHostToDevice));
}
__device__ __forceinline__

2
x11/cuda_x11_echo.cu

@ -303,7 +303,7 @@ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash @@ -303,7 +303,7 @@ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash
__host__
void x11_echo512_cpu_init(int thr_id, int threads)
{
aes_cpu_init();
aes_cpu_init(thr_id);
}
__host__

9
x11/cuda_x11_luffa512.cu

@ -356,11 +356,12 @@ __global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint @@ -356,11 +356,12 @@ __global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint
}
// Setup-Funktionen
__host__ void x11_luffa512_cpu_init(int thr_id, int threads)
// Setup Function
__host__
void x11_luffa512_cpu_init(int thr_id, int threads)
{
cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice);
CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice));
CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice));
}
__host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)

2
x11/cuda_x11_shavite512.cu

@ -1404,7 +1404,7 @@ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH @@ -1404,7 +1404,7 @@ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH
__host__ void x11_shavite512_cpu_init(int thr_id, int threads)
{
aes_cpu_init();
aes_cpu_init(thr_id);
}
__host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)

13
x11/cuda_x11_simd512.cu

@ -10,9 +10,6 @@ @@ -10,9 +10,6 @@
#include "cuda_helper.h"
#include <stdio.h>
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
int *d_state[8];
uint4 *d_temp4[8];
@ -624,10 +621,10 @@ x11_simd512_gpu_final_64(int threads, uint32_t startNounce, uint64_t *g_hash, ui @@ -624,10 +621,10 @@ x11_simd512_gpu_final_64(int threads, uint32_t startNounce, uint64_t *g_hash, ui
}
__host__
void x11_simd512_cpu_init(int thr_id, int threads)
int x11_simd512_cpu_init(int thr_id, int threads)
{
CUDA_SAFE_CALL(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads));
CUDA_SAFE_CALL(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads));
CUDA_CALL_OR_RET_X(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads), (int) err); /* todo: prevent -i 21 */
CUDA_CALL_OR_RET_X(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads), (int) err);
cudaMemcpyToSymbol(c_perm, h_perm, sizeof(h_perm), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(c_IV_512, h_IV_512, sizeof(h_IV_512), 0, cudaMemcpyHostToDevice);
@ -644,7 +641,9 @@ void x11_simd512_cpu_init(int thr_id, int threads) @@ -644,7 +641,9 @@ void x11_simd512_cpu_init(int thr_id, int threads)
texRef1D_128.normalized = 0;
texRef1D_128.filterMode = cudaFilterModePoint;
texRef1D_128.addressMode[0] = cudaAddressModeClamp;
CUDA_SAFE_CALL(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads));
CUDA_CALL_OR_RET_X(cudaBindTexture(NULL, &texRef1D_128, d_temp4[thr_id], &channelDesc128, 64*sizeof(uint4)*threads), (int) err);
return 0;
}
__host__

8
x11/fresh.cu

@ -19,7 +19,7 @@ extern void x11_shavite512_setBlock_80(void *pdata); @@ -19,7 +19,7 @@ extern void x11_shavite512_setBlock_80(void *pdata);
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -84,14 +84,14 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, @@ -84,14 +84,14 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4));
cudaSetDevice(device_map[thr_id]);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput + 4), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

15
x11/s3.cu

@ -19,7 +19,7 @@ extern void x11_shavite512_cpu_init(int thr_id, int threads); @@ -19,7 +19,7 @@ extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void x11_shavite512_setBlock_80(void *pdata);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads);
@ -56,11 +56,10 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, @@ -56,11 +56,10 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
static bool init[8] = { 0 };
#ifdef WIN32
// reduce a bit the intensity on windows
int intensity = 19; // 256*256*8;
#else
int intensity = 20; // 256*256*8*2;
#ifdef WIN32
// reduce by one the intensity on windows
intensity--;
#endif
int throughput = opt_work_size ? opt_work_size : (1 << intensity);
throughput = min(throughput, (int)(max_nonce - first_nonce));
@ -70,14 +69,14 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, @@ -70,14 +69,14 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
cudaSetDevice(device_map[thr_id]);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

11
x11/x11.cu

@ -51,7 +51,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -51,7 +51,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -153,8 +153,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -153,8 +153,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cudaSetDevice(device_map[thr_id]);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
@ -165,8 +164,12 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -165,8 +164,12 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
return 0;
}
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

12
x13/x13.cu

@ -54,7 +54,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -54,7 +54,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -162,8 +162,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -162,8 +162,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 2 * 32 * throughput));
cudaSetDevice(device_map[thr_id]);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
@ -174,10 +173,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -174,10 +173,15 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
x11_luffa512_cpu_init(thr_id, throughput);
x11_cubehash512_cpu_init(thr_id, throughput);
x11_shavite512_cpu_init(thr_id, throughput);
x11_simd512_cpu_init(thr_id, throughput);
if (x11_simd512_cpu_init(thr_id, throughput) != 0) {
return 0;
}
x11_echo512_cpu_init(thr_id, throughput);
x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

7
x15/x14.cu

@ -57,7 +57,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -57,7 +57,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -174,8 +174,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, @@ -174,8 +174,7 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cudaSetDevice(device_map[thr_id]);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
@ -192,6 +191,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, @@ -192,6 +191,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
x13_fugue512_cpu_init(thr_id, throughput);
x14_shabal512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}

7
x15/x15.cu

@ -58,7 +58,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -58,7 +58,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -184,8 +184,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, @@ -184,8 +184,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cudaSetDevice(device_map[thr_id]);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
@ -203,6 +202,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, @@ -203,6 +202,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
x14_shabal512_cpu_init(thr_id, throughput);
x15_whirlpool_cpu_init(thr_id, throughput, 0);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}

5
x17/x17.cu

@ -61,7 +61,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN @@ -61,7 +61,7 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startN
extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_init(int thr_id, int threads);
extern int x11_simd512_cpu_init(int thr_id, int threads);
extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_echo512_cpu_init(int thr_id, int threads);
@ -203,7 +203,6 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, @@ -203,7 +203,6 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
@ -223,6 +222,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, @@ -223,6 +222,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
x17_sha512_cpu_init(thr_id, throughput);
x17_haval256_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;

Loading…
Cancel
Save