Browse Source

cuda: check for errors on cuda mem alloc

master
Tanguy Pruvot 10 years ago
parent
commit
a9a3ad8afc
  1. 2
      ccminer.vcxproj
  2. 13
      cuda_helper.h
  3. 34
      x11/x11.cu
  4. 40
      x13/x13.cu
  5. 7
      x15/x14.cu
  6. 7
      x15/x15.cu

2
ccminer.vcxproj

@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)"</Command>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>false</Keep> <Keep>false</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_50,sm_50</CodeGeneration>
<Defines>--optimize 2</Defines> <Defines></Defines>
</CudaCompile> </CudaCompile>
<CudaLink> <CudaLink>
<GPUDebugInfo>false</GPUDebugInfo> <GPUDebugInfo>false</GPUDebugInfo>

13
cuda_helper.h

@ -1,6 +1,8 @@
#ifndef CUDA_HELPER_H #ifndef CUDA_HELPER_H
#define CUDA_HELPER_H #define CUDA_HELPER_H
#include <cuda_runtime.h>
static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
{ {
#if __CUDA_ARCH__ >= 130 #if __CUDA_ARCH__ >= 130
@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif #endif
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
#endif // #ifndef CUDA_HELPER_H #endif // #ifndef CUDA_HELPER_H

34
x11/x11.cu

@ -18,7 +18,7 @@ extern "C"
} }
#include <stdint.h> #include <stdint.h>
#include <cuda_runtime.h> #include <cuda_helper.h>
// aus cpu-miner.c // aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
// Konstanten kopieren, Speicher belegen // Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput);
@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_simd512_cpu_init(thr_id, throughput); x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput); x11_echo512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput); quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }
@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget); quark_check_cpu_setTarget(ptarget);
do { do {
uint32_t foundNonce;
int order = 0; int order = 0;
// erstes Blake512 Hash mit CUDA // Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU // Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
uint32_t vhash64[8]; uint32_t vhash64[8];

40
x13/x13.cu

@ -23,7 +23,7 @@ extern "C"
} }
#include <stdint.h> #include <stdint.h>
#include <cuda_runtime.h> #include <cuda_helper.h>
// aus cpu-miner.c // aus cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput); quark_skein512_cpu_init(thr_id, throughput);
@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
x13_hamsi512_cpu_init(thr_id, throughput); x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput); x13_fugue512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput); quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }
//unsigned char echobefore[64], echoafter[64]; uint32_t endiandata[20];
uint32_t endiandata[20];
for (int k=0; k < 20; k++) for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget); quark_check_cpu_setTarget(ptarget);
do { do {
uint32_t foundNonce;
int order = 0; int order = 0;
// erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU // Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff) if (foundNonce != 0xffffffff)
{ {
uint32_t vhash64[8]; uint32_t vhash64[8];

7
x15/x14.cu

@ -25,7 +25,7 @@ extern "C" {
} }
#include <stdint.h> #include <stdint.h>
#include <cuda_runtime.h> #include <cuda_helper.h>
// from cpu-miner.c // from cpu-miner.c
extern int device_map[8]; extern int device_map[8];
@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);

7
x15/x15.cu

@ -26,7 +26,7 @@ extern "C" {
} }
#include <stdint.h> #include <stdint.h>
#include <cuda_runtime.h> #include <cuda_helper.h>
// to test gpu hash on a null buffer // to test gpu hash on a null buffer
#define NULLTEST 0 #define NULLTEST 0
@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
if (!init[thr_id]) if (!init[thr_id])
{ {
cudaSetDevice(device_map[thr_id]); CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);

Loading…
Cancel
Save