Browse Source

cuda: check for errors on cuda mem alloc

2upstream
Tanguy Pruvot 11 years ago
parent
commit
a9a3ad8afc
  1. 2
      ccminer.vcxproj
  2. 13
      cuda_helper.h
  3. 34
      x11/x11.cu
  4. 40
      x13/x13.cu
  5. 7
      x15/x14.cu
  6. 7
      x15/x15.cu

2
ccminer.vcxproj

@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)"</Command> @@ -175,7 +175,7 @@ copy "$(CudaToolkitBinDir)\cudart32*.dll" "$(OutDir)"</Command>
<PtxAsOptionV>true</PtxAsOptionV>
<Keep>false</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration>
<Defines>--optimize 2</Defines>
<Defines></Defines>
</CudaCompile>
<CudaLink>
<GPUDebugInfo>false</GPUDebugInfo>

13
cuda_helper.h

@ -1,6 +1,8 @@ @@ -1,6 +1,8 @@
#ifndef CUDA_HELPER_H
#define CUDA_HELPER_H
#include <cuda_runtime.h>
static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
{
#if __CUDA_ARCH__ >= 130
@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse @@ -83,4 +85,15 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
#endif // #ifndef CUDA_HELPER_H

34
x11/x11.cu

@ -18,7 +18,7 @@ extern "C" @@ -18,7 +18,7 @@ extern "C"
}
#include <stdint.h>
#include <cuda_runtime.h>
#include <cuda_helper.h>
// aus cpu-miner.c
extern int device_map[8];
@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -157,10 +157,10 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -173,6 +173,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -184,43 +185,24 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget);
do {
uint32_t foundNonce;
int order = 0;
// erstes Blake512 Hash mit CUDA
// Hash with CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];

40
x13/x13.cu

@ -23,7 +23,7 @@ extern "C" @@ -23,7 +23,7 @@ extern "C"
}
#include <stdint.h>
#include <cuda_runtime.h>
#include <cuda_helper.h>
// aus cpu-miner.c
extern int device_map[8];
@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -178,10 +178,9 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -196,12 +195,11 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
x13_hamsi512_cpu_init(thr_id, throughput);
x13_fugue512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
//unsigned char echobefore[64], echoafter[64];
uint32_t endiandata[20];
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -209,47 +207,25 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
quark_check_cpu_setTarget(ptarget);
do {
uint32_t foundNonce;
int order = 0;
// erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Luffa512
x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Cubehash512
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für Shavite512
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für SIMD512
x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// das ist der unbedingte Branch für ECHO512
x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];

7
x15/x14.cu

@ -25,7 +25,7 @@ extern "C" { @@ -25,7 +25,7 @@ extern "C" {
}
#include <stdint.h>
#include <cuda_runtime.h>
#include <cuda_helper.h>
// from cpu-miner.c
extern int device_map[8];
@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, @@ -185,9 +185,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

7
x15/x15.cu

@ -26,7 +26,7 @@ extern "C" { @@ -26,7 +26,7 @@ extern "C" {
}
#include <stdint.h>
#include <cuda_runtime.h>
#include <cuda_helper.h>
// to test gpu hash on a null buffer
#define NULLTEST 0
@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, @@ -212,9 +212,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);

Loading…
Cancel
Save