diff --git a/scrypt/blake.cu b/scrypt/blake.cu index 6dde6e8..09ed4e6 100644 --- a/scrypt/blake.cu +++ b/scrypt/blake.cu @@ -12,36 +12,29 @@ #include #include -#include "cuda_runtime.h" -#include "salsa_kernel.h" #include "miner.h" +#include "salsa_kernel.h" +#include "cuda_helper.h" typedef uint32_t sph_u32; -#define SPH_C32(x) ((sph_u32)(x)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) -#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) +#define SPH_ROTL32 ROTL32 +#define SPH_ROTR32 ROTR32 __constant__ uint64_t ptarget64[4]; __constant__ uint32_t pdata[20]; // define some error checking macros -#undef checkCudaErrors - -#if WIN32 -#define DELIMITER '/' -#else #define DELIMITER '/' -#endif #define __FILENAME__ ( strrchr(__FILE__, DELIMITER) != NULL ? strrchr(__FILE__, DELIMITER)+1 : __FILE__ ) +#undef checkCudaErrors #define checkCudaErrors(x) \ { \ cudaGetLastError(); \ x; \ cudaError_t err = cudaGetLastError(); \ - if (err != cudaSuccess) \ - applog(LOG_ERR, "GPU #%d: cudaError %d (%s) calling '%s' (%s line %d)\n", device_map[thr_id], err, cudaGetErrorString(err), #x, __FILENAME__, __LINE__); \ + if (err != cudaSuccess && !abort_flag) \ + applog(LOG_ERR, "GPU #%d: cudaError %d (%s) (%s line %d)\n", device_map[thr_id], err, cudaGetErrorString(err), __FILENAME__, __LINE__); \ } // from salsa_kernel.cu @@ -54,12 +47,6 @@ extern std::map context_hash[2]; #pragma warning (disable: 4146) #endif -static __device__ sph_u32 cuda_sph_bswap32(sph_u32 x) -{ - return (((x << 24) & 0xff000000u) | ((x << 8) & 0x00ff0000u) - | ((x >> 8) & 0x0000ff00u) | ((x >> 24) & 0x000000ffu)); -} - /** * Encode a 32-bit value into the provided buffer (big endian convention). * @@ -69,7 +56,7 @@ static __device__ sph_u32 cuda_sph_bswap32(sph_u32 x) static __device__ void cuda_sph_enc32be(void *dst, sph_u32 val) { - *(sph_u32 *)dst = cuda_sph_bswap32(val); + *(sph_u32 *)dst = cuda_swab32(val); } #define Z00 0 @@ -344,12 +331,13 @@ cuda_sph_enc32be(void *dst, sph_u32 val) H7 ^= S3 ^ V7 ^ VF; \ } while (0) -__global__ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool validate ) +__global__ +void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool validate ) { uint32_t input[16]; uint64_t output[4]; -#pragma unroll 16 + #pragma unroll for (int i=0; i < 16; ++i) input[i] = pdata[i]; sph_u32 H0 = 0x6A09E667; @@ -369,11 +357,11 @@ __global__ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g T0 = SPH_T32(T0 + 512); COMPRESS32; -#pragma unroll 3 + #pragma unroll for (int i=0; i < 3; ++i) input[i] = pdata[16+i]; input[3] = nonce + ((blockIdx.x * blockDim.x) + threadIdx.x); input[4] = 0x80000000; -#pragma unroll 8 + #pragma unroll 8 for (int i=5; i < 13; ++i) input[i] = 0; input[13] = 0x00000001; input[14] = T1; @@ -398,7 +386,7 @@ __global__ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g if (validate) { g_out += 4 * ((blockIdx.x * blockDim.x) + threadIdx.x); -#pragma unroll 4 + #pragma unroll for (int i=0; i < 4; ++i) g_out[i] = output[i]; } diff --git a/scrypt/fermi_kernel.cu b/scrypt/fermi_kernel.cu index c7f9026..d58bd7f 100644 --- a/scrypt/fermi_kernel.cu +++ b/scrypt/fermi_kernel.cu @@ -14,7 +14,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h" diff --git a/scrypt/keccak.cu b/scrypt/keccak.cu index 142d624..7daed2b 100644 --- a/scrypt/keccak.cu +++ b/scrypt/keccak.cu @@ -16,22 +16,17 @@ #include "salsa_kernel.h" // define some error checking macros -#undef checkCudaErrors - -#if WIN32 -#define DELIMITER '/' -#else #define DELIMITER '/' -#endif #define __FILENAME__ ( strrchr(__FILE__, DELIMITER) != NULL ? strrchr(__FILE__, DELIMITER)+1 : __FILE__ ) +#undef checkCudaErrors #define checkCudaErrors(x) \ { \ cudaGetLastError(); \ x; \ cudaError_t err = cudaGetLastError(); \ - if (err != cudaSuccess) \ - applog(LOG_ERR, "GPU #%d: cudaError %d (%s) calling '%s' (%s line %d)\n", device_map[thr_id], err, cudaGetErrorString(err), #x, __FILENAME__, __LINE__); \ + if (err != cudaSuccess && !abort_flag) \ + applog(LOG_ERR, "GPU #%d: cudaError %d (%s) (%s line %d)\n", device_map[thr_id], err, cudaGetErrorString(err), __FILENAME__, __LINE__); \ } // from salsa_kernel.cu diff --git a/scrypt/kepler_kernel.cu b/scrypt/kepler_kernel.cu index 41e3256..c67806e 100644 --- a/scrypt/kepler_kernel.cu +++ b/scrypt/kepler_kernel.cu @@ -9,7 +9,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h" diff --git a/scrypt/nv_kernel2.cu b/scrypt/nv_kernel2.cu index c581eda..cbb8d01 100644 --- a/scrypt/nv_kernel2.cu +++ b/scrypt/nv_kernel2.cu @@ -11,7 +11,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h" diff --git a/scrypt/salsa_kernel.cu b/scrypt/salsa_kernel.cu index ee0de00..9b179d9 100644 --- a/scrypt/salsa_kernel.cu +++ b/scrypt/salsa_kernel.cu @@ -12,26 +12,16 @@ #include "salsa_kernel.h" +#include "nv_kernel2.h" #include "titan_kernel.h" -#include "fermi_kernel.h" -#include "test_kernel.h" #include "nv_kernel.h" -#include "nv_kernel2.h" #include "kepler_kernel.h" +#include "fermi_kernel.h" +#include "test_kernel.h" #include "miner.h" -#if WIN32 -#ifdef _WIN64 -#define _64BIT 1 -#endif -#else -#if __x86_64__ -#define _64BIT 1 -#endif -#endif - -#if _64BIT +#if defined(_WIN64) || defined(__x86_64__) || defined(__64BIT__) #define MAXMEM 0x300000000ULL // 12 GB (the largest Kepler) #else #define MAXMEM 0xFFFFFFFFULL // nearly 4 GB (32 bit limitations) @@ -42,21 +32,16 @@ #define DMIN 5 // define some error checking macros -#undef checkCudaErrors - -#if WIN32 -#define DELIMITER '/' -#else #define DELIMITER '/' -#endif #define __FILENAME__ ( strrchr(__FILE__, DELIMITER) != NULL ? strrchr(__FILE__, DELIMITER)+1 : __FILE__ ) +#undef checkCudaErrors #define checkCudaErrors(x) \ { \ cudaGetLastError(); \ x; \ cudaError_t err = cudaGetLastError(); \ - if (err != cudaSuccess) \ + if (err != cudaSuccess && !abort_flag) \ applog(LOG_ERR, "GPU #%d: Err %d: %s (%s:%d)", device_map[thr_id], err, cudaGetErrorString(err), __FILENAME__, __LINE__); \ } @@ -78,7 +63,7 @@ KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props) kernel = new NV2Kernel(); // we don't want this for Keccak though else if (props->major == 3 && props->minor == 0) kernel = new NVKernel(); - else if (props->major == 2 || props->major == 1) + else kernel = new FermiKernel(); } else @@ -88,7 +73,7 @@ KernelInterface *Best_Kernel_Heuristics(cudaDeviceProp *props) kernel = new TitanKernel(); else if (props->major == 3 && props->minor == 0) kernel = new KeplerKernel(); - else if (props->major == 2 || props->major == 1) + else kernel = new TestKernel(); } return kernel; @@ -861,7 +846,8 @@ bool cuda_scrypt_sync(int thr_id, int stream) if(err != cudaSuccess) { - applog(LOG_ERR, "GPU #%d: CUDA error `%s` while executing the kernel.", device_map[thr_id], cudaGetErrorString(err)); + if (!abort_flag) + applog(LOG_ERR, "GPU #%d: CUDA error `%s` while executing the kernel.", device_map[thr_id], cudaGetErrorString(err)); return false; } diff --git a/scrypt/sha256.cu b/scrypt/sha256.cu index 7d4c2e5..1ecaf92 100644 --- a/scrypt/sha256.cu +++ b/scrypt/sha256.cu @@ -6,7 +6,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h" @@ -14,21 +14,16 @@ #include "sha256.h" // define some error checking macros -#undef checkCudaErrors - -#if WIN32 #define DELIMITER '/' -#else -#define DELIMITER '/' -#endif #define __FILENAME__ ( strrchr(__FILE__, DELIMITER) != NULL ? strrchr(__FILE__, DELIMITER)+1 : __FILE__ ) +#undef checkCudaErrors #define checkCudaErrors(x) { \ cudaGetLastError(); \ x; \ cudaError_t err = cudaGetLastError(); \ - if (err != cudaSuccess) \ - applog(LOG_ERR, "GPU #%d: cudaError %d (%s) calling '%s' (%s line %d)\n", (int) device_map[thr_id], err, cudaGetErrorString(err), #x, __FILENAME__, __LINE__); \ + if (err != cudaSuccess && !abort_flag) \ + applog(LOG_ERR, "GPU #%d: cudaError %d (%s) (%s line %d)\n", (int) device_map[thr_id], err, cudaGetErrorString(err), __FILENAME__, __LINE__); \ } // from salsa_kernel.cu diff --git a/scrypt/test_kernel.cu b/scrypt/test_kernel.cu index f3d2df0..e4467d1 100644 --- a/scrypt/test_kernel.cu +++ b/scrypt/test_kernel.cu @@ -14,7 +14,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h" diff --git a/scrypt/titan_kernel.cu b/scrypt/titan_kernel.cu index 9b64006..8ed901d 100644 --- a/scrypt/titan_kernel.cu +++ b/scrypt/titan_kernel.cu @@ -9,7 +9,7 @@ #include -#include "cuda_runtime.h" +#include #include "miner.h" #include "salsa_kernel.h"