From 9f2dd3ee600ce972d503132ced5aa5946374b343 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sat, 24 Jan 2015 07:17:48 +0100 Subject: [PATCH] Remove some useless conversions do not impact perfs neither... --- Algo256/cuda_groestl256.cu | 8 ++++---- cuda_helper.h | 16 +++++++++++++--- heavy/cuda_groestl512.cu | 6 +++--- quark/cuda_quark_groestl512_sm20.cu | 4 ++-- x17/cuda_x17_haval512.cu | 4 ++-- x17/cuda_x17_sha512.cu | 4 +--- 6 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu index 0f27b8a..a4c5e47 100644 --- a/Algo256/cuda_groestl256.cu +++ b/Algo256/cuda_groestl256.cu @@ -1,5 +1,8 @@ #include +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + #include "cuda_helper.h" uint32_t *d_gnounce[MAX_GPUS]; @@ -7,9 +10,6 @@ uint32_t *d_GNonce[MAX_GPUS]; __constant__ uint32_t pTarget[8]; -#define SPH_C32(x) ((uint32_t)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) - #define C32e(x) \ ((SPH_C32(x) >> 24) \ | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ @@ -306,4 +306,4 @@ __host__ void groestl256_setTarget(const void *pTargetIn) { cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice); -} \ No newline at end of file +} diff --git a/cuda_helper.h b/cuda_helper.h index 2a20e6c..720ad9a 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -37,14 +37,24 @@ extern const uint3 threadIdx; #endif #ifndef SPH_C32 -#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_C32(x) (x) +// #define SPH_C32(x) ((uint32_t)(x ## U)) #endif #ifndef SPH_C64 -#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C64(x) (x) +// #define SPH_C64(x) ((uint64_t)(x ## ULL)) #endif -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#ifndef SPH_T32 +#define SPH_T32(x) (x) +// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#endif + +#ifndef SPH_T64 +#define SPH_T64(x) (x) +// #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#endif #if __CUDA_ARCH__ < 320 // Kepler (Compute 3.0) diff --git a/heavy/cuda_groestl512.cu b/heavy/cuda_groestl512.cu index f9fdc0a..3c4030b 100644 --- a/heavy/cuda_groestl512.cu +++ b/heavy/cuda_groestl512.cu @@ -1,6 +1,9 @@ #include #include +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + #include "cuda_helper.h" // globaler Speicher für alle HeftyHashes aller Threads @@ -13,8 +16,6 @@ uint32_t *d_hash4output[MAX_GPUS]; __constant__ uint32_t groestl_gpu_state[32]; __constant__ uint32_t groestl_gpu_msg[32]; -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) - #define PC32up(j, r) ((uint32_t)((j) + (r))) #define PC32dn(j, r) 0 #define QC32up(j, r) 0xFFFFFFFF @@ -25,7 +26,6 @@ __constant__ uint32_t groestl_gpu_msg[32]; #define B32_2(x) (((x) >> 16) & 0xFF) #define B32_3(x) ((x) >> 24) -#define SPH_C32(x) ((uint32_t)(x ## U)) #define C32e(x) ((SPH_C32(x) >> 24) \ | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ diff --git a/quark/cuda_quark_groestl512_sm20.cu b/quark/cuda_quark_groestl512_sm20.cu index e7dfe35..b754fcb 100644 --- a/quark/cuda_quark_groestl512_sm20.cu +++ b/quark/cuda_quark_groestl512_sm20.cu @@ -5,8 +5,8 @@ #define MAXWELL_OR_FERMI 0 #define USE_SHARED 1 -#define SPH_C32(x) ((uint32_t)(x ## U)) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +// #define SPH_C32(x) ((uint32_t)(x ## U)) +// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #define PC32up(j, r) ((uint32_t)((j) + (r))) #define PC32dn(j, r) 0 diff --git a/x17/cuda_x17_haval512.cu b/x17/cuda_x17_haval512.cu index ff9912d..ab2cc86 100644 --- a/x17/cuda_x17_haval512.cu +++ b/x17/cuda_x17_haval512.cu @@ -41,13 +41,13 @@ #define USE_SHARED 1 +#define SPH_T64(x) ((x) & 0xFFFFFFFFFFFFFFFFULL) + #include "cuda_helper.h" #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) - static __constant__ uint32_t initVector[8]; static const uint32_t c_initVector[8] = { diff --git a/x17/cuda_x17_sha512.cu b/x17/cuda_x17_sha512.cu index 8ef2a27..e996083 100644 --- a/x17/cuda_x17_sha512.cu +++ b/x17/cuda_x17_sha512.cu @@ -36,6 +36,7 @@ #include #define USE_SHARED 1 +#define SPH_C64(x) ((uint64_t)(x ## ULL)) #include "cuda_helper.h" @@ -44,9 +45,6 @@ #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) - static __constant__ uint64_t H_512[8]; static const uint64_t H512[8] = {