Browse Source

Remove some useless conversions

This does not impact performance either...
master
Tanguy Pruvot 10 years ago
parent
commit
9f2dd3ee60
  1. 8
      Algo256/cuda_groestl256.cu
  2. 16
      cuda_helper.h
  3. 6
      heavy/cuda_groestl512.cu
  4. 4
      quark/cuda_quark_groestl512_sm20.cu
  5. 4
      x17/cuda_x17_haval512.cu
  6. 4
      x17/cuda_x17_sha512.cu

8
Algo256/cuda_groestl256.cu

@ -1,5 +1,8 @@
#include <memory.h> #include <memory.h>
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#include "cuda_helper.h" #include "cuda_helper.h"
uint32_t *d_gnounce[MAX_GPUS]; uint32_t *d_gnounce[MAX_GPUS];
@ -7,9 +10,6 @@ uint32_t *d_GNonce[MAX_GPUS];
__constant__ uint32_t pTarget[8]; __constant__ uint32_t pTarget[8];
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define C32e(x) \ #define C32e(x) \
((SPH_C32(x) >> 24) \ ((SPH_C32(x) >> 24) \
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
@ -306,4 +306,4 @@ __host__
void groestl256_setTarget(const void *pTargetIn) void groestl256_setTarget(const void *pTargetIn)
{ {
cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
} }

16
cuda_helper.h

@ -37,14 +37,24 @@ extern const uint3 threadIdx;
#endif #endif
#ifndef SPH_C32 #ifndef SPH_C32
#define SPH_C32(x) ((uint32_t)(x ## U)) #define SPH_C32(x) (x)
// #define SPH_C32(x) ((uint32_t)(x ## U))
#endif #endif
#ifndef SPH_C64 #ifndef SPH_C64
#define SPH_C64(x) ((uint64_t)(x ## ULL)) #define SPH_C64(x) (x)
// #define SPH_C64(x) ((uint64_t)(x ## ULL))
#endif #endif
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #ifndef SPH_T32
#define SPH_T32(x) (x)
// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#endif
#ifndef SPH_T64
#define SPH_T64(x) (x)
// #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#endif
#if __CUDA_ARCH__ < 320 #if __CUDA_ARCH__ < 320
// Kepler (Compute 3.0) // Kepler (Compute 3.0)

6
heavy/cuda_groestl512.cu

@ -1,6 +1,9 @@
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
@ -13,8 +16,6 @@ uint32_t *d_hash4output[MAX_GPUS];
__constant__ uint32_t groestl_gpu_state[32]; __constant__ uint32_t groestl_gpu_state[32];
__constant__ uint32_t groestl_gpu_msg[32]; __constant__ uint32_t groestl_gpu_msg[32];
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define PC32up(j, r) ((uint32_t)((j) + (r))) #define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0 #define PC32dn(j, r) 0
#define QC32up(j, r) 0xFFFFFFFF #define QC32up(j, r) 0xFFFFFFFF
@ -25,7 +26,6 @@ __constant__ uint32_t groestl_gpu_msg[32];
#define B32_2(x) (((x) >> 16) & 0xFF) #define B32_2(x) (((x) >> 16) & 0xFF)
#define B32_3(x) ((x) >> 24) #define B32_3(x) ((x) >> 24)
#define SPH_C32(x) ((uint32_t)(x ## U))
#define C32e(x) ((SPH_C32(x) >> 24) \ #define C32e(x) ((SPH_C32(x) >> 24) \
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \

4
quark/cuda_quark_groestl512_sm20.cu

@ -5,8 +5,8 @@
#define MAXWELL_OR_FERMI 0 #define MAXWELL_OR_FERMI 0
#define USE_SHARED 1 #define USE_SHARED 1
#define SPH_C32(x) ((uint32_t)(x ## U)) // #define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) // #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define PC32up(j, r) ((uint32_t)((j) + (r))) #define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0 #define PC32dn(j, r) 0

4
x17/cuda_x17_haval512.cu

@ -41,13 +41,13 @@
#define USE_SHARED 1 #define USE_SHARED 1
#define SPH_T64(x) ((x) & 0xFFFFFFFFFFFFFFFFULL)
#include "cuda_helper.h" #include "cuda_helper.h"
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
static __constant__ uint32_t initVector[8]; static __constant__ uint32_t initVector[8];
static const uint32_t c_initVector[8] = { static const uint32_t c_initVector[8] = {

4
x17/cuda_x17_sha512.cu

@ -36,6 +36,7 @@
#include <stdio.h> #include <stdio.h>
#define USE_SHARED 1 #define USE_SHARED 1
#define SPH_C64(x) ((uint64_t)(x ## ULL))
#include "cuda_helper.h" #include "cuda_helper.h"
@ -44,9 +45,6 @@
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
static __constant__ uint64_t H_512[8]; static __constant__ uint64_t H_512[8];
static const uint64_t H512[8] = { static const uint64_t H512[8] = {

Loading…
Cancel
Save