|
|
@ -14,6 +14,8 @@ |
|
|
|
#define __CUDA_ARCH__ 500 |
|
|
|
#define __CUDA_ARCH__ 500 |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define TPB 32 |
|
|
|
|
|
|
|
|
|
|
|
#if __CUDA_ARCH__ >= 500 |
|
|
|
#if __CUDA_ARCH__ >= 500 |
|
|
|
|
|
|
|
|
|
|
|
#include "cuda_lyra2_vectors.h" |
|
|
|
#include "cuda_lyra2_vectors.h" |
|
|
@ -22,8 +24,6 @@ |
|
|
|
#define Ncol 4 |
|
|
|
#define Ncol 4 |
|
|
|
#define memshift 3 |
|
|
|
#define memshift 3 |
|
|
|
|
|
|
|
|
|
|
|
#define TPB 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__device__ uint2x4 *DMatrix; |
|
|
|
__device__ uint2x4 *DMatrix; |
|
|
|
|
|
|
|
|
|
|
|
__device__ __forceinline__ uint2 LD4S(const int index) |
|
|
|
__device__ __forceinline__ uint2 LD4S(const int index) |
|
|
|