@ -14,6 +14,8 @@
#define __CUDA_ARCH__ 500
#endif
// TPB: compile-time constant, fixed at 32.
// NOTE(review): presumably "threads per block" for the kernels in this file
// (32 is also the warp size) -- confirm against the launch sites.
#define TPB 32
#if __CUDA_ARCH__ >= 500
#include "cuda_lyra2_vectors.h"
@ -22,8 +24,6 @@
// Ncol: compile-time constant, fixed at 4.
// NOTE(review): presumably the number of columns in the Lyra2 state matrix --
// confirm against the code that indexes with it.
#define Ncol 4
// memshift: compile-time constant, fixed at 3.
// NOTE(review): presumably a per-element stride/offset used when addressing
// the matrix storage -- its use sites are not visible here; confirm.
#define memshift 3
// Device-side global pointer to uint2x4 elements. The pointer variable itself
// resides in device memory (__device__ qualifier), so host code cannot assign
// it directly and must go through the CUDA symbol API.
// NOTE(review): uint2x4 is presumably declared in cuda_lyra2_vectors.h, and the
// buffer this points to is presumably allocated and installed by host-side
// setup code -- neither is visible here; confirm.
__device__ uint2x4 *DMatrix;
__device__ __forceinline__ uint2 LD4S(const int index)