diff --git a/cuda_helper.h b/cuda_helper.h index 7a99a72..da0fc2a 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -481,31 +481,12 @@ static __device__ __forceinline__ uint2 operator~ (uint2 a) { return make_uint2( static __device__ __forceinline__ void operator^= (uint2 &a, uint2 b) { a = a ^ b; } static __device__ __forceinline__ uint2 operator+ (uint2 a, uint2 b) { -#if defined(__CUDA_ARCH__) && CUDA_VERSION < 7000 - uint2 result; - asm("{ // uint2 a+b \n\t" - "add.cc.u32 %0, %2, %4; \n\t" - "addc.u32 %1, %3, %5; \n\t" - "}\n" : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y)); - return result; -#else return vectorize(devectorize(a) + devectorize(b)); -#endif } static __device__ __forceinline__ void operator+= (uint2 &a, uint2 b) { a = a + b; } - static __device__ __forceinline__ uint2 operator- (uint2 a, uint2 b) { -#if defined(__CUDA_ARCH__) && CUDA_VERSION < 7000 - uint2 result; - asm("{ // uint2 a-b \n\t" - "sub.cc.u32 %0, %2, %4; \n\t" - "subc.u32 %1, %3, %5; \n\t" - "}\n" : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y)); - return result; -#else return vectorize(devectorize(a) - devectorize(b)); -#endif } static __device__ __forceinline__ void operator-= (uint2 &a, uint2 b) { a = a - b; }