Browse Source

equihash: error fix for the TITAN V

pull/4/head
Tanguy Pruvot 6 years ago
parent
commit
fd406f02c2
  1. 37
      equi/cuda_equi.cu

37
equi/cuda_equi.cu

@@ -80,6 +80,8 @@ u32 umin(const u32, const u32);
u32 umax(const u32, const u32); u32 umax(const u32, const u32);
#endif #endif
#define OPT_SYNC_ALL
#if CUDA_VERSION >= 9000 && __CUDA_ARCH__ >= 300 #if CUDA_VERSION >= 9000 && __CUDA_ARCH__ >= 300
#define __shfl2(var, srcLane) __shfl_sync(0xFFFFFFFFu, var, srcLane) #define __shfl2(var, srcLane) __shfl_sync(0xFFFFFFFFu, var, srcLane)
#undef __any #undef __any
@@ -514,10 +516,11 @@ __global__ void digit_1(equi<RB, SM>* eq)
u32 si[2]; u32 si[2];
#ifdef OPT_SYNC_ALL
// enable this to make fully safe shared mem operations; // enable this to make fully safe shared mem operations;
// disabled gains some speed, but can rarely cause a crash // disabled gains some speed, but can rarely cause a crash
//__syncthreads(); __syncthreads();
#endif
#pragma unroll #pragma unroll
for (u32 i = 0; i != 2; ++i) for (u32 i = 0; i != 2; ++i)
{ {
@@ -654,11 +657,9 @@ __global__ void digit_2(equi<RB, SM>* eq)
uint4 tt[2]; uint4 tt[2];
u32 si[2]; u32 si[2];
#ifdef OPT_SYNC_ALL
// enable this to make fully safe shared mem operations; __syncthreads();
// disabled gains some speed, but can rarely cause a crash #endif
//__syncthreads();
#pragma unroll 2 #pragma unroll 2
for (u32 i = 0; i < 2; i++) for (u32 i = 0; i < 2; i++)
{ {
@@ -785,9 +786,9 @@ __global__ void digit_3(equi<RB, SM>* eq)
uint4 tt[2]; uint4 tt[2];
u32 ta[2]; u32 ta[2];
// enable this to make fully safe shared mem operations; #ifdef OPT_SYNC_ALL
// disabled gains some speed, but can rarely cause a crash __syncthreads();
//__syncthreads(); #endif
#pragma unroll 2 #pragma unroll 2
for (u32 i = 0; i < 2; i++) for (u32 i = 0; i < 2; i++)
@@ -919,11 +920,9 @@ __global__ void digit_4(equi<RB, SM>* eq)
u32 si[2]; u32 si[2];
uint4 tt[2]; uint4 tt[2];
#ifdef OPT_SYNC_ALL
// enable this to make fully safe shared mem operations; __syncthreads();
// disabled gains some speed, but can rarely cause a crash #endif
//__syncthreads();
#pragma unroll 2 #pragma unroll 2
for (u32 i = 0; i < 2; i++) for (u32 i = 0; i < 2; i++)
{ {
@@ -1035,11 +1034,9 @@ __global__ void digit_5(equi<RB, SM>* eq)
u32 si[2]; u32 si[2];
uint4 tt[2]; uint4 tt[2];
#ifdef OPT_SYNC_ALL
// enable this to make fully safe shared mem operations; __syncthreads();
// disabled gains some speed, but can rarely cause a crash #endif
//__syncthreads();
#pragma unroll 2 #pragma unroll 2
for (u32 i = 0; i < 2; i++) for (u32 i = 0; i < 2; i++)
{ {

Loading…
Cancel
Save