Browse Source

luffa/cube: fine tuning of maxregcount for the 750Ti

This allows 69 regs to be used (tested on Linux); a maxrregcount of 69 or 72
makes the compiler use only 64 regs, which is not enough on the 750Ti
for optimal performance...
master
Tanguy Pruvot 10 years ago
parent
commit
9f5744d4c0
  1. 2
      Makefile.am
  2. 2
      ccminer.vcxproj
  3. 2
      x11/cuda_x11_luffa512_Cubehash.cu

2
Makefile.am

@@ -105,7 +105,7 @@ x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
x11/cuda_x11_luffa512_Cubehash.o: x11/cuda_x11_luffa512_Cubehash.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
$(NVCC) $(nvcc_FLAGS) --maxrregcount=76 -o $@ -c $<
x13/cuda_x13_hamsi512.o: x13/cuda_x13_hamsi512.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=72 -o $@ -c $<

2
ccminer.vcxproj

@@ -455,7 +455,7 @@
</CudaCompile>
<CudaCompile Include="x11\cuda_x11_luffa512.cu" />
<CudaCompile Include="x11\cuda_x11_luffa512_Cubehash.cu">
<MaxRegCount>80</MaxRegCount>
<MaxRegCount>76</MaxRegCount>
</CudaCompile>
<CudaCompile Include="x11\cuda_x11_shavite512.cu">
<MaxRegCount>128</MaxRegCount>

2
x11/cuda_x11_luffa512_Cubehash.cu

@@ -731,8 +731,6 @@ static void finalization512(uint32_t *statebuffer, uint32_t *statechainv, uint32
__global__
#if __CUDA_ARCH__ > 500
__launch_bounds__(256, 4)
#else
__launch_bounds__(256, 3)
#endif
void x11_luffaCubehash512_gpu_hash_64(uint32_t threads, uint32_t *g_hash)
{

Loading…
Cancel
Save