luffa/cube: fine tuning of maxregcount for the 750Ti

This allows 69 registers to be used (tested on Linux). Setting the limit to 69 or 72 makes the compiler use only 64 registers, which is not enough on the 750Ti for optimal performance...
2015-06-17 03:56:22 +02:00 · 2015-06-17 03:56:22 +02:00 · 9f5744d4c0
commit 9f5744d4c0
parent 634bea21f5
3 changed files with 2 additions and 4 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -105,7 +105,7 @@ x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<

 x11/cuda_x11_luffa512_Cubehash.o: x11/cuda_x11_luffa512_Cubehash.cu
-	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=76 -o $@ -c $<

 x13/cuda_x13_hamsi512.o: x13/cuda_x13_hamsi512.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=72 -o $@ -c $<
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -455,7 +455,7 @@
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_luffa512.cu" />
    <CudaCompile Include="x11\cuda_x11_luffa512_Cubehash.cu">
-      <MaxRegCount>80</MaxRegCount>
+      <MaxRegCount>76</MaxRegCount>
    </CudaCompile>
    <CudaCompile Include="x11\cuda_x11_shavite512.cu">
      <MaxRegCount>128</MaxRegCount>
--- a/x11/cuda_x11_luffa512_Cubehash.cu
+++ b/x11/cuda_x11_luffa512_Cubehash.cu
@@ -731,8 +731,6 @@ static void finalization512(uint32_t *statebuffer, uint32_t *statechainv, uint32
 __global__
 #if __CUDA_ARCH__ > 500
 __launch_bounds__(256, 4)
-#else
-__launch_bounds__(256, 3)
 #endif
 void x11_luffaCubehash512_gpu_hash_64(uint32_t threads, uint32_t *g_hash)
 {