From 9f5744d4c053fd57719ff0954a4ead5591f5e158 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 17 Jun 2015 03:56:22 +0200 Subject: [PATCH] luffa/cube: fine tuning of maxrregcount for the 750Ti Setting maxrregcount to 76 allows 69 regs to be used (tested on Linux). Values of 69 or 72 make the compiler use 64 regs, which is not enough on the 750Ti for optimal performance... --- Makefile.am | 2 +- ccminer.vcxproj | 2 +- x11/cuda_x11_luffa512_Cubehash.cu | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile.am b/Makefile.am index 2cf49ba..ea8c9b7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -105,7 +105,7 @@ x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu $(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $< x11/cuda_x11_luffa512_Cubehash.o: x11/cuda_x11_luffa512_Cubehash.cu - $(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $< + $(NVCC) $(nvcc_FLAGS) --maxrregcount=76 -o $@ -c $< x13/cuda_x13_hamsi512.o: x13/cuda_x13_hamsi512.cu $(NVCC) $(nvcc_FLAGS) --maxrregcount=72 -o $@ -c $< diff --git a/ccminer.vcxproj b/ccminer.vcxproj index d42e263..235e36c 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -455,7 +455,7 @@ - 80 + 76 128 diff --git a/x11/cuda_x11_luffa512_Cubehash.cu b/x11/cuda_x11_luffa512_Cubehash.cu index 1e7b914..cab0062 100644 --- a/x11/cuda_x11_luffa512_Cubehash.cu +++ b/x11/cuda_x11_luffa512_Cubehash.cu @@ -731,8 +731,6 @@ static void finalization512(uint32_t *statebuffer, uint32_t *statechainv, uint32 __global__ #if __CUDA_ARCH__ > 500 __launch_bounds__(256, 4) -#else -__launch_bounds__(256, 3) #endif void x11_luffaCubehash512_gpu_hash_64(uint32_t threads, uint32_t *g_hash) {