luffa/cube: fine tuning of maxregcount for the 750Ti
This allows 69 registers to be used (tested on Linux). A limit of 69 or 72 makes the compiler use only 64 registers, which is not enough on the 750Ti for optimal performance...
This commit is contained in:
parent
634bea21f5
commit
9f5744d4c0
@ -105,7 +105,7 @@ x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
|
||||
|
||||
x11/cuda_x11_luffa512_Cubehash.o: x11/cuda_x11_luffa512_Cubehash.cu
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=76 -o $@ -c $<
|
||||
|
||||
x13/cuda_x13_hamsi512.o: x13/cuda_x13_hamsi512.cu
|
||||
$(NVCC) $(nvcc_FLAGS) --maxrregcount=72 -o $@ -c $<
|
||||
|
@ -455,7 +455,7 @@
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\cuda_x11_luffa512.cu" />
|
||||
<CudaCompile Include="x11\cuda_x11_luffa512_Cubehash.cu">
|
||||
<MaxRegCount>80</MaxRegCount>
|
||||
<MaxRegCount>76</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="x11\cuda_x11_shavite512.cu">
|
||||
<MaxRegCount>128</MaxRegCount>
|
||||
|
@ -731,8 +731,6 @@ static void finalization512(uint32_t *statebuffer, uint32_t *statechainv, uint32
|
||||
__global__
|
||||
#if __CUDA_ARCH__ > 500
|
||||
__launch_bounds__(256, 4)
|
||||
#else
|
||||
__launch_bounds__(256, 3)
|
||||
#endif
|
||||
void x11_luffaCubehash512_gpu_hash_64(uint32_t threads, uint32_t *g_hash)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user