From 1aec4555cc84e0b119ae317632dee77c189210b8 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot
Date: Thu, 11 Sep 2014 00:48:47 +0200
Subject: [PATCH] Tune reg. count for qubit (luffa) algos

---
Reviewer notes (free-form area after the separator; not part of the commit):

* Purpose: adds an explicit object rule for qubit/qubit_luffa512.o that
  compiles with --maxrregcount=80, the same limit the existing
  x11/cuda_x11_luffa512.o rule uses.
* Also appends --ptxas-options="-v" to nvcc_FLAGS, so every rule that
  expands $(nvcc_FLAGS) picks it up, not only the new qubit rule.
  NOTE(review): presumably enabled to print ptxas resource/register usage
  while tuning -- confirm whether it is meant to stay on for all builds.
* NOTE(review): this patch was received collapsed onto a single line. Line
  breaks, blank context lines and the hard TABs on recipe lines were
  restored from the hunk header counts (-57,7 +57,7 and -66,6 +66,9).
  Blank context lines are a single space. Verify with `git apply --check`
  before applying.

 Makefile.am | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 7cfaa74..72a7bec 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -57,7 +57,7 @@ nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
 #nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
 
 nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@
-nvcc_FLAGS += $(JANSSON_INCLUDES)
+nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
 
 # we're now targeting all major compute architectures within one binary.
 .cu.o:
@@ -66,6 +66,9 @@ nvcc_FLAGS += $(JANSSON_INCLUDES)
 blake32.o: blake32.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
 
+qubit/qubit_luffa512.o: qubit/qubit_luffa512.cu
+	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<
+
 # Luffa and Echo are faster with 80 registers than 128
 x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
 	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<