|
|
|
@ -57,7 +57,7 @@ nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
@@ -57,7 +57,7 @@ nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
|
|
|
|
|
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
|
|
|
|
|
|
|
|
|
|
nvcc_FLAGS = $(nvcc_ARCH) -I . @CUDA_CFLAGS@ |
|
|
|
|
nvcc_FLAGS += $(JANSSON_INCLUDES) |
|
|
|
|
nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v" |
|
|
|
|
|
|
|
|
|
# we're now targeting all major compute architectures within one binary.
|
|
|
|
|
.cu.o: |
|
|
|
@ -66,6 +66,9 @@ nvcc_FLAGS += $(JANSSON_INCLUDES)
@@ -66,6 +66,9 @@ nvcc_FLAGS += $(JANSSON_INCLUDES)
|
|
|
|
|
blake32.o: blake32.cu |
|
|
|
|
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $< |
|
|
|
|
|
|
|
|
|
qubit/qubit_luffa512.o: qubit/qubit_luffa512.cu |
|
|
|
|
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $< |
|
|
|
|
|
|
|
|
|
# Luffa and Echo are faster with 80 registers than 128
|
|
|
|
|
x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu |
|
|
|
|
$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $< |
|
|
|
|