fdd5d29071
The previous echo commit only improved Linux performance and reduced Windows performance compared to 1.4.9; this one seems to give at least the 1.4.9 performance on Windows, and the same on Linux. The Shavite optimisation seems OK on both (it now uses 64 registers). The launch_bounds will force the number of registers, so remove the specific Makefile rules on Linux. Manual "cherry-pick" with fixed line endings and some adaptations.
106 lines
4.4 KiB
Makefile
106 lines
4.4 KiB
Makefile
# Allow host-side CUDA functions to be used from C/C++ sources: the CUDA
# include path is handed to the preprocessor through DEF_INCLUDES.
DEF_INCLUDES = @CUDA_INCLUDES@

# Include path for the in-tree compat/jansson headers; empty unless the
# WANT_JANSSON conditional is enabled. Written as if/else so the variable
# is defined exactly once per condition.
if WANT_JANSSON
JANSSON_INCLUDES = -I$(top_srcdir)/compat/jansson
else
JANSSON_INCLUDES =
endif

# Additional files to ship in the distribution (build helper, docs,
# Visual Studio project files and compat sources not listed elsewhere).
EXTRA_DIST = autogen.sh README.txt LICENSE.txt \
	cudaminer.sln cudaminer.vcxproj cudaminer.vcxproj.filters \
	compat/gettimeofday.c compat/getopt/getopt_long.c cpuminer-config.h.in

# Sub-directory that is built recursively.
SUBDIRS = compat

# Program to build and install.
bin_PROGRAMS = ccminer

# Sources of ccminer: headers, host C/C++ files and CUDA (.cu) files.
# The .cu files are compiled by the nvcc rules further down in this file.
ccminer_SOURCES = elist.h miner.h compat.h \
	compat/inttypes.h compat/stdbool.h compat/unistd.h \
	compat/sys/time.h compat/getopt/getopt.h \
	crc32.c hefty1.c scrypt.c \
	ccminer.cpp util.cpp \
	api.cpp hashlog.cpp stats.cpp cuda.cpp \
	heavy/heavy.cu \
	heavy/cuda_blake512.cu heavy/cuda_blake512.h \
	heavy/cuda_combine.cu heavy/cuda_combine.h \
	heavy/cuda_groestl512.cu heavy/cuda_groestl512.h \
	heavy/cuda_hefty1.cu heavy/cuda_hefty1.h \
	heavy/cuda_keccak512.cu heavy/cuda_keccak512.h \
	heavy/cuda_sha256.cu heavy/cuda_sha256.h \
	keccak/cuda_keccak256.cu keccak/keccak256.cu \
	fuguecoin.cpp cuda_fugue256.cu sph/fugue.c sph/sph_fugue.h uint256.h \
	groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
	myriadgroestl.cpp cuda_myriadgroestl.cu \
	JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
	JHA/cuda_jha_compactionTest.cu cuda_checkhash.cu \
	quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
	quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu \
	quark/quarkcoin.cu quark/animecoin.cu \
	quark/cuda_quark_compactionTest.cu \
	cuda_nist5.cu blake32.cu pentablake.cu \
	sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
	sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
	sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
	sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
	qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/doom.cu \
	x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
	x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
	x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
	x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
	x15/whirlpool.cu \
	x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
	x11/s3.cu

# Optional NVML support: compiles nvml.cpp, defines USE_WRAPNVML (consumed
# through ccminer_CPPFLAGS) and adds -ldl (consumed through ccminer_LDADD).
# Without HAVE_NVML both helper variables stay undefined and expand to nothing.
if HAVE_NVML
ccminer_SOURCES += nvml.cpp
nvml_defs = -DUSE_WRAPNVML
nvml_libs = -ldl
endif

# Windows builds additionally compile compat/winansi.c.
if HAVE_WINDOWS
ccminer_SOURCES += compat/winansi.c
endif

# Linker flags: pthread flags plus the CUDA library search path.
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
# Libraries to link. @OPENMP_CFLAGS@ appears here as well so the OpenMP
# flag is also passed on the link line; $(nvml_libs) is empty without NVML.
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ $(nvml_libs)
# Preprocessor/compile flags for the host C/C++ sources, including the
# jansson and CUDA include paths and the compile-time SCRYPT_* switches.
ccminer_CPPFLAGS = @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(CPPFLAGS) $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) $(DEF_INCLUDES) $(nvml_defs) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME

# GPU architectures nvcc generates code for. Only compute 5.0 is enabled;
# uncomment a line below to additionally build for compute 3.5 / 3.0.
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"

# Flags shared by every nvcc invocation in this file: target architectures,
# include paths, CUDA compile flags, and verbose ptxas output (-v).
nvcc_FLAGS = $(nvcc_ARCH) @CUDA_INCLUDES@ -I. @CUDA_CFLAGS@
nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"

# Default rule for CUDA sources: compile with nvcc for the architectures
# selected in nvcc_ARCH, capping register usage at 128 (--maxrregcount).
# Files that need a different cap have their own explicit rule.
.cu.o:
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=128 -o $@ -c $<

# blake32 is compiled with a 64-register cap instead of the default 128.
blake32.o: blake32.cu
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<

# keccak256 is compiled with a 92-register cap instead of the default 128.
keccak/cuda_keccak256.o: keccak/cuda_keccak256.cu
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=92 -o $@ -c $<

# qubit luffa512 is compiled with an 80-register cap instead of the default 128.
qubit/qubit_luffa512.o: qubit/qubit_luffa512.cu
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<

# Luffa is faster with 80 registers than 128.
# NOTE(review): this comment used to mention Echo as well, but no dedicated
# Echo rule is present here, so Echo uses the default .cu.o rule.
x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<

# x17 sha512 is compiled with an 80-register cap instead of the default 128.
x17/cuda_x17_sha512.o: x17/cuda_x17_sha512.cu
	$(NVCC) $(nvcc_FLAGS) --maxrregcount=80 -o $@ -c $<

# quark blake512: 80-register cap, with cudpp-2.1/include added to the
# include search path.
quark/cuda_quark_blake512.o: quark/cuda_quark_blake512.cu
	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<

# quark keccak512: 88-register cap, with cudpp-2.1/include added to the
# include search path.
quark/cuda_quark_keccak512.o: quark/cuda_quark_keccak512.cu
	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=88 -o $@ -c $<

# quark compactionTest: 80-register cap, with cudpp-2.1/include added to
# the include search path.
quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<

# JHA compactionTest: 80-register cap, with cudpp-2.1/include added to
# the include search path.
JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
	$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include --maxrregcount=80 -o $@ -c $<