Browse Source

Makefile: luffa was using more than 80 regs (10 vs 14ms)

add easy switch for nvcc target arch

and move duplicated sha2 to sph folder
master
Tanguy Pruvot 10 years ago
parent
commit
2c24bc93d4
  1. 14
      Makefile.am
  2. 4
      ccminer.vcxproj
  3. 6
      ccminer.vcxproj.filters
  4. 0
      sph/sha2.c

14
Makefile.am

@ -16,8 +16,7 @@ bin_PROGRAMS = ccminer
ccminer_SOURCES = elist.h miner.h compat.h \ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \ cpu-miner.c util.c hefty1.c scrypt.c \
sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
heavy/heavy.cu \ heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -34,7 +33,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \ quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
quark/cuda_quark_compactionTest.cu \ quark/cuda_quark_compactionTest.cu \
cuda_nist5.cu \ cuda_nist5.cu \
sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
sph/shabal.c sph/whirlpool.c \ sph/shabal.c sph/whirlpool.c \
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \ x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
@ -46,7 +46,11 @@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
nvcc_FLAGS = -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -I . --ptxas-options=-v --use_fast_math nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
nvcc_FLAGS = $(nvcc_ARCH) -I . --ptxas-options=-v --use_fast_math
nvcc_FLAGS += $(JANSSON_INCLUDES) nvcc_FLAGS += $(JANSSON_INCLUDES)
# we're now targeting all major compute architectures within one binary. # we're now targeting all major compute architectures within one binary.
@ -55,6 +59,8 @@ nvcc_FLAGS += $(JANSSON_INCLUDES)
# Luffa and Echo are faster with 80 registers than 128 # Luffa and Echo are faster with 80 registers than 128
x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
$(NVCC) $(nvcc_FLAGS) @CFLAGS@ --maxrregcount=80 -o $@ -c $<
x11/cuda_x11_echo.o: x11/cuda_x11_echo.cu x11/cuda_x11_echo.o: x11/cuda_x11_echo.cu
$(NVCC) $(nvcc_FLAGS) @CFLAGS@ --maxrregcount=80 -o $@ -c $< $(NVCC) $(nvcc_FLAGS) @CFLAGS@ --maxrregcount=80 -o $@ -c $<

4
ccminer.vcxproj

@ -238,8 +238,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<ClCompile Include="groestlcoin.cpp" /> <ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hefty1.c" /> <ClCompile Include="hefty1.c" />
<ClCompile Include="myriadgroestl.cpp" /> <ClCompile Include="myriadgroestl.cpp" />
<ClCompile Include="scrypt.c" /> <ClCompile Include="scrypt.c" >
<ClCompile Include="sha2.c">
<Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization> <Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
<AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions>
</ClCompile> </ClCompile>
@ -253,6 +252,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
<ClCompile Include="sph\jh.c" /> <ClCompile Include="sph\jh.c" />
<ClCompile Include="sph\keccak.c" /> <ClCompile Include="sph\keccak.c" />
<ClCompile Include="sph\luffa.c" /> <ClCompile Include="sph\luffa.c" />
<ClCompile Include="sph\sha2.c" />
<ClCompile Include="sph\shabal.c" /> <ClCompile Include="sph\shabal.c" />
<ClCompile Include="sph\shavite.c" /> <ClCompile Include="sph\shavite.c" />
<ClCompile Include="sph\simd.c" /> <ClCompile Include="sph\simd.c" />

6
ccminer.vcxproj.filters

@ -90,9 +90,6 @@
<ClCompile Include="util.c"> <ClCompile Include="util.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="sha2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="cpu-miner.c"> <ClCompile Include="cpu-miner.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
@ -138,6 +135,9 @@
<ClCompile Include="sph\luffa.c"> <ClCompile Include="sph\luffa.c">
<Filter>Source Files\sph</Filter> <Filter>Source Files\sph</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="sph\sha2.c">
<Filter>Source Files\sph</Filter>
</ClCompile>
<ClCompile Include="sph\shavite.c"> <ClCompile Include="sph\shavite.c">
<Filter>Source Files\sph</Filter> <Filter>Source Files\sph</Filter>
</ClCompile> </ClCompile>

0
sha2.c → sph/sha2.c

Loading…
Cancel
Save