Makefile: luffa was being compiled with more than 80 registers (10 vs 14 ms)

Add an easy switch for the nvcc target architecture and move the duplicated sha2 into the sph folder.
2014-08-23 01:57:12 +02:00 · 2014-08-23 01:57:12 +02:00 · 2c24bc93d4
commit 2c24bc93d4
parent 5cf024cc5d
4 changed files with 17 additions and 11 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -16,8 +16,7 @@ bin_PROGRAMS	= ccminer
 ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/inttypes.h compat/stdbool.h compat/unistd.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
-			  cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
-			  sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
+			  cpu-miner.c util.c hefty1.c scrypt.c \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -34,7 +33,8 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
 			  quark/cuda_quark_compactionTest.cu \
 			  cuda_nist5.cu \
-			  sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
+			  sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \
+			  sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  sph/shabal.c sph/whirlpool.c \
 			  x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
@ -46,7 +46,11 @@ ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
 ccminer_CPPFLAGS	= -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME

-nvcc_FLAGS = -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -I . --ptxas-options=-v --use_fast_math
+nvcc_ARCH  = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
+#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
+#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
+
+nvcc_FLAGS = $(nvcc_ARCH) -I . --ptxas-options=-v --use_fast_math
 nvcc_FLAGS += $(JANSSON_INCLUDES)

 # we're now targeting all major compute architectures within one binary.
@ -55,6 +59,8 @@ nvcc_FLAGS += $(JANSSON_INCLUDES)

 # Luffa and Echo are faster with 80 registers than 128
 x11/cuda_x11_luffa512.o: x11/cuda_x11_luffa512.cu
+	$(NVCC) $(nvcc_FLAGS) @CFLAGS@ --maxrregcount=80 -o $@ -c $<
+
 x11/cuda_x11_echo.o: x11/cuda_x11_echo.cu
 	$(NVCC) $(nvcc_FLAGS) @CFLAGS@ --maxrregcount=80 -o $@ -c $<

--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -238,8 +238,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
    <ClCompile Include="groestlcoin.cpp" />
    <ClCompile Include="hefty1.c" />
    <ClCompile Include="myriadgroestl.cpp" />
-    <ClCompile Include="scrypt.c" />
-    <ClCompile Include="sha2.c">
+    <ClCompile Include="scrypt.c" >
      <Optimization Condition="'$(Configuration)'=='Release'">Full</Optimization>
      <AdditionalOptions>/Tp %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
@ -253,6 +252,7 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
    <ClCompile Include="sph\jh.c" />
    <ClCompile Include="sph\keccak.c" />
    <ClCompile Include="sph\luffa.c" />
+    <ClCompile Include="sph\sha2.c" />
    <ClCompile Include="sph\shabal.c" />
    <ClCompile Include="sph\shavite.c" />
    <ClCompile Include="sph\simd.c" />
@ -525,4 +525,4 @@ copy "$(CudaToolkitBinDir)\cudart64*.dll" "$(OutDir)"</Command>
  <ImportGroup Label="ExtensionTargets">
    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
  </ImportGroup>
-</Project>
+</Project>
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -90,9 +90,6 @@
    <ClCompile Include="util.c">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="sha2.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
    <ClCompile Include="cpu-miner.c">
      <Filter>Source Files</Filter>
    </ClCompile>
@ -138,6 +135,9 @@
    <ClCompile Include="sph\luffa.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
+    <ClCompile Include="sph\sha2.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
    <ClCompile Include="sph\shavite.c">
      <Filter>Source Files\sph</Filter>
    </ClCompile>
@ -404,4 +404,4 @@
      <Filter>Source Files\CUDA\x15</Filter>
    </CudaCompile>
  </ItemGroup>
-</Project>
+</Project>
--- a/sph/sha2.c
+++ b/sph/sha2.c