Browse Source

committing changes to v0.3 release - added Groestlcoin.

master v0.3
Christian Buchner 11 years ago
parent
commit
b93669a99f
  1. 15
      Makefile.am
  2. 26
      Makefile.in
  3. 19
      README.txt
  4. 3
      ccminer.vcxproj
  5. 9
      ccminer.vcxproj.filters
  6. 20
      configure
  7. 2
      configure.ac
  8. 13
      cpu-miner.c
  9. 124
      cpuminer-config.h
  10. 1
      cuda_blake512.cu
  11. 1
      cuda_combine.cu
  12. 6
      cuda_fugue256.cu
  13. 31
      cuda_groestl512.cu
  14. 463
      cuda_groestlcoin.cu
  15. 8
      cuda_groestlcoin.h
  16. 1
      cuda_hefty1.cu
  17. 1
      cuda_keccak512.cu
  18. 1
      cuda_sha256.cu
  19. 3
      groestl.c
  20. 175
      groestlcoin.cpp
  21. 5
      miner.h

15
Makefile.am

@ -25,13 +25,20 @@ ccminer_SOURCES = elist.h miner.h compat.h \
cuda_hefty1.cu cuda_hefty1.h \ cuda_hefty1.cu cuda_hefty1.h \
cuda_keccak512.cu cuda_keccak512.h \ cuda_keccak512.cu cuda_keccak512.h \
cuda_sha256.cu cuda_sha256.h \ cuda_sha256.cu cuda_sha256.h \
cuda_fugue256.cu \ fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
fuguecoin.cpp fugue.c sph_fugue.h uint256.h groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
.cu.o: .cu.o:
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
## Thrust needs Compute 2.0 minimum
#heavy.o: heavy.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
#
#cuda_hefty1.o: cuda_hefty1.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<

26
Makefile.in

@ -60,8 +60,9 @@ am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \
ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \ ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \
cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \ cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \
cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \ cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \
cuda_sha256.$(OBJEXT) cuda_fugue256.$(OBJEXT) \ cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \
ccminer-fuguecoin.$(OBJEXT) ccminer-fugue.$(OBJEXT) cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \
ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT)
ccminer_OBJECTS = $(am_ccminer_OBJECTS) ccminer_OBJECTS = $(am_ccminer_OBJECTS)
ccminer_DEPENDENCIES = ccminer_DEPENDENCIES =
ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \ ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \
@ -275,8 +276,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
cuda_hefty1.cu cuda_hefty1.h \ cuda_hefty1.cu cuda_hefty1.h \
cuda_keccak512.cu cuda_keccak512.h \ cuda_keccak512.cu cuda_keccak512.h \
cuda_sha256.cu cuda_sha256.h \ cuda_sha256.cu cuda_sha256.h \
cuda_fugue256.cu \ fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
fuguecoin.cpp fugue.c sph_fugue.h uint256.h groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
@ -387,6 +388,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestlcoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@
@ -561,6 +563,20 @@ ccminer-fuguecoin.obj: fuguecoin.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi` @am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi`
ccminer-groestlcoin.o: groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.o -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
ccminer-groestlcoin.obj: groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.obj -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
# This directory's subdirectories are mostly independent; you can cd # This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile. # into them and run `make' without going through this Makefile.
# To change the values of `make' variables: instead of editing Makefiles, # To change the values of `make' variables: instead of editing Makefiles,
@ -1018,7 +1034,7 @@ uninstall-am: uninstall-binPROGRAMS
.cu.o: .cu.o:
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=sm_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
#heavy.o: heavy.cu #heavy.o: heavy.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< # $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<

19
README.txt

@ -1,5 +1,5 @@
ccMiner release 0.2 (Mar 21th 2014) - Pool Mining Release ccMiner release 0.3 (Mar 23rd 2014) - Groestlcoin Release
------------------------------------------------------------- -------------------------------------------------------------
*************************************************************** ***************************************************************
@ -36,6 +36,7 @@ its command line interface and options.
-a, --algo=ALGO specify the algorithm to use -a, --algo=ALGO specify the algorithm to use
heavy use to mine Heavycoin heavy use to mine Heavycoin
fugue256 use to mine Fuguecoin fugue256 use to mine Fuguecoin
groestl use to mine Groestlcoin
-o, --url=URL URL of mining server (default: " DEF_RPC_URL ") -o, --url=URL URL of mining server (default: " DEF_RPC_URL ")
-O, --userpass=U:P username:password pair for mining server -O, --userpass=U:P username:password pair for mining server
@ -66,24 +67,29 @@ its command line interface and options.
Example for Heavycoin Mining on heavycoinpool.com with a single gpu in your system Example for Heavycoin Mining on heavycoinpool.com with a single gpu in your system
cudaminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512 ccminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512
Example for Heavycoin Mining on hvc.1gh.com with a dual gpu in your system Example for Heavycoin Mining on hvc.1gh.com with a dual gpu in your system
cudaminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512 ccminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512
Example for Fuguecoin solo-mining with 4 gpu's in your system and a Fuguecoin-wallet running on localhost Example for Fuguecoin solo-mining with 4 gpu's in your system and a Fuguecoin-wallet running on localhost
cudaminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>> ccminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>>
Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs
-q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS ccminer.exe -q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS
Example for Groestlcoin solo mining
ccminer.exe -q -s 1 -a groestl -o http://127.0.0.1:1441 -u USERNAME -p PASSWORD
For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents
@ -101,6 +107,9 @@ from your old clunkers.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
March, 23 2014 added Groestlcoin support. stratum status unknown
(the only pool is currently down for fixing issues)
March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates
on Fermi and Maxwell devices. Kepler may suffer slightly on Fermi and Maxwell devices. Kepler may suffer slightly
(3-5%) (3-5%)

3
ccminer.vcxproj

@ -229,6 +229,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClCompile Include="fugue.c" /> <ClCompile Include="fugue.c" />
<ClCompile Include="fuguecoin.cpp" /> <ClCompile Include="fuguecoin.cpp" />
<ClCompile Include="groestl.c" /> <ClCompile Include="groestl.c" />
<ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hefty1.c" /> <ClCompile Include="hefty1.c" />
<ClCompile Include="keccak.c" /> <ClCompile Include="keccak.c" />
<ClCompile Include="scrypt.c" /> <ClCompile Include="scrypt.c" />
@ -256,6 +257,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="cuda_blake512.h" /> <ClInclude Include="cuda_blake512.h" />
<ClInclude Include="cuda_combine.h" /> <ClInclude Include="cuda_combine.h" />
<ClInclude Include="cuda_groestl512.h" /> <ClInclude Include="cuda_groestl512.h" />
<ClInclude Include="cuda_groestlcoin.h" />
<ClInclude Include="cuda_hefty1.h" /> <ClInclude Include="cuda_hefty1.h" />
<ClInclude Include="cuda_keccak512.h" /> <ClInclude Include="cuda_keccak512.h" />
<ClInclude Include="cuda_sha256.h" /> <ClInclude Include="cuda_sha256.h" />
@ -274,6 +276,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<CudaCompile Include="cuda_combine.cu" /> <CudaCompile Include="cuda_combine.cu" />
<CudaCompile Include="cuda_fugue256.cu" /> <CudaCompile Include="cuda_fugue256.cu" />
<CudaCompile Include="cuda_groestl512.cu" /> <CudaCompile Include="cuda_groestl512.cu" />
<CudaCompile Include="cuda_groestlcoin.cu" />
<CudaCompile Include="cuda_hefty1.cu" /> <CudaCompile Include="cuda_hefty1.cu" />
<CudaCompile Include="cuda_keccak512.cu" /> <CudaCompile Include="cuda_keccak512.cu" />
<CudaCompile Include="cuda_sha256.cu" /> <CudaCompile Include="cuda_sha256.cu" />

9
ccminer.vcxproj.filters

@ -90,6 +90,9 @@
<ClCompile Include="fuguecoin.cpp"> <ClCompile Include="fuguecoin.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="groestlcoin.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="compat.h"> <ClInclude Include="compat.h">
@ -158,6 +161,9 @@
<ClInclude Include="uint256.h"> <ClInclude Include="uint256.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="cuda_groestlcoin.h">
<Filter>Header Files\CUDA</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CudaCompile Include="cuda_sha256.cu"> <CudaCompile Include="cuda_sha256.cu">
@ -184,5 +190,8 @@
<CudaCompile Include="cuda_fugue256.cu"> <CudaCompile Include="cuda_fugue256.cu">
<Filter>Source Files\CUDA</Filter> <Filter>Source Files\CUDA</Filter>
</CudaCompile> </CudaCompile>
<CudaCompile Include="cuda_groestlcoin.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

20
configure vendored

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for ccminer 2014.03.21. # Generated by GNU Autoconf 2.68 for ccminer 2014.03.23.
# #
# #
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -557,8 +557,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='ccminer' PACKAGE_NAME='ccminer'
PACKAGE_TARNAME='ccminer' PACKAGE_TARNAME='ccminer'
PACKAGE_VERSION='2014.03.21' PACKAGE_VERSION='2014.03.23'
PACKAGE_STRING='ccminer 2014.03.21' PACKAGE_STRING='ccminer 2014.03.23'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures ccminer 2014.03.21 to adapt to many kinds of systems. \`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1368,7 +1368,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of ccminer 2014.03.21:";; short | recursive ) echo "Configuration of ccminer 2014.03.23:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1469,7 +1469,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
ccminer configure 2014.03.21 ccminer configure 2014.03.23
generated by GNU Autoconf 2.68 generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc. Copyright (C) 2010 Free Software Foundation, Inc.
@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by ccminer $as_me 2014.03.21, which was It was created by ccminer $as_me 2014.03.23, which was
generated by GNU Autoconf 2.68. Invocation command line was generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@ $ $0 $@
@ -2901,7 +2901,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='ccminer' PACKAGE='ccminer'
VERSION='2014.03.21' VERSION='2014.03.23'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by ccminer $as_me 2014.03.21, which was This file was extended by ccminer $as_me 2014.03.23, which was
generated by GNU Autoconf 2.68. Invocation command line was generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -7184,7 +7184,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
ccminer config.status 2014.03.21 ccminer config.status 2014.03.23
configured by $0, generated by GNU Autoconf 2.68, configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

2
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.03.21]) AC_INIT([ccminer], [2014.03.23])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

13
cpu-miner.c

@ -117,11 +117,13 @@ struct workio_cmd {
typedef enum { typedef enum {
ALGO_HEAVY, /* Heavycoin hash */ ALGO_HEAVY, /* Heavycoin hash */
ALGO_FUGUE256, /* Fugue256 */ ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL,
} sha256_algos; } sha256_algos;
static const char *algo_names[] = { static const char *algo_names[] = {
"heavy", "heavy",
"fugue256" "fugue256",
"groestl"
}; };
bool opt_debug = false; bool opt_debug = false;
@ -667,7 +669,11 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
if (opt_algo == ALGO_HEAVY) if (opt_algo == ALGO_HEAVY)
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
else else
if (opt_algo == ALGO_FUGUE256)
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root); SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
else
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
for (i = 0; i < sctx->job.merkle_count; i++) { for (i = 0; i < sctx->job.merkle_count; i++) {
memcpy(merkle_root + 32, sctx->job.merkle[i], 32); memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
if (opt_algo == ALGO_HEAVY) if (opt_algo == ALGO_HEAVY)
@ -817,7 +823,10 @@ static void *miner_thread(void *userdata)
rc = scanhash_fugue256(thr_id, work.data, work.target, rc = scanhash_fugue256(thr_id, work.data, work.target,
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_GROESTL:
rc = scanhash_groestlcoin(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default: default:
/* should never happen */ /* should never happen */
goto out; goto out;

124
cpuminer-config.h

@ -1,174 +1,167 @@
/* cpuminer-config.h. Generated from cpuminer-config.h.in by configure. */
/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ /* cpuminer-config.h.in. Generated from configure.ac by autoheader. */
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems. systems. This function is required for `alloca.c' support on those systems.
*/ */
/* #undef CRAY_STACKSEG_END */ #undef CRAY_STACKSEG_END
/* Define to 1 if using `alloca.c'. */ /* Define to 1 if using `alloca.c'. */
/* #undef C_ALLOCA */ #undef C_ALLOCA
/* Define to 1 if you have `alloca', as a function or macro. */ /* Define to 1 if you have `alloca', as a function or macro. */
#define HAVE_ALLOCA 1 #undef HAVE_ALLOCA
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). /* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/ */
#define HAVE_ALLOCA_H 1 #undef HAVE_ALLOCA_H
/* Define to 1 if you have the declaration of `be32dec', and to 0 if you /* Define to 1 if you have the declaration of `be32dec', and to 0 if you
don't. */ don't. */
#define HAVE_DECL_BE32DEC 0 #undef HAVE_DECL_BE32DEC
/* Define to 1 if you have the declaration of `be32enc', and to 0 if you /* Define to 1 if you have the declaration of `be32enc', and to 0 if you
don't. */ don't. */
#define HAVE_DECL_BE32ENC 0 #undef HAVE_DECL_BE32ENC
/* Define to 1 if you have the declaration of `le32dec', and to 0 if you /* Define to 1 if you have the declaration of `le32dec', and to 0 if you
don't. */ don't. */
#define HAVE_DECL_LE32DEC 0 #undef HAVE_DECL_LE32DEC
/* Define to 1 if you have the declaration of `le32enc', and to 0 if you /* Define to 1 if you have the declaration of `le32enc', and to 0 if you
don't. */ don't. */
#define HAVE_DECL_LE32ENC 0 #undef HAVE_DECL_LE32ENC
/* Define to 1 if you have the `getopt_long' function. */ /* Define to 1 if you have the `getopt_long' function. */
#define HAVE_GETOPT_LONG 1 #define HAVE_GETOPT_LONG 1
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1 #undef HAVE_INTTYPES_H
/* Define to 1 if you have the `crypto' library (-lcrypto). */
#define HAVE_LIBCRYPTO 1
/* Define to 1 if you have a functional curl library. */ /* Define to 1 if you have a functional curl library. */
#define HAVE_LIBCURL 1 #undef HAVE_LIBCURL
/* Define to 1 if you have the `ssl' library (-lssl). */
#define HAVE_LIBSSL 1
/* Define to 1 if you have the <memory.h> header file. */ /* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1 #undef HAVE_MEMORY_H
/* Define to 1 if you have the <stdint.h> header file. */ /* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1 #undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */ /* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1 #undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */ /* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1 #undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */ /* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1 #undef HAVE_STRING_H
/* Define to 1 if you have the <syslog.h> header file. */ /* Define to 1 if you have the <syslog.h> header file. */
#define HAVE_SYSLOG_H 1 #undef HAVE_SYSLOG_H
/* Define to 1 if you have the <sys/endian.h> header file. */ /* Define to 1 if you have the <sys/endian.h> header file. */
/* #undef HAVE_SYS_ENDIAN_H */ #undef HAVE_SYS_ENDIAN_H
/* Define to 1 if you have the <sys/param.h> header file. */ /* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1 #undef HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/stat.h> header file. */ /* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1 #undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/sysctl.h> header file. */ /* Define to 1 if you have the <sys/sysctl.h> header file. */
#define HAVE_SYS_SYSCTL_H 1 #undef HAVE_SYS_SYSCTL_H
/* Define to 1 if you have the <sys/types.h> header file. */ /* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1 #undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */ /* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1 #undef HAVE_UNISTD_H
/* Defined if libcurl supports AsynchDNS */ /* Defined if libcurl supports AsynchDNS */
/* #undef LIBCURL_FEATURE_ASYNCHDNS */ #undef LIBCURL_FEATURE_ASYNCHDNS
/* Defined if libcurl supports IDN */ /* Defined if libcurl supports IDN */
#define LIBCURL_FEATURE_IDN 1 #undef LIBCURL_FEATURE_IDN
/* Defined if libcurl supports IPv6 */ /* Defined if libcurl supports IPv6 */
#define LIBCURL_FEATURE_IPV6 1 #undef LIBCURL_FEATURE_IPV6
/* Defined if libcurl supports KRB4 */ /* Defined if libcurl supports KRB4 */
/* #undef LIBCURL_FEATURE_KRB4 */ #undef LIBCURL_FEATURE_KRB4
/* Defined if libcurl supports libz */ /* Defined if libcurl supports libz */
#define LIBCURL_FEATURE_LIBZ 1 #undef LIBCURL_FEATURE_LIBZ
/* Defined if libcurl supports NTLM */ /* Defined if libcurl supports NTLM */
#define LIBCURL_FEATURE_NTLM 1 #undef LIBCURL_FEATURE_NTLM
/* Defined if libcurl supports SSL */ /* Defined if libcurl supports SSL */
#define LIBCURL_FEATURE_SSL 1 #undef LIBCURL_FEATURE_SSL
/* Defined if libcurl supports SSPI */ /* Defined if libcurl supports SSPI */
/* #undef LIBCURL_FEATURE_SSPI */ #undef LIBCURL_FEATURE_SSPI
/* Defined if libcurl supports DICT */ /* Defined if libcurl supports DICT */
#define LIBCURL_PROTOCOL_DICT 1 #undef LIBCURL_PROTOCOL_DICT
/* Defined if libcurl supports FILE */ /* Defined if libcurl supports FILE */
#define LIBCURL_PROTOCOL_FILE 1 #undef LIBCURL_PROTOCOL_FILE
/* Defined if libcurl supports FTP */ /* Defined if libcurl supports FTP */
#define LIBCURL_PROTOCOL_FTP 1 #undef LIBCURL_PROTOCOL_FTP
/* Defined if libcurl supports FTPS */ /* Defined if libcurl supports FTPS */
#define LIBCURL_PROTOCOL_FTPS 1 #undef LIBCURL_PROTOCOL_FTPS
/* Defined if libcurl supports HTTP */ /* Defined if libcurl supports HTTP */
#define LIBCURL_PROTOCOL_HTTP 1 #undef LIBCURL_PROTOCOL_HTTP
/* Defined if libcurl supports HTTPS */ /* Defined if libcurl supports HTTPS */
#define LIBCURL_PROTOCOL_HTTPS 1 #undef LIBCURL_PROTOCOL_HTTPS
/* Defined if libcurl supports IMAP */ /* Defined if libcurl supports IMAP */
#define LIBCURL_PROTOCOL_IMAP 1 #undef LIBCURL_PROTOCOL_IMAP
/* Defined if libcurl supports LDAP */ /* Defined if libcurl supports LDAP */
#define LIBCURL_PROTOCOL_LDAP 1 #undef LIBCURL_PROTOCOL_LDAP
/* Defined if libcurl supports POP3 */ /* Defined if libcurl supports POP3 */
#define LIBCURL_PROTOCOL_POP3 1 #undef LIBCURL_PROTOCOL_POP3
/* Defined if libcurl supports RTSP */ /* Defined if libcurl supports RTSP */
#define LIBCURL_PROTOCOL_RTSP 1 #undef LIBCURL_PROTOCOL_RTSP
/* Defined if libcurl supports SMTP */ /* Defined if libcurl supports SMTP */
#define LIBCURL_PROTOCOL_SMTP 1 #undef LIBCURL_PROTOCOL_SMTP
/* Defined if libcurl supports TELNET */ /* Defined if libcurl supports TELNET */
#define LIBCURL_PROTOCOL_TELNET 1 #undef LIBCURL_PROTOCOL_TELNET
/* Defined if libcurl supports TFTP */ /* Defined if libcurl supports TFTP */
#define LIBCURL_PROTOCOL_TFTP 1 #undef LIBCURL_PROTOCOL_TFTP
/* Define to 1 if your C compiler doesn't accept -c and -o together. */ /* Define to 1 if your C compiler doesn't accept -c and -o together. */
/* #undef NO_MINUS_C_MINUS_O */ #undef NO_MINUS_C_MINUS_O
/* Name of package */ /* Name of package */
#define PACKAGE "ccminer" #undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */ /* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "" #undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */ /* Define to the full name of this package. */
#define PACKAGE_NAME "ccminer" #define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.03.21" #define PACKAGE_STRING "ccminer 2014.03.23"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer" #undef PACKAGE_TARNAME
/* Define to the home page for this package. */ /* Define to the home page for this package. */
#define PACKAGE_URL "" #undef PACKAGE_URL
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "2014.03.21" #define PACKAGE_VERSION "2014.03.23"
/* If using the C implementation of alloca, define if you know the /* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be direction of stack growth for your system; otherwise it will be
@ -176,25 +169,22 @@
STACK_DIRECTION > 0 => grows toward higher addresses STACK_DIRECTION > 0 => grows toward higher addresses
STACK_DIRECTION < 0 => grows toward lower addresses STACK_DIRECTION < 0 => grows toward lower addresses
STACK_DIRECTION = 0 => direction of growth unknown */ STACK_DIRECTION = 0 => direction of growth unknown */
/* #undef STACK_DIRECTION */ #undef STACK_DIRECTION
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1 #undef STDC_HEADERS
/* Define to 1 if AVX assembly is available. */ /* Define to 1 if AVX assembly is available. */
#define USE_AVX 1 #undef USE_AVX
/* Define to 1 if AVX2 assembly is available. */
#define USE_AVX2 1
/* Define to 1 if XOP assembly is available. */ /* Define to 1 if XOP assembly is available. */
#define USE_XOP 1 #undef USE_XOP
/* Version number of package */ /* Version number of package */
#define VERSION "2014.03.21" #undef VERSION
/* Define curl_free() as free() if our version of curl lacks curl_free. */ /* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */ #undef curl_free
/* Define to `unsigned int' if <sys/types.h> does not define. */ /* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */ #undef size_t

1
cuda_blake512.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"

1
cuda_combine.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"

6
cuda_fugue256.cu

@ -1,5 +1,3 @@
#if 1
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"
@ -571,6 +569,8 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x); *((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x); *((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x); *((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x);
__syncthreads();
#endif #endif
int thread = (blockDim.x * blockIdx.x + threadIdx.x); int thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -788,5 +788,3 @@ __host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *
//cudaMemcpy(outputHashes, d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t), cudaMemcpyDeviceToHost); //cudaMemcpy(outputHashes, d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
} }
#endif

31
cuda_groestl512.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"
@ -6,9 +5,6 @@
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
#define USE_SHARED 0
#define W_ALIGNMENT 65
// Folgende Definitionen später durch header ersetzen // Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t; typedef unsigned char uint8_t;
typedef unsigned int uint32_t; typedef unsigned int uint32_t;
@ -60,7 +56,7 @@ texture<unsigned int, 1, cudaReadModeElementType> t2dn;
texture<unsigned int, 1, cudaReadModeElementType> t3up; texture<unsigned int, 1, cudaReadModeElementType> t3up;
texture<unsigned int, 1, cudaReadModeElementType> t3dn; texture<unsigned int, 1, cudaReadModeElementType> t3dn;
static const uint32_t T0up_cpu[] = { uint32_t T0up_cpu[] = {
C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d), C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d),
C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54), C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54),
C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d), C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d),
@ -127,7 +123,7 @@ static const uint32_t T0up_cpu[] = {
C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a) C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a)
}; };
static const uint32_t T0dn_cpu[] = { uint32_t T0dn_cpu[] = {
C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6), C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6),
C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491), C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491),
C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56), C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56),
@ -194,7 +190,7 @@ static const uint32_t T0dn_cpu[] = {
C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c) C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c)
}; };
static const uint32_t T1up_cpu[] = { uint32_t T1up_cpu[] = {
C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c), C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c),
C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc), C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc),
C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187), C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187),
@ -261,7 +257,7 @@ static const uint32_t T1up_cpu[] = {
C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e) C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e)
}; };
static const uint32_t T1dn_cpu[] = { uint32_t T1dn_cpu[] = {
C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d), C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d),
C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954), C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954),
C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d), C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d),
@ -328,7 +324,7 @@ static const uint32_t T1dn_cpu[] = {
C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a) C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a)
}; };
static const uint32_t T2up_cpu[] = { uint32_t T2up_cpu[] = {
C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a), C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a),
C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d), C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d),
C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1), C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1),
@ -395,7 +391,7 @@ static const uint32_t T2up_cpu[] = {
C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62) C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62)
}; };
static const uint32_t T2dn_cpu[] = { uint32_t T2dn_cpu[] = {
C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7), C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7),
C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39), C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39),
C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac), C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac),
@ -462,7 +458,7 @@ static const uint32_t T2dn_cpu[] = {
C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58) C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58)
}; };
static const uint32_t T3up_cpu[] = { uint32_t T3up_cpu[] = {
C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6), C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6),
C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191), C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191),
C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656), C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656),
@ -529,7 +525,7 @@ static const uint32_t T3up_cpu[] = {
C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c) C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c)
}; };
static const uint32_t T3dn_cpu[] = { uint32_t T3dn_cpu[] = {
C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c), C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c),
C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc), C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc),
C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87), C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87),
@ -685,15 +681,8 @@ __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *out
int thread = (blockDim.x * blockIdx.x + threadIdx.x); int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
{ {
#if USE_SHARED
extern __shared__ unsigned char s[];
uint32_t offset = W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x;
uint32_t *message = (uint32_t*)(&s[offset + 0]); // 128 Byte
uint32_t *state = (uint32_t*)(&s[offset + 128]); // 128 Byte
#else
uint32_t message[32]; uint32_t message[32];
uint32_t state[32]; uint32_t state[32];
#endif
// lese message ein & verknüpfe diese mit dem hash1 von hefty1 // lese message ein & verknüpfe diese mit dem hash1 von hefty1
// lese den state ein // lese den state ein
@ -825,11 +814,7 @@ __host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
dim3 block(threadsperblock); dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl)
#if USE_SHARED
size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte
#else
size_t shared_size = 0; size_t shared_size = 0;
#endif
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); // fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);

463
cuda_groestlcoin.cu

@ -0,0 +1,463 @@
// Version of Groestl specialized for Groestlcoin
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// USE_SHARED selects where the kernel reads the Groestl T-tables from:
// 1 = each block copies the tables into shared memory, 0 = direct texture fetches.
#define USE_SHARED 1
// Defined outside this file; called right after the kernel launch in groestlcoin_cpu_hash().
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// TODO: replace the following definitions with a header later
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// NOTE(review): the original comment here read "global memory for all HeftyHashes of all
// threads", which looks inherited from the Heavycoin code. pTarget is the 8-word target the
// kernel compares each hash against; it is filled by groestlcoin_cpu_setBlock().
__constant__ uint32_t pTarget[8]; // Single GPU
// Per-thread-id device buffers holding the found nonce; defined in another file.
extern uint32_t *d_resultNonce[8];
// global memory for our results
uint32_t *d_hashGROESTLCOINoutput[8];
// Initial Groestl state and padded message block, uploaded by the host setup functions.
__constant__ uint32_t groestlcoin_gpu_state[32];
__constant__ uint32_t groestlcoin_gpu_msg[32];
// Filled in groestlcoin_cpu_init(); not read by any kernel in this file.
__constant__ uint32_t sha256coin_gpu_constantTable[64];
__constant__ uint32_t sha256coin_gpu_register[8];
// Truncate to 32 bits (SPH_C32 is defined further down; macros expand lazily).
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
// Round-constant words for permutations P and Q; j is the column offset, r the round number.
// The "up"/"dn" suffix selects the even/odd word of a state word pair.
#define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0
#define QC32up(j, r) 0xFFFFFFFF
#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))
// Byte extractors: B32_n(x) yields byte n of x (0 = least significant).
#define B32_0(x) ((x) & 0xFF)
#define B32_1(x) (((x) >> 8) & 0xFF)
#define B32_2(x) (((x) >> 16) & 0xFF)
#define B32_3(x) ((x) >> 24)
// Unsigned 32-bit literal.
#define SPH_C32(x) ((uint32_t)(x ## U))
// Byte-swapped 32-bit literal.
#define C32e(x) ((SPH_C32(x) >> 24) \
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
| ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
// T-table lookups. In the shared-memory variant these macros implicitly reference a
// variable named `mixtabs` that must be in scope at the point of use; it holds eight
// consecutive 256-entry uint32_t tables (T0up, T0dn, T1up, ... T3dn).
#if USE_SHARED
#define T0up(x) (*((uint32_t*)mixtabs + ( (x))))
#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x))))
#define T1up(x) (*((uint32_t*)mixtabs + (512+(x))))
#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x))))
#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x))))
#else
// Texture variant: each lookup is a fetch from the matching 1D texture.
#define T0up(x) tex1Dfetch(t0up1, x)
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) tex1Dfetch(t1dn1, x)
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) tex1Dfetch(t2dn1, x)
#define T3up(x) tex1Dfetch(t3up1, x)
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
// 1D texture references for the eight Groestl T-tables; bound to device memory
// in groestlcoin_cpu_init() via the texDef() macro.
texture<unsigned int, 1, cudaReadModeElementType> t0up1;
texture<unsigned int, 1, cudaReadModeElementType> t0dn1;
texture<unsigned int, 1, cudaReadModeElementType> t1up1;
texture<unsigned int, 1, cudaReadModeElementType> t1dn1;
texture<unsigned int, 1, cudaReadModeElementType> t2up1;
texture<unsigned int, 1, cudaReadModeElementType> t2dn1;
texture<unsigned int, 1, cudaReadModeElementType> t3up1;
texture<unsigned int, 1, cudaReadModeElementType> t3dn1;
// Host-side T-tables, defined in another translation unit
// (presumably cuda_groestl512.cu, which drops `static const` from them in this commit).
extern uint32_t T0up_cpu[];
extern uint32_t T0dn_cpu[];
extern uint32_t T1up_cpu[];
extern uint32_t T1dn_cpu[];
extern uint32_t T2up_cpu[];
extern uint32_t T2dn_cpu[];
extern uint32_t T3up_cpu[];
extern uint32_t T3dn_cpu[];
// Host-side SHA-256 tables, defined elsewhere; uploaded to constant memory in groestlcoin_cpu_init().
extern uint32_t sha256_cpu_hashTable[];
extern uint32_t sha256_cpu_constantTable[];
// SHA-256 style bit helpers. Every macro argument is fully parenthesized so that
// expression arguments (e.g. `a | b`) expand with the intended precedence.
// S = rotate right by n (for 32-bit operands), R = logical shift right by n.
#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define R(x, n) ((x) >> (n))
// Ch: bitwise "choose" (y where x is 1, z where x is 0). Maj: bitwise majority.
#define Ch(x, y, z) (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z) (((x) & ((y) | (z))) | ((y) & (z)))
#define S0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define S1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define s0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define s1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
// Reverse the byte order of a 32-bit value.
#define SWAB32(x) ( (((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24) )
// Groestl permutation P: runs 14 rounds in place on the 32-word state `a`.
// `mixtabs` is not referenced by name in the body; the T0up()..T3dn() macros
// read it implicitly when USE_SHARED is 1 (callers pass NULL otherwise).
__device__ void groestlcoin_perm_P(uint32_t *a, char *mixtabs)
{
// Scratch copy of the state; every output word depends on several input words.
uint32_t t[32];
//#pragma unroll 14
for(int r=0;r<14;r++)
{
// Add the round constant to the even word of each of the 16 word pairs.
#pragma unroll 16
for(int k=0;k<16;k++)
{
a[(k*2)+0] ^= PC32up(k * 0x10, r);
// PC32dn is defined as 0, so the XOR on the odd word would be a no-op.
//a[(k<<1)+1] ^= PC32dn(k * 0x10, r);
}
// RBTT
// Table-driven round: each output word pair is the XOR of eight T-table lookups,
// one byte taken from each of eight different input words (indices wrap modulo 32).
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
t[k + 0] = T0up( B32_0(a[k & 0x1f]) ) ^
T1up( B32_1(a[(k + 2) & 0x1f]) ) ^
T2up( B32_2(a[(k + 4) & 0x1f]) ) ^
T3up( B32_3(a[(k + 6) & 0x1f]) ) ^
T0dn( B32_0(a[(k + 9) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 11) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 13) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 23) & 0x1f]) );
t[k + 1] = T0dn( B32_0(a[k & 0x1f]) ) ^
T1dn( B32_1(a[(k + 2) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 4) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 6) & 0x1f]) ) ^
T0up( B32_0(a[(k + 9) & 0x1f]) ) ^
T1up( B32_1(a[(k + 11) & 0x1f]) ) ^
T2up( B32_2(a[(k + 13) & 0x1f]) ) ^
T3up( B32_3(a[(k + 23) & 0x1f]) );
}
// Write the round result back into the state.
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
// Groestl permutation Q: runs 14 rounds in place on the 32-word state `a`.
// Same structure as groestlcoin_perm_P, with different round constants and
// different input-word offsets in the table lookups.
// `mixtabs` is read implicitly by the T0up()..T3dn() macros when USE_SHARED is 1.
__device__ void groestlcoin_perm_Q(uint32_t *a, char *mixtabs)
{
//#pragma unroll 14
for(int r=0;r<14;r++)
{
// Scratch copy of the state for this round.
uint32_t t[32];
// Add the round constants: 0xFFFFFFFF to every even word, QC32dn(...) to every odd word.
#pragma unroll 16
for(int k=0;k<16;k++)
{
a[(k*2)+0] ^= QC32up(k * 0x10, r);
a[(k*2)+1] ^= QC32dn(k * 0x10, r);
}
// RBTT
// Table-driven round: each output word pair is the XOR of eight T-table lookups
// (input indices wrap modulo 32).
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
t[k + 0] = T0up( B32_0(a[(k + 2) & 0x1f]) ) ^
T1up( B32_1(a[(k + 6) & 0x1f]) ) ^
T2up( B32_2(a[(k + 10) & 0x1f]) ) ^
T3up( B32_3(a[(k + 22) & 0x1f]) ) ^
T0dn( B32_0(a[(k + 1) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 5) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 9) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 13) & 0x1f]) );
t[k + 1] = T0dn( B32_0(a[(k + 2) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 6) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 10) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 22) & 0x1f]) ) ^
T0up( B32_0(a[(k + 1) & 0x1f]) ) ^
T1up( B32_1(a[(k + 5) & 0x1f]) ) ^
T2up( B32_2(a[(k + 9) & 0x1f]) ) ^
T3up( B32_3(a[(k + 13) & 0x1f]) );
}
// Write the round result back into the state.
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
// Groestlcoin search kernel.
//
// Each thread takes the nonce `startNounce + thread`, computes Groestl-512 twice
// (first over the padded block header from groestlcoin_gpu_msg, then over the
// resulting 64-byte digest) and compares the first 8 words of the final digest
// with pTarget. The smallest nonce whose hash does not exceed the target is
// stored in resNounce[0].
//
//   threads     - number of nonces to test; surplus threads of the last block do nothing
//   startNounce - first nonce of the range
//   outputHash  - unused (kept for the debug path and for interface compatibility)
//   resNounce   - device pointer to the result; the host presets it to 0xFFFFFFFF
//
// With USE_SHARED the block must be launched with exactly 256 threads and
// 8 * 256 * sizeof(uint32_t) bytes of dynamic shared memory: thread i copies
// entry i of each of the eight T-tables.
#if USE_SHARED
__global__ void __launch_bounds__(256)
#else
__global__ void
#endif
groestlcoin_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
{
#if USE_SHARED
    extern __shared__ char mixtabs[];
    uint32_t *tables = (uint32_t*)mixtabs;
    tables[       threadIdx.x] = tex1Dfetch(t0up1, threadIdx.x);
    tables[ 256 + threadIdx.x] = tex1Dfetch(t0dn1, threadIdx.x);
    tables[ 512 + threadIdx.x] = tex1Dfetch(t1up1, threadIdx.x);
    tables[ 768 + threadIdx.x] = tex1Dfetch(t1dn1, threadIdx.x);
    tables[1024 + threadIdx.x] = tex1Dfetch(t2up1, threadIdx.x);
    tables[1280 + threadIdx.x] = tex1Dfetch(t2dn1, threadIdx.x);
    tables[1536 + threadIdx.x] = tex1Dfetch(t3up1, threadIdx.x);
    tables[1792 + threadIdx.x] = tex1Dfetch(t3dn1, threadIdx.x);
    // All tables must be complete before any thread starts looking things up.
    __syncthreads();
#else
    // The texture variant of the T-table macros never dereferences this pointer.
    char *mixtabs = NULL;
#endif

    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
    if (thread < threads)
    {
        uint32_t message[32];
        uint32_t state[32];
        uint32_t g[32];     // per-thread working copy fed to permutation P

        #pragma unroll 32
        for (int k = 0; k < 32; k++)
        {
            state[k] = groestlcoin_gpu_state[k];
            message[k] = groestlcoin_gpu_msg[k];
        }

        // Insert this thread's nonce (byte-swapped) into word 19 of the header.
        uint32_t nounce = startNounce + thread;
        message[19] = SWAB32(nounce);

        //
        // First Groestl pass (block header)
        //
        #pragma unroll 32
        for (int u = 0; u < 32; u++)
            g[u] = message[u] ^ state[u];

        groestlcoin_perm_P(g, mixtabs);
        groestlcoin_perm_Q(message, mixtabs);

        #pragma unroll 32
        for (int u = 0; u < 32; u++)
        {
            state[u] ^= g[u] ^ message[u];
            g[u] = state[u];
        }

        // Output transformation: state ^= P(state)
        groestlcoin_perm_P(g, mixtabs);

        #pragma unroll 32
        for (int u = 0; u < 32; u++)
            state[u] ^= g[u];

        //
        // Second Groestl pass (over the 64-byte digest = state[16..31])
        //
        #pragma unroll 16
        for (int k = 0; k < 16; k++)
            message[k] = state[k + 16];

        #pragma unroll 32
        for (int k = 0; k < 32; k++)
            state[k] = groestlcoin_gpu_state[k];

        // Padding for a 64-byte message inside one 128-byte block.
        #pragma unroll 16
        for (int k = 0; k < 16; k++)
            message[k + 16] = 0;
        message[16] = 0x80;
        message[31] = 0x01000000;

        #pragma unroll 32
        for (int u = 0; u < 32; u++)
            g[u] = message[u] ^ state[u];

        groestlcoin_perm_P(g, mixtabs);
        groestlcoin_perm_Q(message, mixtabs);

        #pragma unroll 32
        for (int u = 0; u < 32; u++)
        {
            state[u] ^= g[u] ^ message[u];
            g[u] = state[u];
        }

        // Output transformation: state ^= P(state)
        groestlcoin_perm_P(g, mixtabs);

        #pragma unroll 32
        for (int u = 0; u < 32; u++)
            state[u] ^= g[u];

        // Compare digest words state[16..23] with the target, most significant
        // word (index 7) first. An exact match counts as a hit.
        bool rc = true;
        for (int i = 7; i >= 0; i--) {
            if (state[i + 16] > pTarget[i]) {
                rc = false;
                break;
            }
            if (state[i + 16] < pTarget[i]) {
                rc = true;
                break;
            }
        }

        // Keep the smallest matching nonce. atomicMin makes the read-compare-write
        // a single operation, so concurrent hits from other threads cannot
        // overwrite a smaller nonce with a larger one.
        if (rc)
            atomicMin(resNounce, nounce);
    }
}
// texDef(texname, texmem, texsource, texsize):
// declares a device pointer `texmem` in the enclosing scope, allocates `texsize`
// bytes for it, uploads the host table `texsource`, and binds the 1D texture
// reference `texname` to that memory (unnormalized coordinates, point filtering,
// clamped addressing).
// The final line deliberately has no trailing backslash, so the source line that
// follows the macro is not spliced into its definition.
#define texDef(texname, texmem, texsource, texsize) \
    unsigned int *texmem; \
    cudaMalloc(&texmem, texsize); \
    cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
    texname.normalized = 0; \
    texname.filterMode = cudaFilterModePoint; \
    texname.addressMode[0] = cudaAddressModeClamp; \
    { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
      cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
// Setup-Funktionen
// One-time device setup for the given thread id:
// selects the device, uploads the T-tables and binds them to textures,
// fills the constant-memory tables and the initial Groestl state,
// and allocates the result buffers.
//   thr_id  - index used both as CUDA device number and as slot in the per-thread arrays
//   threads - number of hashes per launch; sizes the (debug-only) hash output buffer
__host__ void groestlcoin_cpu_init(int thr_id, int threads)
{
    const size_t tableBytes = sizeof(uint32_t)*256;

    cudaSetDevice(thr_id);
    cudaDeviceSetCacheConfig( cudaFuncCachePreferShared );

    // Initialize the textures with the texDef macro
    texDef(t0up1, devT0up, T0up_cpu, tableBytes);
    texDef(t0dn1, devT0dn, T0dn_cpu, tableBytes);
    texDef(t1up1, devT1up, T1up_cpu, tableBytes);
    texDef(t1dn1, devT1dn, T1dn_cpu, tableBytes);
    texDef(t2up1, devT2up, T2up_cpu, tableBytes);
    texDef(t2dn1, devT2dn, T2dn_cpu, tableBytes);
    texDef(t3up1, devT3up, T3up_cpu, tableBytes);
    texDef(t3dn1, devT3dn, T3dn_cpu, tableBytes);

    // Copy the SHA-256 round constants and start vector into constant memory
    cudaMemcpyToSymbol( sha256coin_gpu_constantTable, sha256_cpu_constantTable, 64 * sizeof(uint32_t) );
    cudaMemcpyToSymbol( sha256coin_gpu_register, sha256_cpu_hashTable, 8 * sizeof(uint32_t) );

    // Initial Groestl state: all zero except the last word
    // (presumably the Groestl-512 IV encoding the output length - confirm against the spec).
    uint32_t initialState[32] = { 0 };
    initialState[31] = 0x20000;
    cudaMemcpyToSymbol( groestlcoin_gpu_state, initialState, sizeof(initialState) );

    // Single-word buffer for the found nonce
    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));

    // Buffer for all hash results (debug only)
    cudaMalloc(&d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads);
}
// Prepares the device for a new block header.
//   thr_id    - slot of the result buffer to reset
//   data      - 80-byte block header (the nonce is replaced per thread on the GPU)
//   pTargetIn - 8-word target to compare hashes against
__host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
{
    // Expand the 80-byte header to one zero-filled 128-byte message block.
    uint32_t paddedHeader[32] = { 0 };
    memcpy(paddedHeader, data, 80);

    // Padding: marker byte directly after the 80 message bytes, and the final word of the block.
    paddedHeader[20] = 0x80;
    paddedHeader[31] = 0x01000000;

    // No CPU-side hashing is needed here; the single round runs on the GPU.
    // Upload the header (the correct nonce is still missing at this point).
    cudaMemcpyToSymbol( groestlcoin_gpu_msg, paddedHeader, sizeof(paddedHeader) );

    // Reset the result to "nothing found" (all bits set).
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));

    cudaMemcpyToSymbol( pTarget, pTargetIn, 8 * sizeof(uint32_t) );
}
// Launches the search kernel over `threads` nonces starting at `startNounce`
// and copies the result word back to the host.
//   thr_id       - slot of the per-thread device buffers
//   outputHashes - unused here (only referenced by the disabled debug copy)
//   nounce       - receives the found nonce, or 0xFFFFFFFF if none was found
__host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
{
#if USE_SHARED
    // Block size must match the T-table size (256 entries). DO NOT CHANGE.
    const int threadsperblock = 256;
    // Dynamic shared memory: eight tables of 256 words each.
    size_t shared_size = 8 * 256 * sizeof(uint32_t);
#else
    // Free to tune.
    const int threadsperblock = 512;
    size_t shared_size = 0;
#endif

    // Number of thread blocks needed, rounded up.
    dim3 block(threadsperblock);
    dim3 grid((threads + threadsperblock-1)/threadsperblock);

    // Reset the result to "nothing found" before every launch.
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));

    groestlcoin_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hashGROESTLCOINoutput[thr_id], d_resultNonce[thr_id]);

    // Strategic sleep command to lower CPU load (helper defined elsewhere).
    MyStreamSynchronize(NULL, 0, thr_id);

    cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}

8
cuda_groestlcoin.h

@ -0,0 +1,8 @@
#ifndef _CUDA_GROESTLCOIN_H
#define _CUDA_GROESTLCOIN_H

// Needed for uint32_t so that this header can be included on its own.
#include <stdint.h>

// One-time device setup for thread/device `thr_id`; `threads` is the number of hashes per launch.
void groestlcoin_cpu_init(int thr_id, int threads);
// Uploads the 80-byte block header `data` and the 8-word target `pTargetIn`.
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
// Tests `threads` nonces starting at `startNounce`; writes the found nonce (or 0xFFFFFFFF) to `*nounce`.
void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);

#endif

1
cuda_hefty1.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"

1
cuda_keccak512.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"

1
cuda_sha256.cu

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h> #include <cuda.h>
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "device_launch_parameters.h" #include "device_launch_parameters.h"

3
groestl.c

@ -29,7 +29,7 @@
* *
* @author Thomas Pornin <thomas.pornin@cryptolog.com> * @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/ */
#include <stdio.h>
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <string.h>
@ -2986,6 +2986,7 @@ groestl_big_close(sph_groestl_big_context *sc,
#endif #endif
} }
memset(pad + 1, 0, pad_len - 9); memset(pad + 1, 0, pad_len - 9);
//fprintf(stderr, "%x\n", pad_len);
#if SPH_64 #if SPH_64
sph_enc64be(pad + pad_len - 8, count); sph_enc64be(pad + pad_len - 8, count);
#else #else

175
groestlcoin.cpp

@ -0,0 +1,175 @@
#include "uint256.h"
#include "sph_groestl.h"
#include "cpuminer-config.h"
#include "miner.h"
#include <string.h>
#include <stdint.h>
#include "cuda_groestlcoin.h"
#include <openssl/sha.h>
#define SWAP32(x) \
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
// Single SHA-256 over `len` bytes of `data`; writes the 32-byte big-endian digest to `hash`.
// Uses the project's sha256_init / sha256_transform / be32dec / be32enc helpers.
void sha256func(unsigned char *hash, const unsigned char *data, int len)
{
    uint32_t S[16], T[16];   // S: hash state (first 8 words), T: current 64-byte message block
    int i, r;

    sha256_init(S);
    // r counts the bytes still to process; the loop runs until the 0x80 marker
    // and the 64-bit length field have both been emitted.
    for (r = len; r > -9; r -= 64) {
        if (r < 64)
            memset(T, 0, 64);
        memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
        if (r >= 0 && r < 64)
            ((unsigned char *)T)[r] = 0x80;
        for (i = 0; i < 16; i++)
            T[i] = be32dec(T + i);
        if (r < 56)
            T[15] = 8 * len;   // message length in bits
        sha256_transform(S, T, 0);
    }
    // The second SHA-256 pass of the sha256d routine this was derived from is
    // disabled, so the digest is the state S. T only holds the last message block.
    for (i = 0; i < 8; i++)
        be32enc((uint32_t *)hash + i, S[i]);
}
// CPU reference hash used to verify GPU results:
// Groestl-512 over the 80-byte `input`, then Groestl-512 over that 64-byte digest.
// The first 32 bytes of the second digest are written to `state`.
static void groestlhash(void *state, const void *input)
{
    sph_groestl512_context ctx_groestl[2];

    // these uint512 in the c++ source of the client are backed by an array of uint32
    uint32_t hashA[16], hashB[16];

    sph_groestl512_init(&ctx_groestl[0]);
    sph_groestl512 (&ctx_groestl[0], input, 80);
    sph_groestl512_close(&ctx_groestl[0], hashA);

    sph_groestl512_init(&ctx_groestl[1]);
    sph_groestl512 (&ctx_groestl[1], hashA, 64);
    sph_groestl512_close(&ctx_groestl[1], hashB);

    memcpy(state, hashB, 32);
}
// Scans nonces on the GPU for a Groestlcoin hash that meets `ptarget`.
//
//   thr_id      - miner thread index; selects the device and the per-thread buffers
//   pdata       - block header words; pdata[19] is the nonce and is advanced as
//                 the scan proceeds (assumes at least 32 readable words - TODO confirm)
//   ptarget     - 8-word target
//   max_nonce   - scanning stops once pdata[19] reaches this value
//   hashes_done - receives the number of nonces covered by this call
//
// Returns 1 with pdata[19] set to the winning nonce when a hash confirmed by the
// CPU reference implementation is found, 0 otherwise.
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
    uint32_t max_nonce, unsigned long *hashes_done)
{
    // Post-increment: start_nonce keeps the incoming nonce, scanning begins one above it.
    uint32_t start_nonce = pdata[19]++;
    // Number of nonces tested per kernel launch.
    const uint32_t throughPut = 4096 * 128;

    // Host buffer handed to groestlcoin_cpu_hash(); only used by its debug path.
    uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t));

    // One-time GPU setup per thread id.
    static bool init[8] = { false, false, false, false, false, false, false, false };
    if(!init[thr_id])
    {
        groestlcoin_cpu_init(thr_id, throughPut);
        init[thr_id] = true;
    }

    // The header has to be byte-swapped before hashing.
    uint32_t endiandata[32];
    for (int kk=0; kk < 32; kk++)
        be32enc(&endiandata[kk], pdata[kk]);

    // Upload the byte-swapped header and the target (the nonce is filled in on the GPU).
    groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);

    do {
        // GPU pass over the next throughPut nonces.
        uint32_t foundNounce = 0xFFFFFFFF;
        groestlcoin_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce);

        if(foundNounce < 0xffffffff)
        {
            // Re-hash the candidate on the CPU before reporting it.
            uint32_t tmpHash[8];
            endiandata[19] = SWAP32(foundNounce);
            groestlhash(tmpHash, endiandata);
            if (((tmpHash[7]&0xFFFFFF00)==0) &&
                    fulltest(tmpHash, ptarget)) {
                pdata[19] = foundNounce;
                *hashes_done = foundNounce - start_nonce;
                free(outputHash);
                return 1;
            }
        }

        // Advance the nonce; clamp to max_nonce if the addition would wrap around.
        if (pdata[19] + throughPut < pdata[19])
            pdata[19] = max_nonce;
        else pdata[19] += throughPut;

    } while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = pdata[19] - start_nonce;
    free(outputHash);
    return 0;
}

5
miner.h

@ -211,8 +211,13 @@ extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len); extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
struct thr_info { struct thr_info {
int id; int id;

Loading…
Cancel
Save