From b93669a99f3d05ccef471efbd8e493fd74fa162c Mon Sep 17 00:00:00 2001 From: Christian Buchner Date: Sun, 23 Mar 2014 21:39:26 +0100 Subject: [PATCH] committing changes to v0.3 release - added Groestlcoin. --- Makefile.am | 15 +- Makefile.in | 26 ++- README.txt | 19 +- ccminer.vcxproj | 3 + ccminer.vcxproj.filters | 9 + configure | 20 +- configure.ac | 2 +- cpu-miner.c | 13 +- cpuminer-config.h | 390 +++++++++++++++++---------------- cuda_blake512.cu | 1 - cuda_combine.cu | 1 - cuda_fugue256.cu | 6 +- cuda_groestl512.cu | 31 +-- cuda_groestlcoin.cu | 463 ++++++++++++++++++++++++++++++++++++++++ cuda_groestlcoin.h | 8 + cuda_hefty1.cu | 1 - cuda_keccak512.cu | 1 - cuda_sha256.cu | 1 - groestl.c | 5 +- groestlcoin.cpp | 175 +++++++++++++++ miner.h | 5 + 21 files changed, 934 insertions(+), 261 deletions(-) create mode 100644 cuda_groestlcoin.cu create mode 100644 cuda_groestlcoin.h create mode 100644 groestlcoin.cpp diff --git a/Makefile.am b/Makefile.am index f601a4d..ca58d96 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,13 +25,20 @@ ccminer_SOURCES = elist.h miner.h compat.h \ cuda_hefty1.cu cuda_hefty1.h \ cuda_keccak512.cu cuda_keccak512.h \ cuda_sha256.cu cuda_sha256.h \ - cuda_fugue256.cu \ - fuguecoin.cpp fugue.c sph_fugue.h uint256.h - + fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \ + groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME .cu.o: - $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +## Thrust needs Compute 2.0 minimum +#heavy.o: heavy.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< +# +#cuda_hefty1.o: cuda_hefty1.cu +# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + diff --git a/Makefile.in b/Makefile.in index 180cc83..f4a36fa 100644 --- a/Makefile.in +++ b/Makefile.in @@ -60,8 +60,9 @@ am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \ ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \ cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \ cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \ - cuda_sha256.$(OBJEXT) cuda_fugue256.$(OBJEXT) \ - ccminer-fuguecoin.$(OBJEXT) ccminer-fugue.$(OBJEXT) + cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \ + cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \ + ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT) ccminer_OBJECTS = $(am_ccminer_OBJECTS) ccminer_DEPENDENCIES = ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \ @@ -275,8 +276,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \ cuda_hefty1.cu cuda_hefty1.h \ cuda_keccak512.cu cuda_keccak512.h \ cuda_sha256.cu cuda_sha256.h \ - cuda_fugue256.cu \ - fuguecoin.cpp fugue.c sph_fugue.h uint256.h + fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \ + groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ @@ -387,6 +388,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestlcoin.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@ @@ -561,6 +563,20 @@ ccminer-fuguecoin.obj: fuguecoin.cpp @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi` +ccminer-groestlcoin.o: groestlcoin.cpp +@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.o -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp + +ccminer-groestlcoin.obj: groestlcoin.cpp +@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.obj -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi` +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi` + # This directory's subdirectories are mostly independent; you can cd # into them and run `make' without going through this Makefile. # To change the values of `make' variables: instead of editing Makefiles, @@ -1018,7 +1034,7 @@ uninstall-am: uninstall-binPROGRAMS .cu.o: - $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=sm_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< #heavy.o: heavy.cu # $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< diff --git a/README.txt b/README.txt index ccfcb3d..fe4b9ac 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccMiner release 0.2 (Mar 21th 2014) - Pool Mining Release +ccMiner release 0.3 (Mar 23th 2014) - Groestlcoin Release ------------------------------------------------------------- *************************************************************** @@ -36,6 +36,7 @@ its command line interface and options. -a, --algo=ALGO specify the algorithm to use heavy use to mine Heavycoin fugue256 use to mine Fuguecoin + groestl use to mine Groestlcoin -o, --url=URL URL of mining server (default: " DEF_RPC_URL ") -O, --userpass=U:P username:password pair for mining server @@ -66,24 +67,29 @@ its command line interface and options. Example for Heavycoin Mining on heavycoinpool.com with a single gpu in your system -cudaminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <> -p <> -v 512 +ccminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <> -p <> -v 512 Example for Heavycoin Mining on hvc.1gh.com with a dual gpu in your system -cudaminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <> -p x -v 512 +ccminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <> -p x -v 512 Example for Fuguecoin solo-mining with 4 gpu's in your system and a Fuguecoin-wallet running on localhost -cudaminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <> -p <> +ccminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <> -p <> Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs --q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS +ccminer.exe -q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS + + +Example for Groestlcoin solo mining + +ccminer.exe -q -s 1 -a groestl -o http://127.0.0.1:1441 -u USERNAME -p PASSWORD For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents @@ -101,6 +107,9 @@ from your old clunkers. >>> RELEASE HISTORY <<< + Match, 23 2014 added Groestlcoin support. stratum status unknown + (the only pool is currently down for fixing issues) + March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates on Fermi and Maxwell devices. Kepler may suffer slightly (3-5%) diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 10464ae..e12cfb2 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -229,6 +229,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + @@ -256,6 +257,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + @@ -274,6 +276,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index f3b22ce..a276918 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -90,6 +90,9 @@ Source Files + + Source Files + @@ -158,6 +161,9 @@ Header Files + + Header Files\CUDA + @@ -184,5 +190,8 @@ Source Files\CUDA + + Source Files\CUDA + \ No newline at end of file diff --git a/configure b/configure index 5b8d03d..cb6956a 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.68 for ccminer 2014.03.21. +# Generated by GNU Autoconf 2.68 for ccminer 2014.03.23. # # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -557,8 +557,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='ccminer' PACKAGE_TARNAME='ccminer' -PACKAGE_VERSION='2014.03.21' -PACKAGE_STRING='ccminer 2014.03.21' +PACKAGE_VERSION='2014.03.23' +PACKAGE_STRING='ccminer 2014.03.23' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures ccminer 2014.03.21 to adapt to many kinds of systems. +\`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1368,7 +1368,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of ccminer 2014.03.21:";; + short | recursive ) echo "Configuration of ccminer 2014.03.23:";; esac cat <<\_ACEOF @@ -1469,7 +1469,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -ccminer configure 2014.03.21 +ccminer configure 2014.03.23 generated by GNU Autoconf 2.68 Copyright (C) 2010 Free Software Foundation, Inc. @@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by ccminer $as_me 2014.03.21, which was +It was created by ccminer $as_me 2014.03.23, which was generated by GNU Autoconf 2.68. Invocation command line was $ $0 $@ @@ -2901,7 +2901,7 @@ fi # Define the identity of the package. PACKAGE='ccminer' - VERSION='2014.03.21' + VERSION='2014.03.23' cat >>confdefs.h <<_ACEOF @@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by ccminer $as_me 2014.03.21, which was +This file was extended by ccminer $as_me 2014.03.23, which was generated by GNU Autoconf 2.68. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7184,7 +7184,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -ccminer config.status 2014.03.21 +ccminer config.status 2014.03.23 configured by $0, generated by GNU Autoconf 2.68, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 7e6ff04..0d75c39 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [2014.03.21]) +AC_INIT([ccminer], [2014.03.23]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index f71682a..3877f95 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -117,11 +117,13 @@ struct workio_cmd { typedef enum { ALGO_HEAVY, /* Heavycoin hash */ ALGO_FUGUE256, /* Fugue256 */ + ALGO_GROESTL, } sha256_algos; static const char *algo_names[] = { "heavy", - "fugue256" + "fugue256", + "groestl" }; bool opt_debug = false; @@ -667,7 +669,11 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) if (opt_algo == ALGO_HEAVY) heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); else + if (opt_algo == ALGO_FUGUE256) SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root); + else + sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); + for (i = 0; i < sctx->job.merkle_count; i++) { memcpy(merkle_root + 32, sctx->job.merkle[i], 32); if (opt_algo == ALGO_HEAVY) @@ -817,7 +823,10 @@ static void *miner_thread(void *userdata) rc = scanhash_fugue256(thr_id, work.data, work.target, max_nonce, &hashes_done); break; - + case ALGO_GROESTL: + rc = scanhash_groestlcoin(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; default: /* should never happen */ goto out; diff --git a/cpuminer-config.h b/cpuminer-config.h index bb090cb..9196f3e 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -1,200 +1,190 @@ -/* cpuminer-config.h. Generated from cpuminer-config.h.in by configure. */ -/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ - -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -/* #undef CRAY_STACKSEG_END */ - -/* Define to 1 if using `alloca.c'. */ -/* #undef C_ALLOCA */ - -/* Define to 1 if you have `alloca', as a function or macro. */ -#define HAVE_ALLOCA 1 - -/* Define to 1 if you have and it should be used (not on Ultrix). - */ -#define HAVE_ALLOCA_H 1 - -/* Define to 1 if you have the declaration of `be32dec', and to 0 if you - don't. */ -#define HAVE_DECL_BE32DEC 0 - -/* Define to 1 if you have the declaration of `be32enc', and to 0 if you - don't. */ -#define HAVE_DECL_BE32ENC 0 - -/* Define to 1 if you have the declaration of `le32dec', and to 0 if you - don't. */ -#define HAVE_DECL_LE32DEC 0 - -/* Define to 1 if you have the declaration of `le32enc', and to 0 if you - don't. */ -#define HAVE_DECL_LE32ENC 0 - -/* Define to 1 if you have the `getopt_long' function. */ -#define HAVE_GETOPT_LONG 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the `crypto' library (-lcrypto). */ -#define HAVE_LIBCRYPTO 1 - -/* Define to 1 if you have a functional curl library. */ -#define HAVE_LIBCURL 1 - -/* Define to 1 if you have the `ssl' library (-lssl). */ -#define HAVE_LIBSSL 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYSLOG_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_SYS_ENDIAN_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_SYSCTL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Defined if libcurl supports AsynchDNS */ -/* #undef LIBCURL_FEATURE_ASYNCHDNS */ - -/* Defined if libcurl supports IDN */ -#define LIBCURL_FEATURE_IDN 1 - -/* Defined if libcurl supports IPv6 */ -#define LIBCURL_FEATURE_IPV6 1 - -/* Defined if libcurl supports KRB4 */ -/* #undef LIBCURL_FEATURE_KRB4 */ - -/* Defined if libcurl supports libz */ -#define LIBCURL_FEATURE_LIBZ 1 - -/* Defined if libcurl supports NTLM */ -#define LIBCURL_FEATURE_NTLM 1 - -/* Defined if libcurl supports SSL */ -#define LIBCURL_FEATURE_SSL 1 - -/* Defined if libcurl supports SSPI */ -/* #undef LIBCURL_FEATURE_SSPI */ - -/* Defined if libcurl supports DICT */ -#define LIBCURL_PROTOCOL_DICT 1 - -/* Defined if libcurl supports FILE */ -#define LIBCURL_PROTOCOL_FILE 1 - -/* Defined if libcurl supports FTP */ -#define LIBCURL_PROTOCOL_FTP 1 - -/* Defined if libcurl supports FTPS */ -#define LIBCURL_PROTOCOL_FTPS 1 - -/* Defined if libcurl supports HTTP */ -#define LIBCURL_PROTOCOL_HTTP 1 - -/* Defined if libcurl supports HTTPS */ -#define LIBCURL_PROTOCOL_HTTPS 1 - -/* Defined if libcurl supports IMAP */ -#define LIBCURL_PROTOCOL_IMAP 1 - -/* Defined if libcurl supports LDAP */ -#define LIBCURL_PROTOCOL_LDAP 1 - -/* Defined if libcurl supports POP3 */ -#define LIBCURL_PROTOCOL_POP3 1 - -/* Defined if libcurl supports RTSP */ -#define LIBCURL_PROTOCOL_RTSP 1 - -/* Defined if libcurl supports SMTP */ -#define LIBCURL_PROTOCOL_SMTP 1 - -/* Defined if libcurl supports TELNET */ -#define LIBCURL_PROTOCOL_TELNET 1 - -/* Defined if libcurl supports TFTP */ -#define LIBCURL_PROTOCOL_TFTP 1 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* Name of package */ -#define PACKAGE "ccminer" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "ccminer" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ccminer 2014.03.21" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "ccminer" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "2014.03.21" - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at runtime. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ -/* #undef STACK_DIRECTION */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to 1 if AVX assembly is available. */ -#define USE_AVX 1 - -/* Define to 1 if AVX2 assembly is available. */ -#define USE_AVX2 1 - -/* Define to 1 if XOP assembly is available. */ -#define USE_XOP 1 - -/* Version number of package */ -#define VERSION "2014.03.21" - -/* Define curl_free() as free() if our version of curl lacks curl_free. */ -/* #undef curl_free */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ +/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the declaration of `be32dec', and to 0 if you + don't. */ +#undef HAVE_DECL_BE32DEC + +/* Define to 1 if you have the declaration of `be32enc', and to 0 if you + don't. */ +#undef HAVE_DECL_BE32ENC + +/* Define to 1 if you have the declaration of `le32dec', and to 0 if you + don't. */ +#undef HAVE_DECL_LE32DEC + +/* Define to 1 if you have the declaration of `le32enc', and to 0 if you + don't. */ +#undef HAVE_DECL_LE32ENC + +/* Define to 1 if you have the `getopt_long' function. */ +#define HAVE_GETOPT_LONG 1 + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have a functional curl library. */ +#undef HAVE_LIBCURL + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYSLOG_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_ENDIAN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SYSCTL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Defined if libcurl supports AsynchDNS */ +#undef LIBCURL_FEATURE_ASYNCHDNS + +/* Defined if libcurl supports IDN */ +#undef LIBCURL_FEATURE_IDN + +/* Defined if libcurl supports IPv6 */ +#undef LIBCURL_FEATURE_IPV6 + +/* Defined if libcurl supports KRB4 */ +#undef LIBCURL_FEATURE_KRB4 + +/* Defined if libcurl supports libz */ +#undef LIBCURL_FEATURE_LIBZ + +/* Defined if libcurl supports NTLM */ +#undef LIBCURL_FEATURE_NTLM + +/* Defined if libcurl supports SSL */ +#undef LIBCURL_FEATURE_SSL + +/* Defined if libcurl supports SSPI */ +#undef LIBCURL_FEATURE_SSPI + +/* Defined if libcurl supports DICT */ +#undef LIBCURL_PROTOCOL_DICT + +/* Defined if libcurl supports FILE */ +#undef LIBCURL_PROTOCOL_FILE + +/* Defined if libcurl supports FTP */ +#undef LIBCURL_PROTOCOL_FTP + +/* Defined if libcurl supports FTPS */ +#undef LIBCURL_PROTOCOL_FTPS + +/* Defined if libcurl supports HTTP */ +#undef LIBCURL_PROTOCOL_HTTP + +/* Defined if libcurl supports HTTPS */ +#undef LIBCURL_PROTOCOL_HTTPS + +/* Defined if libcurl supports IMAP */ +#undef LIBCURL_PROTOCOL_IMAP + +/* Defined if libcurl supports LDAP */ +#undef LIBCURL_PROTOCOL_LDAP + +/* Defined if libcurl supports POP3 */ +#undef LIBCURL_PROTOCOL_POP3 + +/* Defined if libcurl supports RTSP */ +#undef LIBCURL_PROTOCOL_RTSP + +/* Defined if libcurl supports SMTP */ +#undef LIBCURL_PROTOCOL_SMTP + +/* Defined if libcurl supports TELNET */ +#undef LIBCURL_PROTOCOL_TELNET + +/* Defined if libcurl supports TFTP */ +#undef LIBCURL_PROTOCOL_TFTP + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "ccminer" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "ccminer 2014.03.23" + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2014.03.23" + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if AVX assembly is available. */ +#undef USE_AVX + +/* Define to 1 if XOP assembly is available. */ +#undef USE_XOP + +/* Version number of package */ +#undef VERSION + +/* Define curl_free() as free() if our version of curl lacks curl_free. */ +#undef curl_free + +/* Define to `unsigned int' if does not define. */ +#undef size_t diff --git a/cuda_blake512.cu b/cuda_blake512.cu index 325901d..79fef85 100644 --- a/cuda_blake512.cu +++ b/cuda_blake512.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" diff --git a/cuda_combine.cu b/cuda_combine.cu index 2949765..eabb265 100644 --- a/cuda_combine.cu +++ b/cuda_combine.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" diff --git a/cuda_fugue256.cu b/cuda_fugue256.cu index f2f9035..bc8b9ee 100644 --- a/cuda_fugue256.cu +++ b/cuda_fugue256.cu @@ -1,5 +1,3 @@ -#if 1 -/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" @@ -571,6 +569,8 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas *((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x); *((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x); *((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x); + + __syncthreads(); #endif int thread = (blockDim.x * blockIdx.x + threadIdx.x); @@ -788,5 +788,3 @@ __host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void * //cudaMemcpy(outputHashes, d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t), cudaMemcpyDeviceToHost); cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); } - -#endif diff --git a/cuda_groestl512.cu b/cuda_groestl512.cu index 1c1dce9..e29f4f6 100644 --- a/cuda_groestl512.cu +++ b/cuda_groestl512.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" @@ -6,9 +5,6 @@ #include #include -#define USE_SHARED 0 -#define W_ALIGNMENT 65 - // Folgende Definitionen später durch header ersetzen typedef unsigned char uint8_t; typedef unsigned int uint32_t; @@ -60,7 +56,7 @@ texture t2dn; texture t3up; texture t3dn; -static const uint32_t T0up_cpu[] = { +uint32_t T0up_cpu[] = { C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d), C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54), C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d), @@ -127,7 +123,7 @@ static const uint32_t T0up_cpu[] = { C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a) }; -static const uint32_t T0dn_cpu[] = { +uint32_t T0dn_cpu[] = { C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6), C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491), C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56), @@ -194,7 +190,7 @@ static const uint32_t T0dn_cpu[] = { C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c) }; -static const uint32_t T1up_cpu[] = { +uint32_t T1up_cpu[] = { C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c), C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc), C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187), @@ -261,7 +257,7 @@ static const uint32_t T1up_cpu[] = { C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e) }; -static const uint32_t T1dn_cpu[] = { +uint32_t T1dn_cpu[] = { C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d), C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954), C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d), @@ -328,7 +324,7 @@ static const uint32_t T1dn_cpu[] = { C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a) }; -static const uint32_t T2up_cpu[] = { +uint32_t T2up_cpu[] = { C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a), C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d), C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1), @@ -395,7 +391,7 @@ static const uint32_t T2up_cpu[] = { C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62) }; -static const uint32_t T2dn_cpu[] = { +uint32_t T2dn_cpu[] = { C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7), C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39), C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac), @@ -462,7 +458,7 @@ static const uint32_t T2dn_cpu[] = { C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58) }; -static const uint32_t T3up_cpu[] = { +uint32_t T3up_cpu[] = { C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6), C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191), C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656), @@ -529,7 +525,7 @@ static const uint32_t T3up_cpu[] = { C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c) }; -static const uint32_t T3dn_cpu[] = { +uint32_t T3dn_cpu[] = { C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c), C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc), C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87), @@ -685,15 +681,8 @@ __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *out int thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { -#if USE_SHARED - extern __shared__ unsigned char s[]; - uint32_t offset = W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x; - uint32_t *message = (uint32_t*)(&s[offset + 0]); // 128 Byte - uint32_t *state = (uint32_t*)(&s[offset + 128]); // 128 Byte -#else uint32_t message[32]; uint32_t state[32]; -#endif // lese message ein & verknüpfe diese mit dem hash1 von hefty1 // lese den state ein @@ -825,11 +814,7 @@ __host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce) dim3 block(threadsperblock); // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) -#if USE_SHARED - size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte -#else size_t shared_size = 0; -#endif // fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu new file mode 100644 index 0000000..606ae9f --- /dev/null +++ b/cuda_groestlcoin.cu @@ -0,0 +1,463 @@ +// Auf Groestlcoin spezialisierte Version von Groestl + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#define USE_SHARED 1 + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +// Folgende Definitionen später durch header ersetzen +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +// globaler Speicher für alle HeftyHashes aller Threads +__constant__ uint32_t pTarget[8]; // Single GPU +extern uint32_t *d_resultNonce[8]; + +// globaler Speicher für unsere Ergebnisse +uint32_t *d_hashGROESTLCOINoutput[8]; + +__constant__ uint32_t groestlcoin_gpu_state[32]; +__constant__ uint32_t groestlcoin_gpu_msg[32]; +__constant__ uint32_t sha256coin_gpu_constantTable[64]; +__constant__ uint32_t sha256coin_gpu_register[8]; + +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#define PC32up(j, r) ((uint32_t)((j) + (r))) +#define PC32dn(j, r) 0 +#define QC32up(j, r) 0xFFFFFFFF +#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24))) + +#define B32_0(x) ((x) & 0xFF) +#define B32_1(x) (((x) >> 8) & 0xFF) +#define B32_2(x) (((x) >> 16) & 0xFF) +#define B32_3(x) ((x) >> 24) + +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define C32e(x) ((SPH_C32(x) >> 24) \ + | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ + | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ + | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) + +#if USE_SHARED +#define T0up(x) (*((uint32_t*)mixtabs + ( (x)))) +#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x)))) +#define T1up(x) (*((uint32_t*)mixtabs + (512+(x)))) +#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x)))) +#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x)))) +#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x)))) +#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x)))) +#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x)))) +#else +#define T0up(x) tex1Dfetch(t0up1, x) +#define T0dn(x) tex1Dfetch(t0dn1, x) +#define T1up(x) tex1Dfetch(t1up1, x) +#define T1dn(x) tex1Dfetch(t1dn1, x) +#define T2up(x) tex1Dfetch(t2up1, x) +#define T2dn(x) tex1Dfetch(t2dn1, x) +#define T3up(x) tex1Dfetch(t3up1, x) +#define T3dn(x) tex1Dfetch(t3dn1, x) +#endif +texture t0up1; +texture t0dn1; +texture t1up1; +texture t1dn1; +texture t2up1; +texture t2dn1; +texture t3up1; +texture t3dn1; + +extern uint32_t T0up_cpu[]; +extern uint32_t T0dn_cpu[]; +extern uint32_t T1up_cpu[]; +extern uint32_t T1dn_cpu[]; +extern uint32_t T2up_cpu[]; +extern uint32_t T2dn_cpu[]; +extern uint32_t T3up_cpu[]; +extern uint32_t T3dn_cpu[]; +extern uint32_t sha256_cpu_hashTable[]; +extern uint32_t sha256_cpu_constantTable[]; + +#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define R(x, n) ((x) >> (n)) +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define S0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) +#define S1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) +#define s0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) +#define s1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) + +#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) ) + + +__device__ void groestlcoin_perm_P(uint32_t *a, char *mixtabs) +{ + uint32_t t[32]; + +//#pragma unroll 14 + for(int r=0;r<14;r++) + { +#pragma unroll 16 + for(int k=0;k<16;k++) + { + a[(k*2)+0] ^= PC32up(k * 0x10, r); + //a[(k<<1)+1] ^= PC32dn(k * 0x10, r); + } + + // RBTT +#pragma unroll 16 + for(int k=0;k<32;k+=2) + { + t[k + 0] = T0up( B32_0(a[k & 0x1f]) ) ^ + T1up( B32_1(a[(k + 2) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 4) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 6) & 0x1f]) ) ^ + T0dn( B32_0(a[(k + 9) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 11) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 13) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 23) & 0x1f]) ); + + t[k + 1] = T0dn( B32_0(a[k & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 2) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 4) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 6) & 0x1f]) ) ^ + T0up( B32_0(a[(k + 9) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 11) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 13) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 23) & 0x1f]) ); + } +#pragma unroll 32 + for(int k=0;k<32;k++) + a[k] = t[k]; + } +} + +__device__ void groestlcoin_perm_Q(uint32_t *a, char *mixtabs) +{ +//#pragma unroll 14 + for(int r=0;r<14;r++) + { + uint32_t t[32]; + +#pragma unroll 16 + for(int k=0;k<16;k++) + { + a[(k*2)+0] ^= QC32up(k * 0x10, r); + a[(k*2)+1] ^= QC32dn(k * 0x10, r); + } + + // RBTT +#pragma unroll 16 + for(int k=0;k<32;k+=2) + { + t[k + 0] = T0up( B32_0(a[(k + 2) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 6) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 10) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 22) & 0x1f]) ) ^ + T0dn( B32_0(a[(k + 1) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 5) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 9) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 13) & 0x1f]) ); + + t[k + 1] = T0dn( B32_0(a[(k + 2) & 0x1f]) ) ^ + T1dn( B32_1(a[(k + 6) & 0x1f]) ) ^ + T2dn( B32_2(a[(k + 10) & 0x1f]) ) ^ + T3dn( B32_3(a[(k + 22) & 0x1f]) ) ^ + T0up( B32_0(a[(k + 1) & 0x1f]) ) ^ + T1up( B32_1(a[(k + 5) & 0x1f]) ) ^ + T2up( B32_2(a[(k + 9) & 0x1f]) ) ^ + T3up( B32_3(a[(k + 13) & 0x1f]) ); + } +#pragma unroll 32 + for(int k=0;k<32;k++) + a[k] = t[k]; + } +} +#if USE_SHARED +__global__ void __launch_bounds__(256) +#else +__global__ void +#endif + + groestlcoin_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce) +{ +#if USE_SHARED + extern __shared__ char mixtabs[]; + + *((uint32_t*)mixtabs + ( threadIdx.x)) = tex1Dfetch(t0up1, threadIdx.x); + *((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(t0dn1, threadIdx.x); + *((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(t1up1, threadIdx.x); + *((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(t1dn1, threadIdx.x); + *((uint32_t*)mixtabs + (1024+threadIdx.x)) = tex1Dfetch(t2up1, threadIdx.x); + *((uint32_t*)mixtabs + (1280+threadIdx.x)) = tex1Dfetch(t2dn1, threadIdx.x); + *((uint32_t*)mixtabs + (1536+threadIdx.x)) = tex1Dfetch(t3up1, threadIdx.x); + *((uint32_t*)mixtabs + (1792+threadIdx.x)) = tex1Dfetch(t3dn1, threadIdx.x); + + __syncthreads(); +#endif + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + ///// + ///// Lieber groestl, mach, dass es abgeht!!! + ///// + // GROESTL + uint32_t message[32]; + uint32_t state[32]; + + // SHA + // jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory + uint32_t g[32]; + + +#pragma unroll 32 + for(int k=0;k<32;k++) + { + state[k] = groestlcoin_gpu_state[k]; + message[k] = groestlcoin_gpu_msg[k]; + } + + uint32_t nounce = startNounce + thread; + message[19] = SWAB32(nounce); + +#pragma unroll 32 + for(int u=0;u<32;u++) + g[u] = message[u] ^ state[u]; + + // Perm +#if USE_SHARED + groestlcoin_perm_P(g, mixtabs); + groestlcoin_perm_Q(message, mixtabs); +#else + groestlcoin_perm_P(g, NULL); + groestlcoin_perm_Q(message, NULL); +#endif + +#pragma unroll 32 + for(int u=0;u<32;u++) + { + state[u] ^= g[u] ^ message[u]; + g[u] = state[u]; + } + +#if USE_SHARED + groestlcoin_perm_P(g, mixtabs); +#else + groestlcoin_perm_P(g, NULL); +#endif + +#pragma unroll 32 + for(int u=0;u<32;u++) + state[u] ^= g[u]; + + //// + //// 2. Runde groestl + //// +#pragma unroll 16 + for(int k=0;k<16;k++) + message[k] = state[k + 16]; + +#pragma unroll 32 + for(int k=0;k<32;k++) + state[k] = groestlcoin_gpu_state[k]; + +#pragma unroll 16 + for(int k=0;k<16;k++) + message[k+16] = 0; + + message[16] = 0x80; + message[31] = 0x01000000; + +#pragma unroll 32 + for(int u=0;u<32;u++) + g[u] = message[u] ^ state[u]; + + // Perm +#if USE_SHARED + groestlcoin_perm_P(g, mixtabs); + groestlcoin_perm_Q(message, mixtabs); +#else + groestlcoin_perm_P(g, NULL); + groestlcoin_perm_Q(message, NULL); +#endif + +#pragma unroll 32 + for(int u=0;u<32;u++) + { + state[u] ^= g[u] ^ message[u]; + g[u] = state[u]; + } + +#if USE_SHARED + groestlcoin_perm_P(g, mixtabs); +#else + groestlcoin_perm_P(g, NULL); +#endif + +#pragma unroll 32 + for(int u=0;u<32;u++) + state[u] ^= g[u]; + +/* + #pragma unroll 8 + for(int k=0;k<8;k++) + hash[k] = state[k+16]; +*/ + + // kopiere Ergebnis + /* +#pragma unroll 16 + for(int k=0;k<16;k++) + ((uint32_t*)outputHash)[16*thread+k] = state[k + 16]; + */ + int i; + bool rc = true; + + for (i = 7; i >= 0; i--) { + if (state[i+16] > pTarget[i]) { + rc = false; + break; + } + if (state[i+16] < pTarget[i]) { + rc = true; + break; + } + } + + if(rc == true) + { + if(resNounce[0] > nounce) + { + resNounce[0] = nounce; + /* + #pragma unroll 8 + for(int k=0;k<8;k++) + ((uint32_t*)outputHash)[k] = (hash[k]); + */ + } + } + + } +} + +#define texDef(texname, texmem, texsource, texsize) \ + unsigned int *texmem; \ + cudaMalloc(&texmem, texsize); \ + cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \ + texname.normalized = 0; \ + texname.filterMode = cudaFilterModePoint; \ + texname.addressMode[0] = cudaAddressModeClamp; \ + { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); \ + cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \ + +// Setup-Funktionen +__host__ void groestlcoin_cpu_init(int thr_id, int threads) +{ + cudaSetDevice(thr_id); + cudaDeviceSetCacheConfig( cudaFuncCachePreferShared ); +// Texturen mit obigem Makro initialisieren + texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256); + texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256); + texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256); + texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256); + texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256); + texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256); + texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256); + texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256); + + // Kopiere die Hash-Tabellen in den GPU-Speicher + cudaMemcpyToSymbol( sha256coin_gpu_constantTable, + sha256_cpu_constantTable, + sizeof(uint32_t) * 64 ); + + // Startvektor + cudaMemcpyToSymbol( sha256coin_gpu_register, + sha256_cpu_hashTable, + sizeof(uint32_t) * 8 ); + + // setze register + uint32_t groestl_state_init[32]; + memset(groestl_state_init, 0, sizeof(uint32_t) * 32); + groestl_state_init[31] = 0x20000; + + // state speichern + cudaMemcpyToSymbol( groestlcoin_gpu_state, + groestl_state_init, + 128); + + cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); + + // Speicher für alle Ergebnisse belegen (nur für Debug) + cudaMalloc(&d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads); +} + +__host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn) +{ + // Nachricht expandieren und setzen + uint32_t msgBlock[32]; + + memset(msgBlock, 0, sizeof(uint32_t) * 32); + memcpy(&msgBlock[0], data, 80); + + // Erweitere die Nachricht auf den Nachrichtenblock (padding) + // Unsere Nachricht hat 80 Byte + msgBlock[20] = 0x80; + msgBlock[31] = 0x01000000; + + // groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird + // auf der GPU ausgeführt) + + // Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch) + cudaMemcpyToSymbol( groestlcoin_gpu_msg, + msgBlock, + 128); + + cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t)); + cudaMemcpyToSymbol( pTarget, + pTargetIn, + sizeof(uint32_t) * 8 ); +} + +__host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce) +{ +#if USE_SHARED + const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN +#else + const int threadsperblock = 512; // so einstellen wie gewünscht ;-) +#endif + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl) +#if USE_SHARED + size_t shared_size = 8 * 256 * sizeof(uint32_t); +#else + size_t shared_size = 0; +#endif + +// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size); + //fprintf(stderr, "ThrID: %d\n", thr_id); + cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t)); + groestlcoin_gpu_hash<<>>(threads, startNounce, d_hashGROESTLCOINoutput[thr_id], d_resultNonce[thr_id]); + + // Strategisches Sleep Kommando zur Senkung der CPU Last + MyStreamSynchronize(NULL, 0, thr_id); + + cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + + /// Debug + //cudaMemcpy(outputHashes, d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads, cudaMemcpyDeviceToHost); + + // Nounce + //cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); +} diff --git a/cuda_groestlcoin.h b/cuda_groestlcoin.h new file mode 100644 index 0000000..97c2e7f --- /dev/null +++ b/cuda_groestlcoin.h @@ -0,0 +1,8 @@ +#ifndef _CUDA_GROESTLCOIN_H +#define _CUDA_GROESTLCOIN_H + +void groestlcoin_cpu_init(int thr_id, int threads); +void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn); +void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce); + +#endif \ No newline at end of file diff --git a/cuda_hefty1.cu b/cuda_hefty1.cu index c90d15d..52637d3 100644 --- a/cuda_hefty1.cu +++ b/cuda_hefty1.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84-Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" diff --git a/cuda_keccak512.cu b/cuda_keccak512.cu index 66dddaf..65db302 100644 --- a/cuda_keccak512.cu +++ b/cuda_keccak512.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" diff --git a/cuda_sha256.cu b/cuda_sha256.cu index a68f849..97b7051 100644 --- a/cuda_sha256.cu +++ b/cuda_sha256.cu @@ -1,4 +1,3 @@ -/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */ #include #include "cuda_runtime.h" #include "device_launch_parameters.h" diff --git a/groestl.c b/groestl.c index cc685f4..5f19ed1 100644 --- a/groestl.c +++ b/groestl.c @@ -29,7 +29,7 @@ * * @author Thomas Pornin */ - +#include #include #include @@ -2986,12 +2986,13 @@ groestl_big_close(sph_groestl_big_context *sc, #endif } memset(pad + 1, 0, pad_len - 9); + //fprintf(stderr, "%x\n", pad_len); #if SPH_64 sph_enc64be(pad + pad_len - 8, count); #else sph_enc64be(pad + pad_len - 8, count_high); sph_enc64be(pad + pad_len - 4, count_low); -#endif +#endif groestl_big_core(sc, pad, pad_len); READ_STATE_BIG(sc); FINAL_BIG; diff --git a/groestlcoin.cpp b/groestlcoin.cpp new file mode 100644 index 0000000..0a3dcf2 --- /dev/null +++ b/groestlcoin.cpp @@ -0,0 +1,175 @@ +#include "uint256.h" +#include "sph_groestl.h" + +#include "cpuminer-config.h" +#include "miner.h" + +#include +#include +#include "cuda_groestlcoin.h" +#include + +#define SWAP32(x) \ + ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ + (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) + +void sha256func(unsigned char *hash, const unsigned char *data, int len) +{ + uint32_t S[16], T[16]; + int i, r; + + sha256_init(S); + for (r = len; r > -9; r -= 64) { + if (r < 64) + memset(T, 0, 64); + memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r)); + if (r >= 0 && r < 64) + ((unsigned char *)T)[r] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + if (r < 56) + T[15] = 8 * len; + sha256_transform(S, T, 0); + } + /* + memcpy(S + 8, sha256d_hash1 + 8, 32); + sha256_init(T); + sha256_transform(T, S, 0); + */ + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, T[i]); +} + +static void groestlhash(void *state, const void *input) +{ + // Tryout GPU-groestl + + sph_groestl512_context ctx_groestl[2]; + static unsigned char pblank[1]; + int ii; + uint32_t mask = 8; + uint32_t zero = 0; + + + //these uint512 in the c++ source of the client are backed by an array of uint32 + uint32_t hashA[16], hashB[16]; + + + sph_groestl512_init(&ctx_groestl[0]); + sph_groestl512 (&ctx_groestl[0], input, 80); //6 + sph_groestl512_close(&ctx_groestl[0], hashA); //7 + + sph_groestl512_init(&ctx_groestl[1]); + sph_groestl512 (&ctx_groestl[1], hashA, 64); //6 + sph_groestl512_close(&ctx_groestl[1], hashB); //7 + + memcpy(state, hashB, 32); +} + + + +extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t start_nonce = pdata[19]++; + const uint32_t Htarg = ptarget[7]; + const uint32_t throughPut = 4096 * 128; + //const uint32_t throughPut = 1; + int i; + uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t)); + + // init + static bool init[8] = { false, false, false, false, false, false, false, false }; + if(!init[thr_id]) + { + groestlcoin_cpu_init(thr_id, throughPut); + init[thr_id] = true; + } + + // Endian Drehung ist notwendig + //char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"}; + //pdata = (uint32_t*)testdata; + uint32_t endiandata[32]; + for (int kk=0; kk < 32; kk++) + be32enc(&endiandata[kk], pdata[kk]); + + // Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt) + groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget); + + do { + // GPU + uint32_t foundNounce = 0xFFFFFFFF; + + groestlcoin_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce); + + /* + { + for(i=0;i