committing changes to v0.3 release - added Groestlcoin.

11 years ago · b93669a99f
21 changed files with 934 additions and 261 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -25,13 +25,20 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  cuda_hefty1.cu cuda_hefty1.h \
 			  cuda_keccak512.cu cuda_keccak512.h \
 			  cuda_sha256.cu cuda_sha256.h \
-			  cuda_fugue256.cu \
+			  fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
-			  fuguecoin.cpp fugue.c sph_fugue.h uint256.h
+			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
 ccminer_CPPFLAGS	= -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
 .cu.o:
-	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
 ## Thrust needs Compute 2.0 minimum
 #heavy.o: heavy.cu
 #	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
 #
 #cuda_hefty1.o: cuda_hefty1.cu
 #	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
--- a/Makefile.in
+++ b/Makefile.in
@ -60,8 +60,9 @@ am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \
 	ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \
 	cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \
 	cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \
-	cuda_sha256.$(OBJEXT) cuda_fugue256.$(OBJEXT) \
+	cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \
-	ccminer-fuguecoin.$(OBJEXT) ccminer-fugue.$(OBJEXT)
+	cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \
 	ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT)
 ccminer_OBJECTS = $(am_ccminer_OBJECTS)
 ccminer_DEPENDENCIES =
 ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \
@ -275,8 +276,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
 			  cuda_hefty1.cu cuda_hefty1.h \
 			  cuda_keccak512.cu cuda_keccak512.h \
 			  cuda_sha256.cu cuda_sha256.h \
-			  cuda_fugue256.cu \
+			  fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
-			  fuguecoin.cpp fugue.c sph_fugue.h uint256.h
+			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
 ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
@ -387,6 +388,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestlcoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@
@ -561,6 +563,20 @@ ccminer-fuguecoin.obj: fuguecoin.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@	$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi`
 ccminer-groestlcoin.o: groestlcoin.cpp
@am__fastdepCXX_TRUE@	$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.o -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='groestlcoin.cpp' object='ccminer-groestlcoin.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@	$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
 ccminer-groestlcoin.obj: groestlcoin.cpp
@am__fastdepCXX_TRUE@	$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.obj -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
@am__fastdepCXX_TRUE@	$(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@	source='groestlcoin.cpp' object='ccminer-groestlcoin.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@	$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
 # This directory's subdirectories are mostly independent; you can cd
 # into them and run `make' without going through this Makefile.
 # To change the values of `make' variables: instead of editing Makefiles,
@ -1018,7 +1034,7 @@ uninstall-am: uninstall-binPROGRAMS
 .cu.o:
-	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=sm_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
 #heavy.o: heavy.cu
 #	$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
--- a/README.txt
+++ b/README.txt
@ -1,5 +1,5 @@
-ccMiner release 0.2 (Mar 21th 2014) - Pool Mining Release
+ccMiner release 0.3 (Mar 23th 2014) - Groestlcoin Release
 -------------------------------------------------------------
 ***************************************************************
@ -36,6 +36,7 @@ its command line interface and options.
  -a, --algo=ALGO       specify the algorithm to use
                          heavy       use to mine Heavycoin
                          fugue256    use to mine Fuguecoin
                          groestl     use to mine Groestlcoin
  -o, --url=URL         URL of mining server (default: " DEF_RPC_URL ")
  -O, --userpass=U:P    username:password pair for mining server
@ -66,24 +67,29 @@ its command line interface and options.
 Example for Heavycoin Mining on heavycoinpool.com with a single gpu in your system
-cudaminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512
+ccminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512
 Example for Heavycoin Mining on hvc.1gh.com with a dual gpu in your system
-cudaminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512
+ccminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512
 Example for Fuguecoin solo-mining with 4 gpu's in your system and a Fuguecoin-wallet running on localhost
-cudaminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>>
+ccminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>>
 Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs
-q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS
+ccminer.exe -q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS
 Example for Groestlcoin solo mining
 ccminer.exe -q -s 1 -a groestl -o http://127.0.0.1:1441 -u USERNAME -p PASSWORD
 For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents
@ -101,6 +107,9 @@ from your old clunkers.
 >>> RELEASE HISTORY <<<
  Match, 23 2014 added Groestlcoin support. stratum status unknown
                 (the only pool is currently down for fixing issues)
  March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates
                 on Fermi and Maxwell devices. Kepler may suffer slightly
                 (3-5%)
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@ -229,6 +229,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClCompile Include="fugue.c" />
    <ClCompile Include="fuguecoin.cpp" />
    <ClCompile Include="groestl.c" />
    <ClCompile Include="groestlcoin.cpp" />
    <ClCompile Include="hefty1.c" />
    <ClCompile Include="keccak.c" />
    <ClCompile Include="scrypt.c" />
@ -256,6 +257,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <ClInclude Include="cuda_blake512.h" />
    <ClInclude Include="cuda_combine.h" />
    <ClInclude Include="cuda_groestl512.h" />
    <ClInclude Include="cuda_groestlcoin.h" />
    <ClInclude Include="cuda_hefty1.h" />
    <ClInclude Include="cuda_keccak512.h" />
    <ClInclude Include="cuda_sha256.h" />
@ -274,6 +276,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
    <CudaCompile Include="cuda_combine.cu" />
    <CudaCompile Include="cuda_fugue256.cu" />
    <CudaCompile Include="cuda_groestl512.cu" />
    <CudaCompile Include="cuda_groestlcoin.cu" />
    <CudaCompile Include="cuda_hefty1.cu" />
    <CudaCompile Include="cuda_keccak512.cu" />
    <CudaCompile Include="cuda_sha256.cu" />
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@ -90,6 +90,9 @@
    <ClCompile Include="fuguecoin.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="groestlcoin.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="compat.h">
@ -158,6 +161,9 @@
    <ClInclude Include="uint256.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="cuda_groestlcoin.h">
      <Filter>Header Files\CUDA</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CudaCompile Include="cuda_sha256.cu">
@ -184,5 +190,8 @@
    <CudaCompile Include="cuda_fugue256.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
    <CudaCompile Include="cuda_groestlcoin.cu">
      <Filter>Source Files\CUDA</Filter>
    </CudaCompile>
  </ItemGroup>
 </Project>
--- a/20
+++ b/20
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ccminer 2014.03.21.
+# Generated by GNU Autoconf 2.68 for ccminer 2014.03.23.
 #
 #
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -557,8 +557,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ccminer'
 PACKAGE_TARNAME='ccminer'
-PACKAGE_VERSION='2014.03.21'
+PACKAGE_VERSION='2014.03.23'
-PACKAGE_STRING='ccminer 2014.03.21'
+PACKAGE_STRING='ccminer 2014.03.23'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures ccminer 2014.03.21 to adapt to many kinds of systems.
+\`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems.
 Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1368,7 +1368,7 @@ fi
 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of ccminer 2014.03.21:";;
+     short | recursive ) echo "Configuration of ccminer 2014.03.23:";;
   esac
  cat <<\_ACEOF
@ -1469,7 +1469,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-ccminer configure 2014.03.21
+ccminer configure 2014.03.23
 generated by GNU Autoconf 2.68
 Copyright (C) 2010 Free Software Foundation, Inc.
@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
-It was created by ccminer $as_me 2014.03.21, which was
+It was created by ccminer $as_me 2014.03.23, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
  $ $0 $@
@ -2901,7 +2901,7 @@ fi
 # Define the identity of the package.
 PACKAGE='ccminer'
- VERSION='2014.03.21'
+ VERSION='2014.03.23'
 cat >>confdefs.h <<_ACEOF
@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ccminer $as_me 2014.03.21, which was
+This file was extended by ccminer $as_me 2014.03.23, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
  CONFIG_FILES    = $CONFIG_FILES
@ -7184,7 +7184,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ccminer config.status 2014.03.21
+ccminer config.status 2014.03.23
 configured by $0, generated by GNU Autoconf 2.68,
  with options \\"\$ac_cs_config\\"
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2014.03.21])
+AC_INIT([ccminer], [2014.03.23])
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpu-miner.c
+++ b/cpu-miner.c
@ -117,11 +117,13 @@ struct workio_cmd {
 typedef enum {
 	ALGO_HEAVY,		/* Heavycoin hash */
 	ALGO_FUGUE256,		/* Fugue256 */
 	ALGO_GROESTL,
 } sha256_algos;
 static const char *algo_names[] = {
 	"heavy",
-	"fugue256"
+	"fugue256",
 	"groestl"
 };
 bool opt_debug = false;
@ -667,7 +669,11 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 	if (opt_algo == ALGO_HEAVY)
 		heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
 	else
 	if (opt_algo == ALGO_FUGUE256)
 		SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
 	else
 		sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
 	for (i = 0; i < sctx->job.merkle_count; i++) {
 		memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
 		if (opt_algo == ALGO_HEAVY)
@ -817,7 +823,10 @@ static void *miner_thread(void *userdata)
 			rc = scanhash_fugue256(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
 			break;
-
+		case ALGO_GROESTL:
 			rc = scanhash_groestlcoin(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
 			break;
 		default:
 			/* should never happen */
 			goto out;
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -1,174 +1,167 @@
 /* cpuminer-config.h.  Generated from cpuminer-config.h.in by configure.  */
 /* cpuminer-config.h.in.  Generated from configure.ac by autoheader.  */
 /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
   systems. This function is required for `alloca.c' support on those systems.
   */
-/* #undef CRAY_STACKSEG_END */
+#undef CRAY_STACKSEG_END
 /* Define to 1 if using `alloca.c'. */
-/* #undef C_ALLOCA */
+#undef C_ALLOCA
 /* Define to 1 if you have `alloca', as a function or macro. */
-#define HAVE_ALLOCA 1
+#undef HAVE_ALLOCA
 /* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
   */
-#define HAVE_ALLOCA_H 1
+#undef HAVE_ALLOCA_H
 /* Define to 1 if you have the declaration of `be32dec', and to 0 if you
   don't. */
-#define HAVE_DECL_BE32DEC 0
+#undef HAVE_DECL_BE32DEC
 /* Define to 1 if you have the declaration of `be32enc', and to 0 if you
   don't. */
-#define HAVE_DECL_BE32ENC 0
+#undef HAVE_DECL_BE32ENC
 /* Define to 1 if you have the declaration of `le32dec', and to 0 if you
   don't. */
-#define HAVE_DECL_LE32DEC 0
+#undef HAVE_DECL_LE32DEC
 /* Define to 1 if you have the declaration of `le32enc', and to 0 if you
   don't. */
-#define HAVE_DECL_LE32ENC 0
+#undef HAVE_DECL_LE32ENC
 /* Define to 1 if you have the `getopt_long' function. */
 #define HAVE_GETOPT_LONG 1
 /* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
+#undef HAVE_INTTYPES_H
 /* Define to 1 if you have the `crypto' library (-lcrypto). */
 #define HAVE_LIBCRYPTO 1
 /* Define to 1 if you have a functional curl library. */
-#define HAVE_LIBCURL 1
+#undef HAVE_LIBCURL
 /* Define to 1 if you have the `ssl' library (-lssl). */
 #define HAVE_LIBSSL 1
 /* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
+#undef HAVE_MEMORY_H
 /* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
+#undef HAVE_STDINT_H
 /* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
+#undef HAVE_STDLIB_H
 /* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
+#undef HAVE_STRINGS_H
 /* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
+#undef HAVE_STRING_H
 /* Define to 1 if you have the <syslog.h> header file. */
-#define HAVE_SYSLOG_H 1
+#undef HAVE_SYSLOG_H
 /* Define to 1 if you have the <sys/endian.h> header file. */
-/* #undef HAVE_SYS_ENDIAN_H */
+#undef HAVE_SYS_ENDIAN_H
 /* Define to 1 if you have the <sys/param.h> header file. */
-#define HAVE_SYS_PARAM_H 1
+#undef HAVE_SYS_PARAM_H
 /* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
+#undef HAVE_SYS_STAT_H
 /* Define to 1 if you have the <sys/sysctl.h> header file. */
-#define HAVE_SYS_SYSCTL_H 1
+#undef HAVE_SYS_SYSCTL_H
 /* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
+#undef HAVE_SYS_TYPES_H
 /* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
+#undef HAVE_UNISTD_H
 /* Defined if libcurl supports AsynchDNS */
-/* #undef LIBCURL_FEATURE_ASYNCHDNS */
+#undef LIBCURL_FEATURE_ASYNCHDNS
 /* Defined if libcurl supports IDN */
-#define LIBCURL_FEATURE_IDN 1
+#undef LIBCURL_FEATURE_IDN
 /* Defined if libcurl supports IPv6 */
-#define LIBCURL_FEATURE_IPV6 1
+#undef LIBCURL_FEATURE_IPV6
 /* Defined if libcurl supports KRB4 */
-/* #undef LIBCURL_FEATURE_KRB4 */
+#undef LIBCURL_FEATURE_KRB4
 /* Defined if libcurl supports libz */
-#define LIBCURL_FEATURE_LIBZ 1
+#undef LIBCURL_FEATURE_LIBZ
 /* Defined if libcurl supports NTLM */
-#define LIBCURL_FEATURE_NTLM 1
+#undef LIBCURL_FEATURE_NTLM
 /* Defined if libcurl supports SSL */
-#define LIBCURL_FEATURE_SSL 1
+#undef LIBCURL_FEATURE_SSL
 /* Defined if libcurl supports SSPI */
-/* #undef LIBCURL_FEATURE_SSPI */
+#undef LIBCURL_FEATURE_SSPI
 /* Defined if libcurl supports DICT */
-#define LIBCURL_PROTOCOL_DICT 1
+#undef LIBCURL_PROTOCOL_DICT
 /* Defined if libcurl supports FILE */
-#define LIBCURL_PROTOCOL_FILE 1
+#undef LIBCURL_PROTOCOL_FILE
 /* Defined if libcurl supports FTP */
-#define LIBCURL_PROTOCOL_FTP 1
+#undef LIBCURL_PROTOCOL_FTP
 /* Defined if libcurl supports FTPS */
-#define LIBCURL_PROTOCOL_FTPS 1
+#undef LIBCURL_PROTOCOL_FTPS
 /* Defined if libcurl supports HTTP */
-#define LIBCURL_PROTOCOL_HTTP 1
+#undef LIBCURL_PROTOCOL_HTTP
 /* Defined if libcurl supports HTTPS */
-#define LIBCURL_PROTOCOL_HTTPS 1
+#undef LIBCURL_PROTOCOL_HTTPS
 /* Defined if libcurl supports IMAP */
-#define LIBCURL_PROTOCOL_IMAP 1
+#undef LIBCURL_PROTOCOL_IMAP
 /* Defined if libcurl supports LDAP */
-#define LIBCURL_PROTOCOL_LDAP 1
+#undef LIBCURL_PROTOCOL_LDAP
 /* Defined if libcurl supports POP3 */
-#define LIBCURL_PROTOCOL_POP3 1
+#undef LIBCURL_PROTOCOL_POP3
 /* Defined if libcurl supports RTSP */
-#define LIBCURL_PROTOCOL_RTSP 1
+#undef LIBCURL_PROTOCOL_RTSP
 /* Defined if libcurl supports SMTP */
-#define LIBCURL_PROTOCOL_SMTP 1
+#undef LIBCURL_PROTOCOL_SMTP
 /* Defined if libcurl supports TELNET */
-#define LIBCURL_PROTOCOL_TELNET 1
+#undef LIBCURL_PROTOCOL_TELNET
 /* Defined if libcurl supports TFTP */
-#define LIBCURL_PROTOCOL_TFTP 1
+#undef LIBCURL_PROTOCOL_TFTP
 /* Define to 1 if your C compiler doesn't accept -c and -o together. */
-/* #undef NO_MINUS_C_MINUS_O */
+#undef NO_MINUS_C_MINUS_O
 /* Name of package */
-#define PACKAGE "ccminer"
+#undef PACKAGE
 /* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT ""
+#undef PACKAGE_BUGREPORT
 /* Define to the full name of this package. */
 #define PACKAGE_NAME "ccminer"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.03.21"
+#define PACKAGE_STRING "ccminer 2014.03.23"
 /* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "ccminer"
+#undef PACKAGE_TARNAME
 /* Define to the home page for this package. */
-#define PACKAGE_URL ""
+#undef PACKAGE_URL
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.03.21"
+#define PACKAGE_VERSION "2014.03.23"
 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
@ -176,25 +169,22 @@
 	STACK_DIRECTION > 0 => grows toward higher addresses
 	STACK_DIRECTION < 0 => grows toward lower addresses
 	STACK_DIRECTION = 0 => direction of growth unknown */
-/* #undef STACK_DIRECTION */
+#undef STACK_DIRECTION
 /* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
+#undef STDC_HEADERS
 /* Define to 1 if AVX assembly is available. */
-#define USE_AVX 1
+#undef USE_AVX
 /* Define to 1 if AVX2 assembly is available. */
 #define USE_AVX2 1
 /* Define to 1 if XOP assembly is available. */
-#define USE_XOP 1
+#undef USE_XOP
 /* Version number of package */
-#define VERSION "2014.03.21"
+#undef VERSION
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
-/* #undef curl_free */
+#undef curl_free
 /* Define to `unsigned int' if <sys/types.h> does not define. */
-/* #undef size_t */
+#undef size_t
--- a/cuda_blake512.cu
+++ b/cuda_blake512.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
--- a/cuda_combine.cu
+++ b/cuda_combine.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
--- a/cuda_fugue256.cu
+++ b/cuda_fugue256.cu
@ -1,5 +1,3 @@
 #if 1
 /* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
@ -571,6 +569,8 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas
 	*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x);
 	*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x);
 	*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x);
 	__syncthreads();
 #endif
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -788,5 +788,3 @@ __host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *
 	//cudaMemcpy(outputHashes, d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
 	cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
 }
 #endif
--- a/cuda_groestl512.cu
+++ b/cuda_groestl512.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
@ -6,9 +5,6 @@
 #include <stdio.h>
 #include <memory.h>
 #define USE_SHARED 0
 #define W_ALIGNMENT 65
 // Folgende Definitionen später durch header ersetzen
 typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
@ -60,7 +56,7 @@ texture<unsigned int, 1, cudaReadModeElementType> t2dn;
 texture<unsigned int, 1, cudaReadModeElementType> t3up;
 texture<unsigned int, 1, cudaReadModeElementType> t3dn;
-static const uint32_t T0up_cpu[] = {
+uint32_t T0up_cpu[] = {
 	C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d),
 	C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54),
 	C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d),
@ -127,7 +123,7 @@ static const uint32_t T0up_cpu[] = {
 	C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a)
 };
-static const uint32_t T0dn_cpu[] = {
+uint32_t T0dn_cpu[] = {
 	C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6),
 	C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491),
 	C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56),
@ -194,7 +190,7 @@ static const uint32_t T0dn_cpu[] = {
 	C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c)
 };
-static const uint32_t T1up_cpu[] = {
+uint32_t T1up_cpu[] = {
 	C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c),
 	C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc),
 	C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187),
@ -261,7 +257,7 @@ static const uint32_t T1up_cpu[] = {
 	C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e)
 };
-static const uint32_t T1dn_cpu[] = {
+uint32_t T1dn_cpu[] = {
 	C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d),
 	C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954),
 	C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d),
@ -328,7 +324,7 @@ static const uint32_t T1dn_cpu[] = {
 	C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a)
 };
-static const uint32_t T2up_cpu[] = {
+uint32_t T2up_cpu[] = {
 	C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a),
 	C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d),
 	C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1),
@ -395,7 +391,7 @@ static const uint32_t T2up_cpu[] = {
 	C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62)
 };
-static const uint32_t T2dn_cpu[] = {
+uint32_t T2dn_cpu[] = {
 	C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7),
 	C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39),
 	C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac),
@ -462,7 +458,7 @@ static const uint32_t T2dn_cpu[] = {
 	C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58)
 };
-static const uint32_t T3up_cpu[] = {
+uint32_t T3up_cpu[] = {
 	C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6),
 	C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191),
 	C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656),
@ -529,7 +525,7 @@ static const uint32_t T3up_cpu[] = {
 	C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c)
 };
-static const uint32_t T3dn_cpu[] = {
+uint32_t T3dn_cpu[] = {
 	C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c),
 	C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc),
 	C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87),
@ -685,15 +681,8 @@ __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *out
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 #if USE_SHARED
 		extern __shared__ unsigned char s[];
 		uint32_t offset = W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x;
 		uint32_t *message = (uint32_t*)(&s[offset + 0]); // 128 Byte
 		uint32_t *state = (uint32_t*)(&s[offset + 128]); // 128 Byte
 #else
 		uint32_t message[32];
 		uint32_t state[32];
 #endif
 		// lese message ein & verknüpfe diese mit dem hash1 von hefty1
 		// lese den state ein
@ -825,11 +814,7 @@ __host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
 	dim3 block(threadsperblock);
 	// Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl)
 #if USE_SHARED
 	size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock;  // ein uint32_t eingefügt gegen Bank Konflikte
 #else
 	size_t shared_size = 0;
 #endif
 //	fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@ -0,0 +1,463 @@
 // Auf Groestlcoin spezialisierte Version von Groestl
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
 #include <stdio.h>
 #include <memory.h>
 #define USE_SHARED 1
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 // Folgende Definitionen später durch header ersetzen
 typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned long long uint64_t;
 // globaler Speicher für alle HeftyHashes aller Threads
 __constant__ uint32_t pTarget[8]; // Single GPU
 extern uint32_t *d_resultNonce[8];
 // globaler Speicher für unsere Ergebnisse
 uint32_t *d_hashGROESTLCOINoutput[8];
 __constant__ uint32_t groestlcoin_gpu_state[32];
 __constant__ uint32_t groestlcoin_gpu_msg[32];
 __constant__ uint32_t sha256coin_gpu_constantTable[64];
 __constant__ uint32_t sha256coin_gpu_register[8];
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 #define PC32up(j, r)   ((uint32_t)((j) + (r)))
 #define PC32dn(j, r)   0
 #define QC32up(j, r)   0xFFFFFFFF
 #define QC32dn(j, r)   (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))
 #define B32_0(x)    ((x) & 0xFF)
 #define B32_1(x)    (((x) >> 8) & 0xFF)
 #define B32_2(x)    (((x) >> 16) & 0xFF)
 #define B32_3(x)    ((x) >> 24)
 #define SPH_C32(x)	((uint32_t)(x ## U))
 #define C32e(x)     ((SPH_C32(x) >> 24) \
                    | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
                    | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
                    | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
 #if USE_SHARED
 #define T0up(x) (*((uint32_t*)mixtabs + (    (x))))
 #define T0dn(x) (*((uint32_t*)mixtabs + (256+(x))))
 #define T1up(x) (*((uint32_t*)mixtabs + (512+(x))))
 #define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
 #define T2up(x) (*((uint32_t*)mixtabs + (1024+(x))))
 #define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
 #define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
 #define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x))))
 #else
 #define T0up(x) tex1Dfetch(t0up1, x)
 #define T0dn(x) tex1Dfetch(t0dn1, x)
 #define T1up(x) tex1Dfetch(t1up1, x)
 #define T1dn(x) tex1Dfetch(t1dn1, x)
 #define T2up(x) tex1Dfetch(t2up1, x)
 #define T2dn(x) tex1Dfetch(t2dn1, x)
 #define T3up(x) tex1Dfetch(t3up1, x)
 #define T3dn(x) tex1Dfetch(t3dn1, x)
 #endif
 texture<unsigned int, 1, cudaReadModeElementType> t0up1;
 texture<unsigned int, 1, cudaReadModeElementType> t0dn1;
 texture<unsigned int, 1, cudaReadModeElementType> t1up1;
 texture<unsigned int, 1, cudaReadModeElementType> t1dn1;
 texture<unsigned int, 1, cudaReadModeElementType> t2up1;
 texture<unsigned int, 1, cudaReadModeElementType> t2dn1;
 texture<unsigned int, 1, cudaReadModeElementType> t3up1;
 texture<unsigned int, 1, cudaReadModeElementType> t3dn1;
 extern uint32_t T0up_cpu[];
 extern uint32_t T0dn_cpu[];
 extern uint32_t T1up_cpu[];
 extern uint32_t T1dn_cpu[];
 extern uint32_t T2up_cpu[];
 extern uint32_t T2dn_cpu[];
 extern uint32_t T3up_cpu[];
 extern uint32_t T3dn_cpu[];
 extern uint32_t sha256_cpu_hashTable[];
 extern uint32_t sha256_cpu_constantTable[];
 #define S(x, n)			(((x) >> (n)) | ((x) << (32 - (n))))
 #define R(x, n)			((x) >> (n))
 #define Ch(x, y, z)		((x & (y ^ z)) ^ z)
 #define Maj(x, y, z)	((x & (y | z)) | (y & z))
 #define S0(x)			(S(x, 2) ^ S(x, 13) ^ S(x, 22))
 #define S1(x)			(S(x, 6) ^ S(x, 11) ^ S(x, 25))
 #define s0(x)			(S(x, 7) ^ S(x, 18) ^ R(x, 3))
 #define s1(x)			(S(x, 17) ^ S(x, 19) ^ R(x, 10))
 #define SWAB32(x)		( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
 __device__ void groestlcoin_perm_P(uint32_t *a, char *mixtabs)
 {
 	uint32_t t[32];
 //#pragma unroll 14
 	for(int r=0;r<14;r++)
 	{
 #pragma unroll 16
 		for(int k=0;k<16;k++)
 		{
 			a[(k*2)+0] ^= PC32up(k * 0x10, r);
 			//a[(k<<1)+1] ^= PC32dn(k * 0x10, r);
 		}
 		// RBTT
 #pragma unroll 16
 		for(int k=0;k<32;k+=2)
 		{
 			t[k + 0] =	T0up( B32_0(a[k & 0x1f]) ) ^ 
 						T1up( B32_1(a[(k + 2) & 0x1f]) ) ^ 
 						T2up( B32_2(a[(k + 4) & 0x1f]) ) ^ 
 						T3up( B32_3(a[(k + 6) & 0x1f]) ) ^ 
 						T0dn( B32_0(a[(k + 9) & 0x1f]) ) ^ 
 						T1dn( B32_1(a[(k + 11) & 0x1f]) ) ^ 
 						T2dn( B32_2(a[(k + 13) & 0x1f]) ) ^ 
 						T3dn( B32_3(a[(k + 23) & 0x1f]) );
 			t[k + 1] =	T0dn( B32_0(a[k & 0x1f]) ) ^ 
 						T1dn( B32_1(a[(k + 2) & 0x1f]) ) ^ 
 						T2dn( B32_2(a[(k + 4) & 0x1f]) ) ^ 
 						T3dn( B32_3(a[(k + 6) & 0x1f]) ) ^ 
 						T0up( B32_0(a[(k + 9) & 0x1f]) ) ^ 
 						T1up( B32_1(a[(k + 11) & 0x1f]) ) ^ 
 						T2up( B32_2(a[(k + 13) & 0x1f]) ) ^ 
 						T3up( B32_3(a[(k + 23) & 0x1f]) );
 		}
 #pragma unroll 32
 		for(int k=0;k<32;k++)
 			a[k] = t[k];
 	}
 }
 __device__ void groestlcoin_perm_Q(uint32_t *a, char *mixtabs)
 {	
 //#pragma unroll 14
 	for(int r=0;r<14;r++)
 	{
 		uint32_t t[32];
 #pragma unroll 16
 		for(int k=0;k<16;k++)
 		{
 			a[(k*2)+0] ^= QC32up(k * 0x10, r);
 			a[(k*2)+1] ^= QC32dn(k * 0x10, r);
 		}
 		// RBTT
 #pragma unroll 16
 		for(int k=0;k<32;k+=2)
 		{
 			t[k + 0] =	T0up( B32_0(a[(k + 2) & 0x1f]) ) ^ 
 						T1up( B32_1(a[(k + 6) & 0x1f]) ) ^ 
 						T2up( B32_2(a[(k + 10) & 0x1f]) ) ^ 
 						T3up( B32_3(a[(k + 22) & 0x1f]) ) ^ 
 						T0dn( B32_0(a[(k + 1) & 0x1f]) ) ^ 
 						T1dn( B32_1(a[(k + 5) & 0x1f]) ) ^ 
 						T2dn( B32_2(a[(k + 9) & 0x1f]) ) ^ 
 						T3dn( B32_3(a[(k + 13) & 0x1f]) );
 			t[k + 1] =	T0dn( B32_0(a[(k + 2) & 0x1f]) ) ^ 
 						T1dn( B32_1(a[(k + 6) & 0x1f]) ) ^ 
 						T2dn( B32_2(a[(k + 10) & 0x1f]) ) ^ 
 						T3dn( B32_3(a[(k + 22) & 0x1f]) ) ^ 
 						T0up( B32_0(a[(k + 1) & 0x1f]) ) ^ 
 						T1up( B32_1(a[(k + 5) & 0x1f]) ) ^ 
 						T2up( B32_2(a[(k + 9) & 0x1f]) ) ^ 
 						T3up( B32_3(a[(k + 13) & 0x1f]) );
 		}
 #pragma unroll 32
 		for(int k=0;k<32;k++)
 			a[k] = t[k];
 	}
 }
 #if USE_SHARED
 __global__ void  __launch_bounds__(256) 
 #else
 __global__ void 
 #endif
 groestlcoin_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
 {
 #if USE_SHARED
 	extern __shared__ char mixtabs[];
 	*((uint32_t*)mixtabs + (    threadIdx.x)) = tex1Dfetch(t0up1, threadIdx.x);
 	*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(t0dn1, threadIdx.x);
 	*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(t1up1, threadIdx.x);
 	*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(t1dn1, threadIdx.x);
 	*((uint32_t*)mixtabs + (1024+threadIdx.x)) = tex1Dfetch(t2up1, threadIdx.x);
 	*((uint32_t*)mixtabs + (1280+threadIdx.x)) = tex1Dfetch(t2dn1, threadIdx.x);
 	*((uint32_t*)mixtabs + (1536+threadIdx.x)) = tex1Dfetch(t3up1, threadIdx.x);
 	*((uint32_t*)mixtabs + (1792+threadIdx.x)) = tex1Dfetch(t3dn1, threadIdx.x);
 	__syncthreads();
 #endif
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 	/////
 	///// Lieber groestl, mach, dass es abgeht!!!
 	/////
 		// GROESTL
 		uint32_t message[32];
 		uint32_t state[32];
 		// SHA
 		// jeder thread in diesem  Block bekommt sein eigenes W Array im Shared memory
 		uint32_t g[32];
 #pragma unroll 32
 		for(int k=0;k<32;k++)
 		{
 			state[k] = groestlcoin_gpu_state[k];
 			message[k] = groestlcoin_gpu_msg[k];
 		}
 		uint32_t nounce = startNounce + thread;
 		message[19] = SWAB32(nounce);
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 			g[u] = message[u] ^ state[u];
 		// Perm
 #if USE_SHARED
 		groestlcoin_perm_P(g, mixtabs);
 		groestlcoin_perm_Q(message, mixtabs);
 #else
 		groestlcoin_perm_P(g, NULL);
 		groestlcoin_perm_Q(message, NULL);
 #endif
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 		{
 			state[u] ^= g[u] ^ message[u];
 			g[u] = state[u];
 		}
 #if USE_SHARED
 		groestlcoin_perm_P(g, mixtabs);
 #else
 		groestlcoin_perm_P(g, NULL);
 #endif
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 			state[u] ^= g[u];
 		////
 		//// 2. Runde groestl
 		////
 #pragma unroll 16
 		for(int k=0;k<16;k++)
 			message[k] = state[k + 16];
 #pragma unroll 32
 		for(int k=0;k<32;k++)
 			state[k] = groestlcoin_gpu_state[k];
 #pragma unroll 16
 		for(int k=0;k<16;k++)
 			message[k+16] = 0;
 		message[16] = 0x80;		
 		message[31] = 0x01000000;
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 			g[u] = message[u] ^ state[u];
 		// Perm
 #if USE_SHARED
 		groestlcoin_perm_P(g, mixtabs);
 		groestlcoin_perm_Q(message, mixtabs);
 #else
 		groestlcoin_perm_P(g, NULL);
 		groestlcoin_perm_Q(message, NULL);
 #endif
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 		{
 			state[u] ^= g[u] ^ message[u];
 			g[u] = state[u];
 		}
 #if USE_SHARED
 		groestlcoin_perm_P(g, mixtabs);
 #else
 		groestlcoin_perm_P(g, NULL);
 #endif
 #pragma unroll 32
 		for(int u=0;u<32;u++)
 			state[u] ^= g[u];
 /*
 	#pragma unroll 8
 		for(int k=0;k<8;k++)
 			hash[k] = state[k+16];
 */
 		// kopiere Ergebnis
 		/*
 #pragma unroll 16
 		for(int k=0;k<16;k++)
 			((uint32_t*)outputHash)[16*thread+k] = state[k + 16];
 			*/
 		int i;
 		bool rc = true;
 		for (i = 7; i >= 0; i--) {
 			if (state[i+16] > pTarget[i]) {
 				rc = false;
 				break;
 			}
 			if (state[i+16] < pTarget[i]) {
 				rc = true;
 				break;
 			}
 		}
 		if(rc == true)
 		{
 			if(resNounce[0] > nounce)
 			{
 				resNounce[0] = nounce;
 				/*
 				#pragma unroll 8
 				for(int k=0;k<8;k++)					
 					((uint32_t*)outputHash)[k] = (hash[k]);
 				*/
 			}
 		}
 	}
 }
 #define texDef(texname, texmem, texsource, texsize) \
 	unsigned int *texmem; \
 	cudaMalloc(&texmem, texsize); \
 	cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
 	texname.normalized = 0; \
 	texname.filterMode = cudaFilterModePoint; \
 	texname.addressMode[0] = cudaAddressModeClamp; \
 	{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
 	  cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
 // Setup-Funktionen
 __host__ void groestlcoin_cpu_init(int thr_id, int threads)
 {
 	cudaSetDevice(thr_id);
 	cudaDeviceSetCacheConfig( cudaFuncCachePreferShared );
 // Texturen mit obigem Makro initialisieren
 	texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
 	texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
 	texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
 	texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
 	texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
 	texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
 	texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
 	texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	cudaMemcpyToSymbol(	sha256coin_gpu_constantTable,
 						sha256_cpu_constantTable,
 						sizeof(uint32_t) * 64 );
 	// Startvektor
 	cudaMemcpyToSymbol(	sha256coin_gpu_register,
 						sha256_cpu_hashTable,
 						sizeof(uint32_t) * 8 );
 	// setze register 
 	uint32_t groestl_state_init[32];
 	memset(groestl_state_init, 0, sizeof(uint32_t) * 32);
 	groestl_state_init[31] = 0x20000;
 	// state speichern
 	cudaMemcpyToSymbol(	groestlcoin_gpu_state,
 						groestl_state_init,
 						128);
 	cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t)); 
 	// Speicher für alle Ergebnisse belegen (nur für Debug)
 	cudaMalloc(&d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads);
 }
 __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 {
 	// Nachricht expandieren und setzen
 	uint32_t msgBlock[32];
 	memset(msgBlock, 0, sizeof(uint32_t) * 32);
 	memcpy(&msgBlock[0], data, 80);
 	// Erweitere die Nachricht auf den Nachrichtenblock (padding)
 	// Unsere Nachricht hat 80 Byte
 	msgBlock[20] = 0x80;
 	msgBlock[31] = 0x01000000;
 	// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
 	// auf der GPU ausgeführt)
 	// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
 	cudaMemcpyToSymbol(	groestlcoin_gpu_msg,
 						msgBlock,
 						128);
 	cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
 	cudaMemcpyToSymbol(	pTarget,
 						pTargetIn,
 						sizeof(uint32_t) * 8 );
 }
 __host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
 {
 #if USE_SHARED
 	const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
 #else
 	const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
 #endif
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
 	// Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl)
 #if USE_SHARED
 	size_t shared_size = 8 * 256 * sizeof(uint32_t);
 #else
 	size_t shared_size = 0;
 #endif
 //	fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
 	//fprintf(stderr, "ThrID: %d\n", thr_id);
 	cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
 	groestlcoin_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hashGROESTLCOINoutput[thr_id], d_resultNonce[thr_id]);
 	// Strategisches Sleep Kommando zur Senkung der CPU Last
 	MyStreamSynchronize(NULL, 0, thr_id);
 	cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
 	/// Debug
 	//cudaMemcpy(outputHashes, d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads, cudaMemcpyDeviceToHost);
 	// Nounce
 	//cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
 }
--- a/cuda_groestlcoin.h
+++ b/cuda_groestlcoin.h
@ -0,0 +1,8 @@
 #ifndef _CUDA_GROESTLCOIN_H
 #define _CUDA_GROESTLCOIN_H
 void groestlcoin_cpu_init(int thr_id, int threads);
 void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
 void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
 #endif
--- a/cuda_hefty1.cu
+++ b/cuda_hefty1.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84-Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
--- a/cuda_keccak512.cu
+++ b/cuda_keccak512.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
--- a/cuda_sha256.cu
+++ b/cuda_sha256.cu
@ -1,4 +1,3 @@
 /* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
--- a/groestl.c
+++ b/groestl.c
@ -29,7 +29,7 @@
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
-
+#include <stdio.h>
 #include <stddef.h>
 #include <string.h>
@ -2986,6 +2986,7 @@ groestl_big_close(sph_groestl_big_context *sc,
 #endif
 	}
 	memset(pad + 1, 0, pad_len - 9);
 	//fprintf(stderr, "%x\n", pad_len);
 #if SPH_64
 	sph_enc64be(pad + pad_len - 8, count);
 #else
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@ -0,0 +1,175 @@
 #include "uint256.h"
 #include "sph_groestl.h"
 #include "cpuminer-config.h"
 #include "miner.h"
 #include <string.h>
 #include <stdint.h>
 #include "cuda_groestlcoin.h"
 #include <openssl/sha.h>
 #define SWAP32(x) \
    ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
      (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
 void sha256func(unsigned char *hash, const unsigned char *data, int len)
 {
 	uint32_t S[16], T[16];
 	int i, r;
 	sha256_init(S);
 	for (r = len; r > -9; r -= 64) {
 		if (r < 64)
 			memset(T, 0, 64);
 		memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
 		if (r >= 0 && r < 64)
 			((unsigned char *)T)[r] = 0x80;
 		for (i = 0; i < 16; i++)
 			T[i] = be32dec(T + i);
 		if (r < 56)
 			T[15] = 8 * len;
 		sha256_transform(S, T, 0);
 	}
 	/*
 	memcpy(S + 8, sha256d_hash1 + 8, 32);
 	sha256_init(T);
 	sha256_transform(T, S, 0);
 	*/
 	for (i = 0; i < 8; i++)
 		be32enc((uint32_t *)hash + i, T[i]);
 }
 static void groestlhash(void *state, const void *input)
 {
 	// Tryout GPU-groestl
    sph_groestl512_context     ctx_groestl[2];
    static unsigned char pblank[1];
 	int ii;
    uint32_t mask = 8;
    uint32_t zero = 0;
 	//these uint512 in the c++ source of the client are backed by an array of uint32
    uint32_t hashA[16], hashB[16];	
    sph_groestl512_init(&ctx_groestl[0]);
    sph_groestl512 (&ctx_groestl[0], input, 80); //6
    sph_groestl512_close(&ctx_groestl[0], hashA); //7	
 	sph_groestl512_init(&ctx_groestl[1]);
 	sph_groestl512 (&ctx_groestl[1], hashA, 64); //6
    sph_groestl512_close(&ctx_groestl[1], hashB); //7
 	memcpy(state, hashB, 32);
 }
 extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {	
 	uint32_t start_nonce = pdata[19]++;
 	const uint32_t Htarg = ptarget[7];
 	const uint32_t throughPut = 4096 * 128;
 	//const uint32_t throughPut = 1;
 	int i;
 	uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t));
 	// init
 	static bool init[8] = { false, false, false, false, false, false, false, false };
 	if(!init[thr_id])
 	{
 		groestlcoin_cpu_init(thr_id, throughPut);
 		init[thr_id] = true;
 	}
 	// Endian Drehung ist notwendig
 	//char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"};
 	//pdata = (uint32_t*)testdata;
 	uint32_t endiandata[32];
 	for (int kk=0; kk < 32; kk++)
 		be32enc(&endiandata[kk], pdata[kk]);
 	// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
 	groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
 	do {
 		// GPU
 		uint32_t foundNounce = 0xFFFFFFFF;
 		groestlcoin_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce);
 		/*
 		{
 			for(i=0;i<throughPut;i++)
 			{
 				uint32_t tmpHash[8];
 				endiandata[19] = SWAP32(pdata[19]);
 				groestlhash(tmpHash, endiandata);
 				int ii;
 				printf("result GPU: ");
 				for (ii=0; ii < 32; ii++)
 				{
 					printf ("%.2x",((uint8_t*)&outputHash[8*i])[ii]);
 				};
 				printf ("\n");	
 				groestlhash(tmpHash, endiandata);
 				printf("result CPU: ");
 				for (ii=0; ii < 32; ii++)
 				{
 					printf ("%.2x",((uint8_t*)tmpHash)[ii]);
 				};
 			}
 			exit(0);
 		}		
 		*/
 		if(foundNounce < 0xffffffff)
 		{
 			uint32_t tmpHash[8];
 			endiandata[19] = SWAP32(foundNounce);
 			groestlhash(tmpHash, endiandata);
 			if (((tmpHash[7]&0xFFFFFF00)==0) && 
 					fulltest(tmpHash, ptarget)) {
 						pdata[19] = foundNounce;
 						*hashes_done = foundNounce - start_nonce;
 						free(outputHash);
 				return true;
 			}
 			foundNounce = 0xffffffff;
 			/*
 			int ii;
 			printf("result GPU: ");
 			for (ii=0; ii < 32; ii++)
 			{
 				printf ("%.2x",((uint8_t*)&outputHash[0])[ii]);
 			};
 			printf ("\n");	
 			printf("result CPU: ");
 			for (ii=0; ii < 32; ii++)
 			{
 				printf ("%.2x",((uint8_t*)tmpHash)[ii]);
 			};
 			printf ("\n");	
 			*/
 		}
 		if (pdata[19] + throughPut < pdata[19])
 			pdata[19] = max_nonce;
 		else pdata[19] += throughPut;
 	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
 	*hashes_done = pdata[19] - start_nonce;
 	free(outputHash);
 	return 0;
 }
--- a/miner.h
+++ b/miner.h
@ -211,8 +211,13 @@ extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
 struct thr_info {
 	int		id;