committing changes to v0.3 release - added Groestlcoin.

This commit is contained in:
Christian Buchner 2014-03-23 21:39:26 +01:00
parent a3f4e78a20
commit b93669a99f
21 changed files with 934 additions and 261 deletions

View File

@ -25,13 +25,20 @@ ccminer_SOURCES = elist.h miner.h compat.h \
cuda_hefty1.cu cuda_hefty1.h \
cuda_keccak512.cu cuda_keccak512.h \
cuda_sha256.cu cuda_sha256.h \
cuda_fugue256.cu \
fuguecoin.cpp fugue.c sph_fugue.h uint256.h
fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
.cu.o:
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
## Thrust needs Compute 2.0 minimum
#heavy.o: heavy.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
#
#cuda_hefty1.o: cuda_hefty1.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<

View File

@ -60,8 +60,9 @@ am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \
ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \
cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \
cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \
cuda_sha256.$(OBJEXT) cuda_fugue256.$(OBJEXT) \
ccminer-fuguecoin.$(OBJEXT) ccminer-fugue.$(OBJEXT)
cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \
cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \
ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT)
ccminer_OBJECTS = $(am_ccminer_OBJECTS)
ccminer_DEPENDENCIES =
ccminer_LINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(ccminer_LDFLAGS) \
@ -275,8 +276,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
cuda_hefty1.cu cuda_hefty1.h \
cuda_keccak512.cu cuda_keccak512.h \
cuda_sha256.cu cuda_sha256.h \
cuda_fugue256.cu \
fuguecoin.cpp fugue.c sph_fugue.h uint256.h
fuguecoin.cpp cuda_fugue256.cu fugue.c sph_fugue.h uint256.h \
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
@ -387,6 +388,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestl.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-groestlcoin.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-hefty1.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-keccak.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-scrypt.Po@am__quote@
@ -561,6 +563,20 @@ ccminer-fuguecoin.obj: fuguecoin.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-fuguecoin.obj `if test -f 'fuguecoin.cpp'; then $(CYGPATH_W) 'fuguecoin.cpp'; else $(CYGPATH_W) '$(srcdir)/fuguecoin.cpp'; fi`
ccminer-groestlcoin.o: groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.o -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.o `test -f 'groestlcoin.cpp' || echo '$(srcdir)/'`groestlcoin.cpp
ccminer-groestlcoin.obj: groestlcoin.cpp
@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT ccminer-groestlcoin.obj -MD -MP -MF $(DEPDIR)/ccminer-groestlcoin.Tpo -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/ccminer-groestlcoin.Tpo $(DEPDIR)/ccminer-groestlcoin.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='groestlcoin.cpp' object='ccminer-groestlcoin.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o ccminer-groestlcoin.obj `if test -f 'groestlcoin.cpp'; then $(CYGPATH_W) 'groestlcoin.cpp'; else $(CYGPATH_W) '$(srcdir)/groestlcoin.cpp'; fi`
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
# To change the values of `make' variables: instead of editing Makefiles,
@ -1018,7 +1034,7 @@ uninstall-am: uninstall-binPROGRAMS
.cu.o:
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=sm_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
$(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
#heavy.o: heavy.cu
# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<

View File

@ -1,5 +1,5 @@
ccMiner release 0.2 (Mar 21th 2014) - Pool Mining Release
ccMiner release 0.3 (Mar 23th 2014) - Groestlcoin Release
-------------------------------------------------------------
***************************************************************
@ -36,6 +36,7 @@ its command line interface and options.
-a, --algo=ALGO specify the algorithm to use
heavy use to mine Heavycoin
fugue256 use to mine Fuguecoin
groestl use to mine Groestlcoin
-o, --url=URL URL of mining server (default: " DEF_RPC_URL ")
-O, --userpass=U:P username:password pair for mining server
@ -66,24 +67,29 @@ its command line interface and options.
Example for Heavycoin Mining on heavycoinpool.com with a single gpu in your system
cudaminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512
ccminer.exe -t 1 -a heavy -o stratum+tcp://stratum01.heavycoinpool.com:5333 -u <<username.worker>> -p <<workerpassword>> -v 512
Example for Heavycoin Mining on hvc.1gh.com with a dual gpu in your system
cudaminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512
ccminer.exe -t 2 -a heavy -o stratum+tcp://hvcpool.1gh.com:5333 -u <<WALLET>> -p x -v 512
Example for Fuguecoin solo-mining with 4 gpu's in your system and a Fuguecoin-wallet running on localhost
cudaminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>>
ccminer.exe -q -s 1 -t 4 -a fugue256 -o http://localhost:9089 -u <<myusername>> -p <<mypassword>>
Example for Fuguecoin pool mining on dwarfpool.com with all your GPUs
-q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS
ccminer.exe -q -a fugue256 -o stratum+tcp://erebor.dwarfpool.com:3340 -u YOURWALLETADDRESS.1 -p YOUREMAILADDRESS
Example for Groestlcoin solo mining
ccminer.exe -q -s 1 -a groestl -o http://127.0.0.1:1441 -u USERNAME -p PASSWORD
For solo-mining you typically use -o 127.0.0.1:xxxx where xxxx represents
@ -101,6 +107,9 @@ from your old clunkers.
>>> RELEASE HISTORY <<<
Match, 23 2014 added Groestlcoin support. stratum status unknown
(the only pool is currently down for fixing issues)
March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates
on Fermi and Maxwell devices. Kepler may suffer slightly
(3-5%)

View File

@ -229,6 +229,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClCompile Include="fugue.c" />
<ClCompile Include="fuguecoin.cpp" />
<ClCompile Include="groestl.c" />
<ClCompile Include="groestlcoin.cpp" />
<ClCompile Include="hefty1.c" />
<ClCompile Include="keccak.c" />
<ClCompile Include="scrypt.c" />
@ -256,6 +257,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<ClInclude Include="cuda_blake512.h" />
<ClInclude Include="cuda_combine.h" />
<ClInclude Include="cuda_groestl512.h" />
<ClInclude Include="cuda_groestlcoin.h" />
<ClInclude Include="cuda_hefty1.h" />
<ClInclude Include="cuda_keccak512.h" />
<ClInclude Include="cuda_sha256.h" />
@ -274,6 +276,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
<CudaCompile Include="cuda_combine.cu" />
<CudaCompile Include="cuda_fugue256.cu" />
<CudaCompile Include="cuda_groestl512.cu" />
<CudaCompile Include="cuda_groestlcoin.cu" />
<CudaCompile Include="cuda_hefty1.cu" />
<CudaCompile Include="cuda_keccak512.cu" />
<CudaCompile Include="cuda_sha256.cu" />

View File

@ -90,6 +90,9 @@
<ClCompile Include="fuguecoin.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="groestlcoin.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="compat.h">
@ -158,6 +161,9 @@
<ClInclude Include="uint256.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="cuda_groestlcoin.h">
<Filter>Header Files\CUDA</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<CudaCompile Include="cuda_sha256.cu">
@ -184,5 +190,8 @@
<CudaCompile Include="cuda_fugue256.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
<CudaCompile Include="cuda_groestlcoin.cu">
<Filter>Source Files\CUDA</Filter>
</CudaCompile>
</ItemGroup>
</Project>

20
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for ccminer 2014.03.21.
# Generated by GNU Autoconf 2.68 for ccminer 2014.03.23.
#
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -557,8 +557,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ccminer'
PACKAGE_TARNAME='ccminer'
PACKAGE_VERSION='2014.03.21'
PACKAGE_STRING='ccminer 2014.03.21'
PACKAGE_VERSION='2014.03.23'
PACKAGE_STRING='ccminer 2014.03.23'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ccminer 2014.03.21 to adapt to many kinds of systems.
\`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1368,7 +1368,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ccminer 2014.03.21:";;
short | recursive ) echo "Configuration of ccminer 2014.03.23:";;
esac
cat <<\_ACEOF
@ -1469,7 +1469,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ccminer configure 2014.03.21
ccminer configure 2014.03.23
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ccminer $as_me 2014.03.21, which was
It was created by ccminer $as_me 2014.03.23, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
@ -2901,7 +2901,7 @@ fi
# Define the identity of the package.
PACKAGE='ccminer'
VERSION='2014.03.21'
VERSION='2014.03.23'
cat >>confdefs.h <<_ACEOF
@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ccminer $as_me 2014.03.21, which was
This file was extended by ccminer $as_me 2014.03.23, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -7184,7 +7184,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
ccminer config.status 2014.03.21
ccminer config.status 2014.03.23
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"

View File

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.03.21])
AC_INIT([ccminer], [2014.03.23])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@ -117,11 +117,13 @@ struct workio_cmd {
typedef enum {
ALGO_HEAVY, /* Heavycoin hash */
ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL,
} sha256_algos;
static const char *algo_names[] = {
"heavy",
"fugue256"
"fugue256",
"groestl"
};
bool opt_debug = false;
@ -667,7 +669,11 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
if (opt_algo == ALGO_HEAVY)
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
else
if (opt_algo == ALGO_FUGUE256)
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
else
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
for (i = 0; i < sctx->job.merkle_count; i++) {
memcpy(merkle_root + 32, sctx->job.merkle[i], 32);
if (opt_algo == ALGO_HEAVY)
@ -817,7 +823,10 @@ static void *miner_thread(void *userdata)
rc = scanhash_fugue256(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_GROESTL:
rc = scanhash_groestlcoin(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default:
/* should never happen */
goto out;

View File

@ -1,200 +1,190 @@
/* cpuminer-config.h. Generated from cpuminer-config.h.in by configure. */
/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems.
*/
/* #undef CRAY_STACKSEG_END */
/* Define to 1 if using `alloca.c'. */
/* #undef C_ALLOCA */
/* Define to 1 if you have `alloca', as a function or macro. */
#define HAVE_ALLOCA 1
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
#define HAVE_ALLOCA_H 1
/* Define to 1 if you have the declaration of `be32dec', and to 0 if you
don't. */
#define HAVE_DECL_BE32DEC 0
/* Define to 1 if you have the declaration of `be32enc', and to 0 if you
don't. */
#define HAVE_DECL_BE32ENC 0
/* Define to 1 if you have the declaration of `le32dec', and to 0 if you
don't. */
#define HAVE_DECL_LE32DEC 0
/* Define to 1 if you have the declaration of `le32enc', and to 0 if you
don't. */
#define HAVE_DECL_LE32ENC 0
/* Define to 1 if you have the `getopt_long' function. */
#define HAVE_GETOPT_LONG 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `crypto' library (-lcrypto). */
#define HAVE_LIBCRYPTO 1
/* Define to 1 if you have a functional curl library. */
#define HAVE_LIBCURL 1
/* Define to 1 if you have the `ssl' library (-lssl). */
#define HAVE_LIBSSL 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <syslog.h> header file. */
#define HAVE_SYSLOG_H 1
/* Define to 1 if you have the <sys/endian.h> header file. */
/* #undef HAVE_SYS_ENDIAN_H */
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/sysctl.h> header file. */
#define HAVE_SYS_SYSCTL_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Defined if libcurl supports AsynchDNS */
/* #undef LIBCURL_FEATURE_ASYNCHDNS */
/* Defined if libcurl supports IDN */
#define LIBCURL_FEATURE_IDN 1
/* Defined if libcurl supports IPv6 */
#define LIBCURL_FEATURE_IPV6 1
/* Defined if libcurl supports KRB4 */
/* #undef LIBCURL_FEATURE_KRB4 */
/* Defined if libcurl supports libz */
#define LIBCURL_FEATURE_LIBZ 1
/* Defined if libcurl supports NTLM */
#define LIBCURL_FEATURE_NTLM 1
/* Defined if libcurl supports SSL */
#define LIBCURL_FEATURE_SSL 1
/* Defined if libcurl supports SSPI */
/* #undef LIBCURL_FEATURE_SSPI */
/* Defined if libcurl supports DICT */
#define LIBCURL_PROTOCOL_DICT 1
/* Defined if libcurl supports FILE */
#define LIBCURL_PROTOCOL_FILE 1
/* Defined if libcurl supports FTP */
#define LIBCURL_PROTOCOL_FTP 1
/* Defined if libcurl supports FTPS */
#define LIBCURL_PROTOCOL_FTPS 1
/* Defined if libcurl supports HTTP */
#define LIBCURL_PROTOCOL_HTTP 1
/* Defined if libcurl supports HTTPS */
#define LIBCURL_PROTOCOL_HTTPS 1
/* Defined if libcurl supports IMAP */
#define LIBCURL_PROTOCOL_IMAP 1
/* Defined if libcurl supports LDAP */
#define LIBCURL_PROTOCOL_LDAP 1
/* Defined if libcurl supports POP3 */
#define LIBCURL_PROTOCOL_POP3 1
/* Defined if libcurl supports RTSP */
#define LIBCURL_PROTOCOL_RTSP 1
/* Defined if libcurl supports SMTP */
#define LIBCURL_PROTOCOL_SMTP 1
/* Defined if libcurl supports TELNET */
#define LIBCURL_PROTOCOL_TELNET 1
/* Defined if libcurl supports TFTP */
#define LIBCURL_PROTOCOL_TFTP 1
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
/* #undef NO_MINUS_C_MINUS_O */
/* Name of package */
#define PACKAGE "ccminer"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.03.21"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "2014.03.21"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
automatically deduced at runtime.
STACK_DIRECTION > 0 => grows toward higher addresses
STACK_DIRECTION < 0 => grows toward lower addresses
STACK_DIRECTION = 0 => direction of growth unknown */
/* #undef STACK_DIRECTION */
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to 1 if AVX assembly is available. */
#define USE_AVX 1
/* Define to 1 if AVX2 assembly is available. */
#define USE_AVX2 1
/* Define to 1 if XOP assembly is available. */
#define USE_XOP 1
/* Version number of package */
#define VERSION "2014.03.21"
/* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems.
*/
#undef CRAY_STACKSEG_END
/* Define to 1 if using `alloca.c'. */
#undef C_ALLOCA
/* Define to 1 if you have `alloca', as a function or macro. */
#undef HAVE_ALLOCA
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
#undef HAVE_ALLOCA_H
/* Define to 1 if you have the declaration of `be32dec', and to 0 if you
don't. */
#undef HAVE_DECL_BE32DEC
/* Define to 1 if you have the declaration of `be32enc', and to 0 if you
don't. */
#undef HAVE_DECL_BE32ENC
/* Define to 1 if you have the declaration of `le32dec', and to 0 if you
don't. */
#undef HAVE_DECL_LE32DEC
/* Define to 1 if you have the declaration of `le32enc', and to 0 if you
don't. */
#undef HAVE_DECL_LE32ENC
/* Define to 1 if you have the `getopt_long' function. */
#define HAVE_GETOPT_LONG 1
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have a functional curl library. */
#undef HAVE_LIBCURL
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <syslog.h> header file. */
#undef HAVE_SYSLOG_H
/* Define to 1 if you have the <sys/endian.h> header file. */
#undef HAVE_SYS_ENDIAN_H
/* Define to 1 if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/sysctl.h> header file. */
#undef HAVE_SYS_SYSCTL_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Defined if libcurl supports AsynchDNS */
#undef LIBCURL_FEATURE_ASYNCHDNS
/* Defined if libcurl supports IDN */
#undef LIBCURL_FEATURE_IDN
/* Defined if libcurl supports IPv6 */
#undef LIBCURL_FEATURE_IPV6
/* Defined if libcurl supports KRB4 */
#undef LIBCURL_FEATURE_KRB4
/* Defined if libcurl supports libz */
#undef LIBCURL_FEATURE_LIBZ
/* Defined if libcurl supports NTLM */
#undef LIBCURL_FEATURE_NTLM
/* Defined if libcurl supports SSL */
#undef LIBCURL_FEATURE_SSL
/* Defined if libcurl supports SSPI */
#undef LIBCURL_FEATURE_SSPI
/* Defined if libcurl supports DICT */
#undef LIBCURL_PROTOCOL_DICT
/* Defined if libcurl supports FILE */
#undef LIBCURL_PROTOCOL_FILE
/* Defined if libcurl supports FTP */
#undef LIBCURL_PROTOCOL_FTP
/* Defined if libcurl supports FTPS */
#undef LIBCURL_PROTOCOL_FTPS
/* Defined if libcurl supports HTTP */
#undef LIBCURL_PROTOCOL_HTTP
/* Defined if libcurl supports HTTPS */
#undef LIBCURL_PROTOCOL_HTTPS
/* Defined if libcurl supports IMAP */
#undef LIBCURL_PROTOCOL_IMAP
/* Defined if libcurl supports LDAP */
#undef LIBCURL_PROTOCOL_LDAP
/* Defined if libcurl supports POP3 */
#undef LIBCURL_PROTOCOL_POP3
/* Defined if libcurl supports RTSP */
#undef LIBCURL_PROTOCOL_RTSP
/* Defined if libcurl supports SMTP */
#undef LIBCURL_PROTOCOL_SMTP
/* Defined if libcurl supports TELNET */
#undef LIBCURL_PROTOCOL_TELNET
/* Defined if libcurl supports TFTP */
#undef LIBCURL_PROTOCOL_TFTP
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.03.23"
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#define PACKAGE_VERSION "2014.03.23"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
automatically deduced at runtime.
STACK_DIRECTION > 0 => grows toward higher addresses
STACK_DIRECTION < 0 => grows toward lower addresses
STACK_DIRECTION = 0 => direction of growth unknown */
#undef STACK_DIRECTION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define to 1 if AVX assembly is available. */
#undef USE_AVX
/* Define to 1 if XOP assembly is available. */
#undef USE_XOP
/* Version number of package */
#undef VERSION
/* Define curl_free() as free() if our version of curl lacks curl_free. */
#undef curl_free
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

View File

@ -1,5 +1,3 @@
#if 1
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
@ -571,6 +569,8 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(mixTab1Tex, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(mixTab2Tex, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(mixTab3Tex, threadIdx.x);
__syncthreads();
#endif
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -788,5 +788,3 @@ __host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *
//cudaMemcpy(outputHashes, d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}
#endif

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
@ -6,9 +5,6 @@
#include <stdio.h>
#include <memory.h>
#define USE_SHARED 0
#define W_ALIGNMENT 65
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
@ -60,7 +56,7 @@ texture<unsigned int, 1, cudaReadModeElementType> t2dn;
texture<unsigned int, 1, cudaReadModeElementType> t3up;
texture<unsigned int, 1, cudaReadModeElementType> t3dn;
static const uint32_t T0up_cpu[] = {
uint32_t T0up_cpu[] = {
C32e(0xc632f4a5), C32e(0xf86f9784), C32e(0xee5eb099), C32e(0xf67a8c8d),
C32e(0xffe8170d), C32e(0xd60adcbd), C32e(0xde16c8b1), C32e(0x916dfc54),
C32e(0x6090f050), C32e(0x02070503), C32e(0xce2ee0a9), C32e(0x56d1877d),
@ -127,7 +123,7 @@ static const uint32_t T0up_cpu[] = {
C32e(0x7b3d46cb), C32e(0xa8b71ffc), C32e(0x6d0c61d6), C32e(0x2c624e3a)
};
static const uint32_t T0dn_cpu[] = {
uint32_t T0dn_cpu[] = {
C32e(0xf497a5c6), C32e(0x97eb84f8), C32e(0xb0c799ee), C32e(0x8cf78df6),
C32e(0x17e50dff), C32e(0xdcb7bdd6), C32e(0xc8a7b1de), C32e(0xfc395491),
C32e(0xf0c05060), C32e(0x05040302), C32e(0xe087a9ce), C32e(0x87ac7d56),
@ -194,7 +190,7 @@ static const uint32_t T0dn_cpu[] = {
C32e(0x46f6cb7b), C32e(0x1f4bfca8), C32e(0x61dad66d), C32e(0x4e583a2c)
};
static const uint32_t T1up_cpu[] = {
uint32_t T1up_cpu[] = {
C32e(0xc6c632f4), C32e(0xf8f86f97), C32e(0xeeee5eb0), C32e(0xf6f67a8c),
C32e(0xffffe817), C32e(0xd6d60adc), C32e(0xdede16c8), C32e(0x91916dfc),
C32e(0x606090f0), C32e(0x02020705), C32e(0xcece2ee0), C32e(0x5656d187),
@ -261,7 +257,7 @@ static const uint32_t T1up_cpu[] = {
C32e(0x7b7b3d46), C32e(0xa8a8b71f), C32e(0x6d6d0c61), C32e(0x2c2c624e)
};
static const uint32_t T1dn_cpu[] = {
uint32_t T1dn_cpu[] = {
C32e(0xa5f497a5), C32e(0x8497eb84), C32e(0x99b0c799), C32e(0x8d8cf78d),
C32e(0x0d17e50d), C32e(0xbddcb7bd), C32e(0xb1c8a7b1), C32e(0x54fc3954),
C32e(0x50f0c050), C32e(0x03050403), C32e(0xa9e087a9), C32e(0x7d87ac7d),
@ -328,7 +324,7 @@ static const uint32_t T1dn_cpu[] = {
C32e(0xcb46f6cb), C32e(0xfc1f4bfc), C32e(0xd661dad6), C32e(0x3a4e583a)
};
static const uint32_t T2up_cpu[] = {
uint32_t T2up_cpu[] = {
C32e(0xa5c6c632), C32e(0x84f8f86f), C32e(0x99eeee5e), C32e(0x8df6f67a),
C32e(0x0dffffe8), C32e(0xbdd6d60a), C32e(0xb1dede16), C32e(0x5491916d),
C32e(0x50606090), C32e(0x03020207), C32e(0xa9cece2e), C32e(0x7d5656d1),
@ -395,7 +391,7 @@ static const uint32_t T2up_cpu[] = {
C32e(0xcb7b7b3d), C32e(0xfca8a8b7), C32e(0xd66d6d0c), C32e(0x3a2c2c62)
};
static const uint32_t T2dn_cpu[] = {
uint32_t T2dn_cpu[] = {
C32e(0xf4a5f497), C32e(0x978497eb), C32e(0xb099b0c7), C32e(0x8c8d8cf7),
C32e(0x170d17e5), C32e(0xdcbddcb7), C32e(0xc8b1c8a7), C32e(0xfc54fc39),
C32e(0xf050f0c0), C32e(0x05030504), C32e(0xe0a9e087), C32e(0x877d87ac),
@ -462,7 +458,7 @@ static const uint32_t T2dn_cpu[] = {
C32e(0x46cb46f6), C32e(0x1ffc1f4b), C32e(0x61d661da), C32e(0x4e3a4e58)
};
static const uint32_t T3up_cpu[] = {
uint32_t T3up_cpu[] = {
C32e(0x97a5c6c6), C32e(0xeb84f8f8), C32e(0xc799eeee), C32e(0xf78df6f6),
C32e(0xe50dffff), C32e(0xb7bdd6d6), C32e(0xa7b1dede), C32e(0x39549191),
C32e(0xc0506060), C32e(0x04030202), C32e(0x87a9cece), C32e(0xac7d5656),
@ -529,7 +525,7 @@ static const uint32_t T3up_cpu[] = {
C32e(0xf6cb7b7b), C32e(0x4bfca8a8), C32e(0xdad66d6d), C32e(0x583a2c2c)
};
static const uint32_t T3dn_cpu[] = {
uint32_t T3dn_cpu[] = {
C32e(0x32f4a5f4), C32e(0x6f978497), C32e(0x5eb099b0), C32e(0x7a8c8d8c),
C32e(0xe8170d17), C32e(0x0adcbddc), C32e(0x16c8b1c8), C32e(0x6dfc54fc),
C32e(0x90f050f0), C32e(0x07050305), C32e(0x2ee0a9e0), C32e(0xd1877d87),
@ -685,15 +681,8 @@ __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *out
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
#if USE_SHARED
extern __shared__ unsigned char s[];
uint32_t offset = W_ALIGNMENT * sizeof(uint32_t) * threadIdx.x;
uint32_t *message = (uint32_t*)(&s[offset + 0]); // 128 Byte
uint32_t *state = (uint32_t*)(&s[offset + 128]); // 128 Byte
#else
uint32_t message[32];
uint32_t state[32];
#endif
// lese message ein & verknüpfe diese mit dem hash1 von hefty1
// lese den state ein
@ -825,11 +814,7 @@ __host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl)
#if USE_SHARED
size_t shared_size = W_ALIGNMENT*sizeof(uint32_t)*threadsperblock; // ein uint32_t eingefügt gegen Bank Konflikte
#else
size_t shared_size = 0;
#endif
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);

463
cuda_groestlcoin.cu Normal file
View File

@ -0,0 +1,463 @@
// Auf Groestlcoin spezialisierte Version von Groestl
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
#define USE_SHARED 1
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// globaler Speicher für alle HeftyHashes aller Threads
__constant__ uint32_t pTarget[8]; // Single GPU
extern uint32_t *d_resultNonce[8];
// globaler Speicher für unsere Ergebnisse
uint32_t *d_hashGROESTLCOINoutput[8];
__constant__ uint32_t groestlcoin_gpu_state[32];
__constant__ uint32_t groestlcoin_gpu_msg[32];
__constant__ uint32_t sha256coin_gpu_constantTable[64];
__constant__ uint32_t sha256coin_gpu_register[8];
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define PC32up(j, r) ((uint32_t)((j) + (r)))
#define PC32dn(j, r) 0
#define QC32up(j, r) 0xFFFFFFFF
#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))
#define B32_0(x) ((x) & 0xFF)
#define B32_1(x) (((x) >> 8) & 0xFF)
#define B32_2(x) (((x) >> 16) & 0xFF)
#define B32_3(x) ((x) >> 24)
#define SPH_C32(x) ((uint32_t)(x ## U))
#define C32e(x) ((SPH_C32(x) >> 24) \
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
| ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
#if USE_SHARED
#define T0up(x) (*((uint32_t*)mixtabs + ( (x))))
#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x))))
#define T1up(x) (*((uint32_t*)mixtabs + (512+(x))))
#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x))))
#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x))))
#else
#define T0up(x) tex1Dfetch(t0up1, x)
#define T0dn(x) tex1Dfetch(t0dn1, x)
#define T1up(x) tex1Dfetch(t1up1, x)
#define T1dn(x) tex1Dfetch(t1dn1, x)
#define T2up(x) tex1Dfetch(t2up1, x)
#define T2dn(x) tex1Dfetch(t2dn1, x)
#define T3up(x) tex1Dfetch(t3up1, x)
#define T3dn(x) tex1Dfetch(t3dn1, x)
#endif
texture<unsigned int, 1, cudaReadModeElementType> t0up1;
texture<unsigned int, 1, cudaReadModeElementType> t0dn1;
texture<unsigned int, 1, cudaReadModeElementType> t1up1;
texture<unsigned int, 1, cudaReadModeElementType> t1dn1;
texture<unsigned int, 1, cudaReadModeElementType> t2up1;
texture<unsigned int, 1, cudaReadModeElementType> t2dn1;
texture<unsigned int, 1, cudaReadModeElementType> t3up1;
texture<unsigned int, 1, cudaReadModeElementType> t3dn1;
extern uint32_t T0up_cpu[];
extern uint32_t T0dn_cpu[];
extern uint32_t T1up_cpu[];
extern uint32_t T1dn_cpu[];
extern uint32_t T2up_cpu[];
extern uint32_t T2dn_cpu[];
extern uint32_t T3up_cpu[];
extern uint32_t T3dn_cpu[];
extern uint32_t sha256_cpu_hashTable[];
extern uint32_t sha256_cpu_constantTable[];
#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define R(x, n) ((x) >> (n))
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define S0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define S1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define s0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define s1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
#define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
__device__ void groestlcoin_perm_P(uint32_t *a, char *mixtabs)
{
uint32_t t[32];
//#pragma unroll 14
for(int r=0;r<14;r++)
{
#pragma unroll 16
for(int k=0;k<16;k++)
{
a[(k*2)+0] ^= PC32up(k * 0x10, r);
//a[(k<<1)+1] ^= PC32dn(k * 0x10, r);
}
// RBTT
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
t[k + 0] = T0up( B32_0(a[k & 0x1f]) ) ^
T1up( B32_1(a[(k + 2) & 0x1f]) ) ^
T2up( B32_2(a[(k + 4) & 0x1f]) ) ^
T3up( B32_3(a[(k + 6) & 0x1f]) ) ^
T0dn( B32_0(a[(k + 9) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 11) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 13) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 23) & 0x1f]) );
t[k + 1] = T0dn( B32_0(a[k & 0x1f]) ) ^
T1dn( B32_1(a[(k + 2) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 4) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 6) & 0x1f]) ) ^
T0up( B32_0(a[(k + 9) & 0x1f]) ) ^
T1up( B32_1(a[(k + 11) & 0x1f]) ) ^
T2up( B32_2(a[(k + 13) & 0x1f]) ) ^
T3up( B32_3(a[(k + 23) & 0x1f]) );
}
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
__device__ void groestlcoin_perm_Q(uint32_t *a, char *mixtabs)
{
//#pragma unroll 14
for(int r=0;r<14;r++)
{
uint32_t t[32];
#pragma unroll 16
for(int k=0;k<16;k++)
{
a[(k*2)+0] ^= QC32up(k * 0x10, r);
a[(k*2)+1] ^= QC32dn(k * 0x10, r);
}
// RBTT
#pragma unroll 16
for(int k=0;k<32;k+=2)
{
t[k + 0] = T0up( B32_0(a[(k + 2) & 0x1f]) ) ^
T1up( B32_1(a[(k + 6) & 0x1f]) ) ^
T2up( B32_2(a[(k + 10) & 0x1f]) ) ^
T3up( B32_3(a[(k + 22) & 0x1f]) ) ^
T0dn( B32_0(a[(k + 1) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 5) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 9) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 13) & 0x1f]) );
t[k + 1] = T0dn( B32_0(a[(k + 2) & 0x1f]) ) ^
T1dn( B32_1(a[(k + 6) & 0x1f]) ) ^
T2dn( B32_2(a[(k + 10) & 0x1f]) ) ^
T3dn( B32_3(a[(k + 22) & 0x1f]) ) ^
T0up( B32_0(a[(k + 1) & 0x1f]) ) ^
T1up( B32_1(a[(k + 5) & 0x1f]) ) ^
T2up( B32_2(a[(k + 9) & 0x1f]) ) ^
T3up( B32_3(a[(k + 13) & 0x1f]) );
}
#pragma unroll 32
for(int k=0;k<32;k++)
a[k] = t[k];
}
}
#if USE_SHARED
__global__ void __launch_bounds__(256)
#else
__global__ void
#endif
groestlcoin_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
{
#if USE_SHARED
extern __shared__ char mixtabs[];
*((uint32_t*)mixtabs + ( threadIdx.x)) = tex1Dfetch(t0up1, threadIdx.x);
*((uint32_t*)mixtabs + (256+threadIdx.x)) = tex1Dfetch(t0dn1, threadIdx.x);
*((uint32_t*)mixtabs + (512+threadIdx.x)) = tex1Dfetch(t1up1, threadIdx.x);
*((uint32_t*)mixtabs + (768+threadIdx.x)) = tex1Dfetch(t1dn1, threadIdx.x);
*((uint32_t*)mixtabs + (1024+threadIdx.x)) = tex1Dfetch(t2up1, threadIdx.x);
*((uint32_t*)mixtabs + (1280+threadIdx.x)) = tex1Dfetch(t2dn1, threadIdx.x);
*((uint32_t*)mixtabs + (1536+threadIdx.x)) = tex1Dfetch(t3up1, threadIdx.x);
*((uint32_t*)mixtabs + (1792+threadIdx.x)) = tex1Dfetch(t3dn1, threadIdx.x);
__syncthreads();
#endif
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
/////
///// Lieber groestl, mach, dass es abgeht!!!
/////
// GROESTL
uint32_t message[32];
uint32_t state[32];
// SHA
// jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory
uint32_t g[32];
#pragma unroll 32
for(int k=0;k<32;k++)
{
state[k] = groestlcoin_gpu_state[k];
message[k] = groestlcoin_gpu_msg[k];
}
uint32_t nounce = startNounce + thread;
message[19] = SWAB32(nounce);
#pragma unroll 32
for(int u=0;u<32;u++)
g[u] = message[u] ^ state[u];
// Perm
#if USE_SHARED
groestlcoin_perm_P(g, mixtabs);
groestlcoin_perm_Q(message, mixtabs);
#else
groestlcoin_perm_P(g, NULL);
groestlcoin_perm_Q(message, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++)
{
state[u] ^= g[u] ^ message[u];
g[u] = state[u];
}
#if USE_SHARED
groestlcoin_perm_P(g, mixtabs);
#else
groestlcoin_perm_P(g, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++)
state[u] ^= g[u];
////
//// 2. Runde groestl
////
#pragma unroll 16
for(int k=0;k<16;k++)
message[k] = state[k + 16];
#pragma unroll 32
for(int k=0;k<32;k++)
state[k] = groestlcoin_gpu_state[k];
#pragma unroll 16
for(int k=0;k<16;k++)
message[k+16] = 0;
message[16] = 0x80;
message[31] = 0x01000000;
#pragma unroll 32
for(int u=0;u<32;u++)
g[u] = message[u] ^ state[u];
// Perm
#if USE_SHARED
groestlcoin_perm_P(g, mixtabs);
groestlcoin_perm_Q(message, mixtabs);
#else
groestlcoin_perm_P(g, NULL);
groestlcoin_perm_Q(message, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++)
{
state[u] ^= g[u] ^ message[u];
g[u] = state[u];
}
#if USE_SHARED
groestlcoin_perm_P(g, mixtabs);
#else
groestlcoin_perm_P(g, NULL);
#endif
#pragma unroll 32
for(int u=0;u<32;u++)
state[u] ^= g[u];
/*
#pragma unroll 8
for(int k=0;k<8;k++)
hash[k] = state[k+16];
*/
// kopiere Ergebnis
/*
#pragma unroll 16
for(int k=0;k<16;k++)
((uint32_t*)outputHash)[16*thread+k] = state[k + 16];
*/
int i;
bool rc = true;
for (i = 7; i >= 0; i--) {
if (state[i+16] > pTarget[i]) {
rc = false;
break;
}
if (state[i+16] < pTarget[i]) {
rc = true;
break;
}
}
if(rc == true)
{
if(resNounce[0] > nounce)
{
resNounce[0] = nounce;
/*
#pragma unroll 8
for(int k=0;k<8;k++)
((uint32_t*)outputHash)[k] = (hash[k]);
*/
}
}
}
}
#define texDef(texname, texmem, texsource, texsize) \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
// Setup-Funktionen
__host__ void groestlcoin_cpu_init(int thr_id, int threads)
{
cudaSetDevice(thr_id);
cudaDeviceSetCacheConfig( cudaFuncCachePreferShared );
// Texturen mit obigem Makro initialisieren
texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( sha256coin_gpu_constantTable,
sha256_cpu_constantTable,
sizeof(uint32_t) * 64 );
// Startvektor
cudaMemcpyToSymbol( sha256coin_gpu_register,
sha256_cpu_hashTable,
sizeof(uint32_t) * 8 );
// setze register
uint32_t groestl_state_init[32];
memset(groestl_state_init, 0, sizeof(uint32_t) * 32);
groestl_state_init[31] = 0x20000;
// state speichern
cudaMemcpyToSymbol( groestlcoin_gpu_state,
groestl_state_init,
128);
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
// Speicher für alle Ergebnisse belegen (nur für Debug)
cudaMalloc(&d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads);
}
__host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
{
// Nachricht expandieren und setzen
uint32_t msgBlock[32];
memset(msgBlock, 0, sizeof(uint32_t) * 32);
memcpy(&msgBlock[0], data, 80);
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
// Unsere Nachricht hat 80 Byte
msgBlock[20] = 0x80;
msgBlock[31] = 0x01000000;
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
// auf der GPU ausgeführt)
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
cudaMemcpyToSymbol( groestlcoin_gpu_msg,
msgBlock,
128);
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
cudaMemcpyToSymbol( pTarget,
pTargetIn,
sizeof(uint32_t) * 8 );
}
__host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
{
#if USE_SHARED
const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
#else
const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
#endif
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs (abhängig von der Threadanzahl)
#if USE_SHARED
size_t shared_size = 8 * 256 * sizeof(uint32_t);
#else
size_t shared_size = 0;
#endif
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
//fprintf(stderr, "ThrID: %d\n", thr_id);
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
groestlcoin_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hashGROESTLCOINoutput[thr_id], d_resultNonce[thr_id]);
// Strategisches Sleep Kommando zur Senkung der CPU Last
MyStreamSynchronize(NULL, 0, thr_id);
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
/// Debug
//cudaMemcpy(outputHashes, d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads, cudaMemcpyDeviceToHost);
// Nounce
//cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}

8
cuda_groestlcoin.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef _CUDA_GROESTLCOIN_H
#define _CUDA_GROESTLCOIN_H
void groestlcoin_cpu_init(int thr_id, int threads);
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
#endif

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32-Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

View File

@ -1,4 +1,3 @@
/* Diese Funktion ist auf 84+32 Byte große Eingabedaten ausgerichtet (Heavycoin) */
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

View File

@ -29,7 +29,7 @@
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stdio.h>
#include <stddef.h>
#include <string.h>
@ -2986,12 +2986,13 @@ groestl_big_close(sph_groestl_big_context *sc,
#endif
}
memset(pad + 1, 0, pad_len - 9);
//fprintf(stderr, "%x\n", pad_len);
#if SPH_64
sph_enc64be(pad + pad_len - 8, count);
#else
sph_enc64be(pad + pad_len - 8, count_high);
sph_enc64be(pad + pad_len - 4, count_low);
#endif
#endif
groestl_big_core(sc, pad, pad_len);
READ_STATE_BIG(sc);
FINAL_BIG;

175
groestlcoin.cpp Normal file
View File

@ -0,0 +1,175 @@
#include "uint256.h"
#include "sph_groestl.h"
#include "cpuminer-config.h"
#include "miner.h"
#include <string.h>
#include <stdint.h>
#include "cuda_groestlcoin.h"
#include <openssl/sha.h>
#define SWAP32(x) \
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
void sha256func(unsigned char *hash, const unsigned char *data, int len)
{
uint32_t S[16], T[16];
int i, r;
sha256_init(S);
for (r = len; r > -9; r -= 64) {
if (r < 64)
memset(T, 0, 64);
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
if (r >= 0 && r < 64)
((unsigned char *)T)[r] = 0x80;
for (i = 0; i < 16; i++)
T[i] = be32dec(T + i);
if (r < 56)
T[15] = 8 * len;
sha256_transform(S, T, 0);
}
/*
memcpy(S + 8, sha256d_hash1 + 8, 32);
sha256_init(T);
sha256_transform(T, S, 0);
*/
for (i = 0; i < 8; i++)
be32enc((uint32_t *)hash + i, T[i]);
}
static void groestlhash(void *state, const void *input)
{
// Tryout GPU-groestl
sph_groestl512_context ctx_groestl[2];
static unsigned char pblank[1];
int ii;
uint32_t mask = 8;
uint32_t zero = 0;
//these uint512 in the c++ source of the client are backed by an array of uint32
uint32_t hashA[16], hashB[16];
sph_groestl512_init(&ctx_groestl[0]);
sph_groestl512 (&ctx_groestl[0], input, 80); //6
sph_groestl512_close(&ctx_groestl[0], hashA); //7
sph_groestl512_init(&ctx_groestl[1]);
sph_groestl512 (&ctx_groestl[1], hashA, 64); //6
sph_groestl512_close(&ctx_groestl[1], hashB); //7
memcpy(state, hashB, 32);
}
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t start_nonce = pdata[19]++;
const uint32_t Htarg = ptarget[7];
const uint32_t throughPut = 4096 * 128;
//const uint32_t throughPut = 1;
int i;
uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t));
// init
static bool init[8] = { false, false, false, false, false, false, false, false };
if(!init[thr_id])
{
groestlcoin_cpu_init(thr_id, throughPut);
init[thr_id] = true;
}
// Endian Drehung ist notwendig
//char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"};
//pdata = (uint32_t*)testdata;
uint32_t endiandata[32];
for (int kk=0; kk < 32; kk++)
be32enc(&endiandata[kk], pdata[kk]);
// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget);
do {
// GPU
uint32_t foundNounce = 0xFFFFFFFF;
groestlcoin_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce);
/*
{
for(i=0;i<throughPut;i++)
{
uint32_t tmpHash[8];
endiandata[19] = SWAP32(pdata[19]);
groestlhash(tmpHash, endiandata);
int ii;
printf("result GPU: ");
for (ii=0; ii < 32; ii++)
{
printf ("%.2x",((uint8_t*)&outputHash[8*i])[ii]);
};
printf ("\n");
groestlhash(tmpHash, endiandata);
printf("result CPU: ");
for (ii=0; ii < 32; ii++)
{
printf ("%.2x",((uint8_t*)tmpHash)[ii]);
};
}
exit(0);
}
*/
if(foundNounce < 0xffffffff)
{
uint32_t tmpHash[8];
endiandata[19] = SWAP32(foundNounce);
groestlhash(tmpHash, endiandata);
if (((tmpHash[7]&0xFFFFFF00)==0) &&
fulltest(tmpHash, ptarget)) {
pdata[19] = foundNounce;
*hashes_done = foundNounce - start_nonce;
free(outputHash);
return true;
}
foundNounce = 0xffffffff;
/*
int ii;
printf("result GPU: ");
for (ii=0; ii < 32; ii++)
{
printf ("%.2x",((uint8_t*)&outputHash[0])[ii]);
};
printf ("\n");
printf("result CPU: ");
for (ii=0; ii < 32; ii++)
{
printf ("%.2x",((uint8_t*)tmpHash)[ii]);
};
printf ("\n");
*/
}
if (pdata[19] + throughPut < pdata[19])
pdata[19] = max_nonce;
else pdata[19] += throughPut;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[19] - start_nonce;
free(outputHash);
return 0;
}

View File

@ -211,8 +211,13 @@ extern int scanhash_fugue256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
struct thr_info {
int id;