Browse Source

bump to revision v0.9 (VC++ project files not updated yet)

master
Christian Buchner 11 years ago
parent
commit
e049f32fee
  1. 9
      Makefile.am
  2. 187
      Makefile.in
  3. 8
      README.txt
  4. 20
      configure
  5. 2
      configure.ac
  6. 22
      cpu-miner.c
  7. 4
      cpuminer-config.h
  8. 8
      miner.h
  9. 292
      quark/animecoin.cu
  10. 473
      quark/cuda_bmw512.cu
  11. 6
      quark/cuda_jh512.cu
  12. 70
      quark/cuda_quark_blake512.cu
  13. 363
      quark/cuda_quark_compactionTest.cu
  14. 182
      quark/cuda_quark_keccak512.cu
  15. 274
      quark/quarkcoin.cu

9
Makefile.am

@ -16,8 +16,8 @@ bin_PROGRAMS = ccminer
ccminer_SOURCES = elist.h miner.h compat.h \ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \ cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \ sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
heavy/heavy.cu \ heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -30,6 +30,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \ JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \ JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \ quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
quark/cuda_quark_compactionTest.cu \
myriadgroestl.cpp cuda_myriadgroestl.cu myriadgroestl.cpp cuda_myriadgroestl.cu
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
@ -42,5 +44,8 @@ ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -f
# ABI requiring code modules # ABI requiring code modules
# this module doesn't compile with Compute 2.0 unfortunately # this module doesn't compile with Compute 2.0 unfortunately
quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<

187
Makefile.in

@ -55,18 +55,26 @@ am__installdirs = "$(DESTDIR)$(bindir)"
PROGRAMS = $(bin_PROGRAMS) PROGRAMS = $(bin_PROGRAMS)
am__dirstamp = $(am__leading_dot)dirstamp am__dirstamp = $(am__leading_dot)dirstamp
am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \ am_ccminer_OBJECTS = ccminer-cpu-miner.$(OBJEXT) \
ccminer-util.$(OBJEXT) ccminer-blake.$(OBJEXT) \ ccminer-util.$(OBJEXT) ccminer-bmw.$(OBJEXT) \
ccminer-groestl.$(OBJEXT) ccminer-keccak.$(OBJEXT) \ ccminer-blake.$(OBJEXT) ccminer-groestl.$(OBJEXT) \
ccminer-hefty1.$(OBJEXT) ccminer-scrypt.$(OBJEXT) \ ccminer-jh.$(OBJEXT) ccminer-keccak.$(OBJEXT) \
ccminer-sha2.$(OBJEXT) heavy.$(OBJEXT) cuda_blake512.$(OBJEXT) \ ccminer-skein.$(OBJEXT) ccminer-hefty1.$(OBJEXT) \
cuda_combine.$(OBJEXT) cuda_groestl512.$(OBJEXT) \ ccminer-scrypt.$(OBJEXT) ccminer-sha2.$(OBJEXT) \
cuda_hefty1.$(OBJEXT) cuda_keccak512.$(OBJEXT) \ heavy/heavy.$(OBJEXT) heavy/cuda_blake512.$(OBJEXT) \
cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \ heavy/cuda_combine.$(OBJEXT) heavy/cuda_groestl512.$(OBJEXT) \
heavy/cuda_hefty1.$(OBJEXT) heavy/cuda_keccak512.$(OBJEXT) \
heavy/cuda_sha256.$(OBJEXT) ccminer-fuguecoin.$(OBJEXT) \
cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \ cuda_fugue256.$(OBJEXT) ccminer-fugue.$(OBJEXT) \
ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT) \ ccminer-groestlcoin.$(OBJEXT) cuda_groestlcoin.$(OBJEXT) \
JHA/jackpotcoin.$(OBJEXT) JHA/cuda_jha_keccak512.$(OBJEXT) \ JHA/jackpotcoin.$(OBJEXT) JHA/cuda_jha_keccak512.$(OBJEXT) \
ccminer-jh.$(OBJEXT) ccminer-skein.$(OBJEXT) \ JHA/cuda_jha_compactionTest.$(OBJEXT) \
quark/cuda_quark_checkhash.$(OBJEXT) \ quark/cuda_quark_checkhash.$(OBJEXT) \
quark/cuda_jh512.$(OBJEXT) quark/cuda_quark_blake512.$(OBJEXT) \
quark/cuda_quark_groestl512.$(OBJEXT) \
quark/cuda_skein512.$(OBJEXT) quark/cuda_bmw512.$(OBJEXT) \
quark/cuda_quark_keccak512.$(OBJEXT) quark/quarkcoin.$(OBJEXT) \
quark/animecoin.$(OBJEXT) \
quark/cuda_quark_compactionTest.$(OBJEXT) \
ccminer-myriadgroestl.$(OBJEXT) cuda_myriadgroestl.$(OBJEXT) ccminer-myriadgroestl.$(OBJEXT) cuda_myriadgroestl.$(OBJEXT)
ccminer_OBJECTS = $(am_ccminer_OBJECTS) ccminer_OBJECTS = $(am_ccminer_OBJECTS)
ccminer_DEPENDENCIES = ccminer_DEPENDENCIES =
@ -272,19 +280,22 @@ SUBDIRS = compat
ccminer_SOURCES = elist.h miner.h compat.h \ ccminer_SOURCES = elist.h miner.h compat.h \
compat/inttypes.h compat/stdbool.h compat/unistd.h \ compat/inttypes.h compat/stdbool.h compat/unistd.h \
compat/sys/time.h compat/getopt/getopt.h \ compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c sph/blake.c sph/groestl.c sph/keccak.c hefty1.c scrypt.c sha2.c \ cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
sph/sph_blake.h sph/sph_groestl.h sph/sph_keccak.h sph/sph_types.h \ sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
heavy.cu \ heavy/heavy.cu \
cuda_blake512.cu cuda_blake512.h \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \
cuda_combine.cu cuda_combine.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \
cuda_groestl512.cu cuda_groestl512.h \ heavy/cuda_groestl512.cu heavy/cuda_groestl512.h \
cuda_hefty1.cu cuda_hefty1.h \ heavy/cuda_hefty1.cu heavy/cuda_hefty1.h \
cuda_keccak512.cu cuda_keccak512.h \ heavy/cuda_keccak512.cu heavy/cuda_keccak512.h \
cuda_sha256.cu cuda_sha256.h \ heavy/cuda_sha256.cu heavy/cuda_sha256.h \
fuguecoin.cpp cuda_fugue256.cu sph/fugue.c sph/sph_fugue.h uint256.h \ fuguecoin.cpp cuda_fugue256.cu sph/fugue.c sph/sph_fugue.h uint256.h \
groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \ groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu sph/jh.c sph/skein.c \ JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
sph/sph_jh.h sph/sph_skein.h quark/cuda_quark_checkhash.cu \ JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \
quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
quark/cuda_quark_compactionTest.cu \
myriadgroestl.cpp cuda_myriadgroestl.cu myriadgroestl.cpp cuda_myriadgroestl.cu
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
@ -381,6 +392,26 @@ uninstall-binPROGRAMS:
clean-binPROGRAMS: clean-binPROGRAMS:
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
heavy/$(am__dirstamp):
@$(MKDIR_P) heavy
@: > heavy/$(am__dirstamp)
heavy/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) heavy/$(DEPDIR)
@: > heavy/$(DEPDIR)/$(am__dirstamp)
heavy/heavy.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_blake512.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_combine.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_groestl512.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_hefty1.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_keccak512.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
heavy/cuda_sha256.$(OBJEXT): heavy/$(am__dirstamp) \
heavy/$(DEPDIR)/$(am__dirstamp)
JHA/$(am__dirstamp): JHA/$(am__dirstamp):
@$(MKDIR_P) JHA @$(MKDIR_P) JHA
@: > JHA/$(am__dirstamp) @: > JHA/$(am__dirstamp)
@ -391,6 +422,8 @@ JHA/jackpotcoin.$(OBJEXT): JHA/$(am__dirstamp) \
JHA/$(DEPDIR)/$(am__dirstamp) JHA/$(DEPDIR)/$(am__dirstamp)
JHA/cuda_jha_keccak512.$(OBJEXT): JHA/$(am__dirstamp) \ JHA/cuda_jha_keccak512.$(OBJEXT): JHA/$(am__dirstamp) \
JHA/$(DEPDIR)/$(am__dirstamp) JHA/$(DEPDIR)/$(am__dirstamp)
JHA/cuda_jha_compactionTest.$(OBJEXT): JHA/$(am__dirstamp) \
JHA/$(DEPDIR)/$(am__dirstamp)
quark/$(am__dirstamp): quark/$(am__dirstamp):
@$(MKDIR_P) quark @$(MKDIR_P) quark
@: > quark/$(am__dirstamp) @: > quark/$(am__dirstamp)
@ -399,20 +432,56 @@ quark/$(DEPDIR)/$(am__dirstamp):
@: > quark/$(DEPDIR)/$(am__dirstamp) @: > quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_quark_checkhash.$(OBJEXT): quark/$(am__dirstamp) \ quark/cuda_quark_checkhash.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp) quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_jh512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_quark_blake512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_quark_groestl512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_skein512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_bmw512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_quark_keccak512.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/quarkcoin.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/animecoin.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
quark/cuda_quark_compactionTest.$(OBJEXT): quark/$(am__dirstamp) \
quark/$(DEPDIR)/$(am__dirstamp)
ccminer$(EXEEXT): $(ccminer_OBJECTS) $(ccminer_DEPENDENCIES) $(EXTRA_ccminer_DEPENDENCIES) ccminer$(EXEEXT): $(ccminer_OBJECTS) $(ccminer_DEPENDENCIES) $(EXTRA_ccminer_DEPENDENCIES)
@rm -f ccminer$(EXEEXT) @rm -f ccminer$(EXEEXT)
$(ccminer_LINK) $(ccminer_OBJECTS) $(ccminer_LDADD) $(LIBS) $(ccminer_LINK) $(ccminer_OBJECTS) $(ccminer_LDADD) $(LIBS)
mostlyclean-compile: mostlyclean-compile:
-rm -f *.$(OBJEXT) -rm -f *.$(OBJEXT)
-rm -f JHA/cuda_jha_compactionTest.$(OBJEXT)
-rm -f JHA/cuda_jha_keccak512.$(OBJEXT) -rm -f JHA/cuda_jha_keccak512.$(OBJEXT)
-rm -f JHA/jackpotcoin.$(OBJEXT) -rm -f JHA/jackpotcoin.$(OBJEXT)
-rm -f heavy/cuda_blake512.$(OBJEXT)
-rm -f heavy/cuda_combine.$(OBJEXT)
-rm -f heavy/cuda_groestl512.$(OBJEXT)
-rm -f heavy/cuda_hefty1.$(OBJEXT)
-rm -f heavy/cuda_keccak512.$(OBJEXT)
-rm -f heavy/cuda_sha256.$(OBJEXT)
-rm -f heavy/heavy.$(OBJEXT)
-rm -f quark/animecoin.$(OBJEXT)
-rm -f quark/cuda_bmw512.$(OBJEXT)
-rm -f quark/cuda_jh512.$(OBJEXT)
-rm -f quark/cuda_quark_blake512.$(OBJEXT)
-rm -f quark/cuda_quark_checkhash.$(OBJEXT) -rm -f quark/cuda_quark_checkhash.$(OBJEXT)
-rm -f quark/cuda_quark_compactionTest.$(OBJEXT)
-rm -f quark/cuda_quark_groestl512.$(OBJEXT)
-rm -f quark/cuda_quark_keccak512.$(OBJEXT)
-rm -f quark/cuda_skein512.$(OBJEXT)
-rm -f quark/quarkcoin.$(OBJEXT)
distclean-compile: distclean-compile:
-rm -f *.tab.c -rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-blake.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-blake.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-bmw.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-cpu-miner.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-cpu-miner.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fugue.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ccminer-fuguecoin.Po@am__quote@
@ -469,6 +538,20 @@ ccminer-util.obj: util.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi`
ccminer-bmw.o: sph/bmw.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-bmw.o -MD -MP -MF $(DEPDIR)/ccminer-bmw.Tpo -c -o ccminer-bmw.o `test -f 'sph/bmw.c' || echo '$(srcdir)/'`sph/bmw.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-bmw.Tpo $(DEPDIR)/ccminer-bmw.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/bmw.c' object='ccminer-bmw.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-bmw.o `test -f 'sph/bmw.c' || echo '$(srcdir)/'`sph/bmw.c
ccminer-bmw.obj: sph/bmw.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-bmw.obj -MD -MP -MF $(DEPDIR)/ccminer-bmw.Tpo -c -o ccminer-bmw.obj `if test -f 'sph/bmw.c'; then $(CYGPATH_W) 'sph/bmw.c'; else $(CYGPATH_W) '$(srcdir)/sph/bmw.c'; fi`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-bmw.Tpo $(DEPDIR)/ccminer-bmw.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/bmw.c' object='ccminer-bmw.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-bmw.obj `if test -f 'sph/bmw.c'; then $(CYGPATH_W) 'sph/bmw.c'; else $(CYGPATH_W) '$(srcdir)/sph/bmw.c'; fi`
ccminer-blake.o: sph/blake.c ccminer-blake.o: sph/blake.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-blake.o -MD -MP -MF $(DEPDIR)/ccminer-blake.Tpo -c -o ccminer-blake.o `test -f 'sph/blake.c' || echo '$(srcdir)/'`sph/blake.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-blake.o -MD -MP -MF $(DEPDIR)/ccminer-blake.Tpo -c -o ccminer-blake.o `test -f 'sph/blake.c' || echo '$(srcdir)/'`sph/blake.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-blake.Tpo $(DEPDIR)/ccminer-blake.Po @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-blake.Tpo $(DEPDIR)/ccminer-blake.Po
@ -497,6 +580,20 @@ ccminer-groestl.obj: sph/groestl.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-groestl.obj `if test -f 'sph/groestl.c'; then $(CYGPATH_W) 'sph/groestl.c'; else $(CYGPATH_W) '$(srcdir)/sph/groestl.c'; fi` @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-groestl.obj `if test -f 'sph/groestl.c'; then $(CYGPATH_W) 'sph/groestl.c'; else $(CYGPATH_W) '$(srcdir)/sph/groestl.c'; fi`
ccminer-jh.o: sph/jh.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-jh.o -MD -MP -MF $(DEPDIR)/ccminer-jh.Tpo -c -o ccminer-jh.o `test -f 'sph/jh.c' || echo '$(srcdir)/'`sph/jh.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-jh.Tpo $(DEPDIR)/ccminer-jh.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/jh.c' object='ccminer-jh.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-jh.o `test -f 'sph/jh.c' || echo '$(srcdir)/'`sph/jh.c
ccminer-jh.obj: sph/jh.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-jh.obj -MD -MP -MF $(DEPDIR)/ccminer-jh.Tpo -c -o ccminer-jh.obj `if test -f 'sph/jh.c'; then $(CYGPATH_W) 'sph/jh.c'; else $(CYGPATH_W) '$(srcdir)/sph/jh.c'; fi`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-jh.Tpo $(DEPDIR)/ccminer-jh.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/jh.c' object='ccminer-jh.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-jh.obj `if test -f 'sph/jh.c'; then $(CYGPATH_W) 'sph/jh.c'; else $(CYGPATH_W) '$(srcdir)/sph/jh.c'; fi`
ccminer-keccak.o: sph/keccak.c ccminer-keccak.o: sph/keccak.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-keccak.o -MD -MP -MF $(DEPDIR)/ccminer-keccak.Tpo -c -o ccminer-keccak.o `test -f 'sph/keccak.c' || echo '$(srcdir)/'`sph/keccak.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-keccak.o -MD -MP -MF $(DEPDIR)/ccminer-keccak.Tpo -c -o ccminer-keccak.o `test -f 'sph/keccak.c' || echo '$(srcdir)/'`sph/keccak.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-keccak.Tpo $(DEPDIR)/ccminer-keccak.Po @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-keccak.Tpo $(DEPDIR)/ccminer-keccak.Po
@ -511,6 +608,20 @@ ccminer-keccak.obj: sph/keccak.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-keccak.obj `if test -f 'sph/keccak.c'; then $(CYGPATH_W) 'sph/keccak.c'; else $(CYGPATH_W) '$(srcdir)/sph/keccak.c'; fi` @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-keccak.obj `if test -f 'sph/keccak.c'; then $(CYGPATH_W) 'sph/keccak.c'; else $(CYGPATH_W) '$(srcdir)/sph/keccak.c'; fi`
ccminer-skein.o: sph/skein.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-skein.o -MD -MP -MF $(DEPDIR)/ccminer-skein.Tpo -c -o ccminer-skein.o `test -f 'sph/skein.c' || echo '$(srcdir)/'`sph/skein.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-skein.Tpo $(DEPDIR)/ccminer-skein.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/skein.c' object='ccminer-skein.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-skein.o `test -f 'sph/skein.c' || echo '$(srcdir)/'`sph/skein.c
ccminer-skein.obj: sph/skein.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-skein.obj -MD -MP -MF $(DEPDIR)/ccminer-skein.Tpo -c -o ccminer-skein.obj `if test -f 'sph/skein.c'; then $(CYGPATH_W) 'sph/skein.c'; else $(CYGPATH_W) '$(srcdir)/sph/skein.c'; fi`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-skein.Tpo $(DEPDIR)/ccminer-skein.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/skein.c' object='ccminer-skein.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-skein.obj `if test -f 'sph/skein.c'; then $(CYGPATH_W) 'sph/skein.c'; else $(CYGPATH_W) '$(srcdir)/sph/skein.c'; fi`
ccminer-hefty1.o: hefty1.c ccminer-hefty1.o: hefty1.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-hefty1.o -MD -MP -MF $(DEPDIR)/ccminer-hefty1.Tpo -c -o ccminer-hefty1.o `test -f 'hefty1.c' || echo '$(srcdir)/'`hefty1.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-hefty1.o -MD -MP -MF $(DEPDIR)/ccminer-hefty1.Tpo -c -o ccminer-hefty1.o `test -f 'hefty1.c' || echo '$(srcdir)/'`hefty1.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-hefty1.Tpo $(DEPDIR)/ccminer-hefty1.Po @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-hefty1.Tpo $(DEPDIR)/ccminer-hefty1.Po
@ -567,34 +678,6 @@ ccminer-fugue.obj: sph/fugue.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-fugue.obj `if test -f 'sph/fugue.c'; then $(CYGPATH_W) 'sph/fugue.c'; else $(CYGPATH_W) '$(srcdir)/sph/fugue.c'; fi` @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-fugue.obj `if test -f 'sph/fugue.c'; then $(CYGPATH_W) 'sph/fugue.c'; else $(CYGPATH_W) '$(srcdir)/sph/fugue.c'; fi`
ccminer-jh.o: sph/jh.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-jh.o -MD -MP -MF $(DEPDIR)/ccminer-jh.Tpo -c -o ccminer-jh.o `test -f 'sph/jh.c' || echo '$(srcdir)/'`sph/jh.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-jh.Tpo $(DEPDIR)/ccminer-jh.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/jh.c' object='ccminer-jh.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-jh.o `test -f 'sph/jh.c' || echo '$(srcdir)/'`sph/jh.c
ccminer-jh.obj: sph/jh.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-jh.obj -MD -MP -MF $(DEPDIR)/ccminer-jh.Tpo -c -o ccminer-jh.obj `if test -f 'sph/jh.c'; then $(CYGPATH_W) 'sph/jh.c'; else $(CYGPATH_W) '$(srcdir)/sph/jh.c'; fi`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-jh.Tpo $(DEPDIR)/ccminer-jh.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/jh.c' object='ccminer-jh.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-jh.obj `if test -f 'sph/jh.c'; then $(CYGPATH_W) 'sph/jh.c'; else $(CYGPATH_W) '$(srcdir)/sph/jh.c'; fi`
ccminer-skein.o: sph/skein.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-skein.o -MD -MP -MF $(DEPDIR)/ccminer-skein.Tpo -c -o ccminer-skein.o `test -f 'sph/skein.c' || echo '$(srcdir)/'`sph/skein.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-skein.Tpo $(DEPDIR)/ccminer-skein.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/skein.c' object='ccminer-skein.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-skein.o `test -f 'sph/skein.c' || echo '$(srcdir)/'`sph/skein.c
ccminer-skein.obj: sph/skein.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ccminer-skein.obj -MD -MP -MF $(DEPDIR)/ccminer-skein.Tpo -c -o ccminer-skein.obj `if test -f 'sph/skein.c'; then $(CYGPATH_W) 'sph/skein.c'; else $(CYGPATH_W) '$(srcdir)/sph/skein.c'; fi`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ccminer-skein.Tpo $(DEPDIR)/ccminer-skein.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sph/skein.c' object='ccminer-skein.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ccminer_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ccminer-skein.obj `if test -f 'sph/skein.c'; then $(CYGPATH_W) 'sph/skein.c'; else $(CYGPATH_W) '$(srcdir)/sph/skein.c'; fi`
.cpp.o: .cpp.o:
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@ -1012,6 +1095,8 @@ distclean-generic:
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-rm -f JHA/$(DEPDIR)/$(am__dirstamp) -rm -f JHA/$(DEPDIR)/$(am__dirstamp)
-rm -f JHA/$(am__dirstamp) -rm -f JHA/$(am__dirstamp)
-rm -f heavy/$(DEPDIR)/$(am__dirstamp)
-rm -f heavy/$(am__dirstamp)
-rm -f quark/$(DEPDIR)/$(am__dirstamp) -rm -f quark/$(DEPDIR)/$(am__dirstamp)
-rm -f quark/$(am__dirstamp) -rm -f quark/$(am__dirstamp)
@ -1115,6 +1200,14 @@ uninstall-am: uninstall-binPROGRAMS
.cu.o: .cu.o:
$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< $(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
# ABI requiring code modules
# this module doesn't compile with Compute 2.0 unfortunately
quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
# Tell versions [3.59,3.63) of GNU make to not export all variables. # Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded. # Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT: .NOEXPORT:

8
README.txt

@ -1,5 +1,5 @@
ccMiner release 0.7 (May 01th 2014) - "Jackpot II" ccMiner release 0.9 (May 06th 2014) - "Say Hi to Quark, Anime"
------------------------------------------------------------- -------------------------------------------------------------
*************************************************************** ***************************************************************
@ -14,6 +14,8 @@ If you find this tool useful and like to support its continued
DOGE donation address: DT9ghsGmez6ojVdEZgvaZbT2Z3TruXG6yP DOGE donation address: DT9ghsGmez6ojVdEZgvaZbT2Z3TruXG6yP
HVC donation address: HNN3PyyTMkDo4RkEjkWSGMwqia1yD8mwJN HVC donation address: HNN3PyyTMkDo4RkEjkWSGMwqia1yD8mwJN
GRS donation address: FmJKJAhvyHWPeEVeLQHefr2naqgWc9ABTM GRS donation address: FmJKJAhvyHWPeEVeLQHefr2naqgWc9ABTM
JPC donation address: JYFBypVDkk583yKWY4M46TG5vXG8hfgD2U
MNC donation address: MShgNUSYwybEbXLvJUtdNg1a7rUeiNgooK
*************************************************************** ***************************************************************
>>> Introduction <<< >>> Introduction <<<
@ -39,6 +41,8 @@ its command line interface and options.
groestl use to mine Groestlcoin groestl use to mine Groestlcoin
myr-gr use to mine Myriad-Groestl myr-gr use to mine Myriad-Groestl
jackpot use to mine Jackpotcoin jackpot use to mine Jackpotcoin
quark use to mine Quarkcoin
anime use to mine Animecoin
-d, --devices gives a comma separated list of CUDA device IDs -d, --devices gives a comma separated list of CUDA device IDs
to operate on. Device IDs start counting from 0! to operate on. Device IDs start counting from 0!
@ -117,6 +121,8 @@ from your old clunkers.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
May 6th 2014 this adds the quark and animecoin algorithms.
May 3rd 2014 add the MjollnirCoin hash algorithm for the upcomin May 3rd 2014 add the MjollnirCoin hash algorithm for the upcomin
MjollnirCoin relaunch. MjollnirCoin relaunch.

20
configure vendored

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for ccminer 2014.04.27. # Generated by GNU Autoconf 2.68 for ccminer 2014.05.03.
# #
# #
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -557,8 +557,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='ccminer' PACKAGE_NAME='ccminer'
PACKAGE_TARNAME='ccminer' PACKAGE_TARNAME='ccminer'
PACKAGE_VERSION='2014.04.27' PACKAGE_VERSION='2014.05.03'
PACKAGE_STRING='ccminer 2014.04.27' PACKAGE_STRING='ccminer 2014.05.03'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures ccminer 2014.04.27 to adapt to many kinds of systems. \`configure' configures ccminer 2014.05.03 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1368,7 +1368,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of ccminer 2014.04.27:";; short | recursive ) echo "Configuration of ccminer 2014.05.03:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1469,7 +1469,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
ccminer configure 2014.04.27 ccminer configure 2014.05.03
generated by GNU Autoconf 2.68 generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc. Copyright (C) 2010 Free Software Foundation, Inc.
@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by ccminer $as_me 2014.04.27, which was It was created by ccminer $as_me 2014.05.03, which was
generated by GNU Autoconf 2.68. Invocation command line was generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@ $ $0 $@
@ -2901,7 +2901,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='ccminer' PACKAGE='ccminer'
VERSION='2014.04.27' VERSION='2014.05.03'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by ccminer $as_me 2014.04.27, which was This file was extended by ccminer $as_me 2014.05.03, which was
generated by GNU Autoconf 2.68. Invocation command line was generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -7184,7 +7184,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
ccminer config.status 2014.04.27 ccminer config.status 2014.05.03
configured by $0, generated by GNU Autoconf 2.68, configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

2
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.05.03]) AC_INIT([ccminer], [2014.05.06])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

22
cpu-miner.c

@ -126,7 +126,9 @@ typedef enum {
ALGO_FUGUE256, /* Fugue256 */ ALGO_FUGUE256, /* Fugue256 */
ALGO_GROESTL, ALGO_GROESTL,
ALGO_MYR_GR, ALGO_MYR_GR,
ALGO_JACKPOT ALGO_JACKPOT,
ALGO_QUARK,
ALGO_ANIME
} sha256_algos; } sha256_algos;
static const char *algo_names[] = { static const char *algo_names[] = {
@ -135,7 +137,9 @@ static const char *algo_names[] = {
"fugue256", "fugue256",
"groestl", "groestl",
"myr-gr", "myr-gr",
"jackpot" "jackpot",
"quark",
"anime"
}; };
bool opt_debug = false; bool opt_debug = false;
@ -203,6 +207,8 @@ Options:\n\
groestl Groestlcoin hash\n\ groestl Groestlcoin hash\n\
myr-gr Myriad-Groestl hash\n\ myr-gr Myriad-Groestl hash\n\
jackpot Jackpot hash\n\ jackpot Jackpot hash\n\
quark Quark hash\n\
anime Animecoin hash\n\
-d, --devices takes a comma separated list of CUDA devices to use.\n\ -d, --devices takes a comma separated list of CUDA devices to use.\n\
Device IDs start counting from 0! Alternatively takes\n\ Device IDs start counting from 0! Alternatively takes\n\
string names of your cards like gtx780ti or gt640#2\n\ string names of your cards like gtx780ti or gt640#2\n\
@ -877,6 +883,16 @@ static void *miner_thread(void *userdata)
max_nonce, &hashes_done); max_nonce, &hashes_done);
break; break;
case ALGO_QUARK:
rc = scanhash_quark(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
case ALGO_ANIME:
rc = scanhash_anime(thr_id, work.data, work.target,
max_nonce, &hashes_done);
break;
default: default:
/* should never happen */ /* should never happen */
goto out; goto out;
@ -1429,7 +1445,7 @@ static void signal_handler(int sig)
} }
#endif #endif
#define PROGRAM_VERSION "0.8" #define PROGRAM_VERSION "0.9"
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
struct thr_info *thr; struct thr_info *thr;

4
cpuminer-config.h

@ -152,7 +152,7 @@
#define PACKAGE_NAME "ccminer" #define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.05.03" #define PACKAGE_STRING "ccminer 2014.05.06"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME #undef PACKAGE_TARNAME
@ -161,7 +161,7 @@
#undef PACKAGE_URL #undef PACKAGE_URL
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "2014.05.03" #define PACKAGE_VERSION "2014.05.06"
/* If using the C implementation of alloca, define if you know the /* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be direction of stack growth for your system; otherwise it will be

8
miner.h

@ -223,6 +223,14 @@ extern int scanhash_jackpot(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done); unsigned long *hashes_done);
extern int scanhash_quark(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done);
extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len); extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len); extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len); extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);

292
quark/animecoin.cu

@ -0,0 +1,292 @@
extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "miner.h"
}
#include <stdint.h>
// aus cpu-miner.c
extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
static uint32_t *d_animeNonces[8];
static uint32_t *d_branch1Nonces[8];
static uint32_t *d_branch2Nonces[8];
static uint32_t *d_branch3Nonces[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_setBlock_80(void *pdata);
extern void quark_bmw512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_keccak512_cpu_init(int thr_id, int threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
uint32_t *d_nonces2, size_t *nrm2,
int order);
extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
int order);
// Original Quarkhash Funktion aus einem miner Quelltext
inline void animehash(void *state, const void *input)
{
sph_blake512_context ctx_blake;
sph_bmw512_context ctx_bmw;
sph_groestl512_context ctx_groestl;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein;
unsigned char hash[64];
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) input, 80);
sph_bmw512_close(&ctx_bmw, (void*) hash);
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, hash, 64);
sph_blake512_close(&ctx_blake, (void*) hash);
if (hash[0] & 0x8)
{
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
}
else
{
sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
}
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
if (hash[0] & 0x8)
{
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, (const void*) hash, 64);
sph_blake512_close(&ctx_blake, (void*) hash);
}
else
{
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash);
}
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_skein512_init(&ctx_skein);
// SKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
if (hash[0] & 0x8)
{
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
}
else
{
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
}
memcpy(state, hash, 32);
}
struct HashPredicate
{
HashPredicate(uint32_t *hashes, uint32_t startNonce) :
m_hashes(hashes),
m_startNonce(startNonce)
{ }
__device__
bool operator()(const uint32_t x)
{
uint32_t *hash = &m_hashes[(x - m_startNonce)*16];
return hash[0] & 0x8;
}
uint32_t *m_hashes;
uint32_t m_startNonce;
};
extern bool opt_benchmark;
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
// TODO: entfernen für eine Release! Ist nur zum Testen!
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x00000f;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*2048; // 100;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
quark_compactTest_cpu_init(thr_id, throughput);
cudaMalloc(&d_animeNonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*throughput);
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
quark_bmw512_cpu_setBlock_80((void*)endiandata);
quark_check_cpu_setTarget(ptarget);
do {
int order = 0;
size_t nrm1=0, nrm2=0, nrm3=0;
// erstes BMW512 Hash mit CUDA
quark_bmw512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Blake512
quark_blake512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
quark_compactTest_single_false_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
d_branch3Nonces[thr_id], &nrm3,
order++);
// nur den Skein Branch weiterverfolgen
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// quarkNonces in branch1 und branch2 aufsplitten gemäss if (hash[0] & 0x8)
quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
d_branch1Nonces[thr_id], &nrm1,
d_branch2Nonces[thr_id], &nrm2,
order++);
// das ist der bedingte Branch für Blake512
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
// das ist der bedingte Branch für Bmw512
quark_bmw512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// quarkNonces in branch1 und branch2 aufsplitten gemäss if (hash[0] & 0x8)
quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
d_branch1Nonces[thr_id], &nrm1,
d_branch2Nonces[thr_id], &nrm2,
order++);
// das ist der bedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
// das ist der bedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
animehash(vhash64, endiandata);
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
pdata[19] = foundNonce;
*hashes_done = (foundNonce - first_nonce + 1)/2;
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = (pdata[19] - first_nonce + 1)/2;
return 0;
}

473
quark/cuda_bmw512.cu

@ -0,0 +1,473 @@
#if 1
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
// Endian Drehung für 32 Bit Typen
/*
static __device__ uint32_t cuda_swab32(uint32_t x)
{
return (((x << 24) & 0xff000000u) | ((x << 8) & 0x00ff0000u)
| ((x >> 8) & 0x0000ff00u) | ((x >> 24) & 0x000000ffu));
}
*/
static __device__ uint32_t cuda_swab32(uint32_t x)
{
return __byte_perm(x, 0, 0x0123);
}
// Endian Drehung für 64 Bit Typen
static __device__ unsigned long long cuda_swab64(unsigned long long x) {
uint32_t h = (x >> 32);
uint32_t l = (x & 0xFFFFFFFFULL);
return (((unsigned long long)cuda_swab32(l)) << 32) | ((unsigned long long)cuda_swab32(h));
}
// das Hi Word aus einem 64 Bit Typen extrahieren
static __device__ uint32_t HIWORD(const unsigned long long &x) {
#if __CUDA_ARCH__ >= 130
return (uint32_t)__double2hiint(__longlong_as_double(x));
#else
return (uint32_t)(x >> 32);
#endif
}
// das Hi Word in einem 64 Bit Typen ersetzen
static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
}
// das Lo Word aus einem 64 Bit Typen extrahieren
static __device__ uint32_t LOWORD(const unsigned long long &x) {
#if __CUDA_ARCH__ >= 130
return (uint32_t)__double2loint(__longlong_as_double(x));
#else
return (uint32_t)(x & 0xFFFFFFFFULL);
#endif
}
static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
{
#if __CUDA_ARCH__ >= 130
return __double_as_longlong(__hiloint2double(HI, LO));
#else
return (unsigned long long)LO | (((unsigned long long)HI) << 32ULL);
#endif
}
// das Lo Word in einem 64 Bit Typen ersetzen
static __device__ unsigned long long REPLACE_LOWORD(const unsigned long long &x, const uint32_t &y) {
return (x & 0xFFFFFFFF00000000ULL) | ((unsigned long long)y);
}
// der Versuch, einen Wrapper für einen aus 32 Bit Registern zusammengesetzten uin64_t Typen zu entferfen...
#if 1
typedef unsigned long long uint64_t;
#else
typedef class uint64
{
public:
__device__ uint64()
{
}
__device__ uint64(unsigned long long init)
{
val = make_uint2( LOWORD(init), HIWORD(init) );
}
__device__ uint64(uint32_t lo, uint32_t hi)
{
val = make_uint2( lo, hi );
}
__device__ const uint64 operator^(uint64 const& rhs) const
{
return uint64(val.x ^ rhs.val.x, val.y ^ rhs.val.y);
}
__device__ const uint64 operator|(uint64 const& rhs) const
{
return uint64(val.x | rhs.val.x, val.y | rhs.val.y);
}
__device__ const uint64 operator+(unsigned long long const& rhs) const
{
return *this+uint64(rhs);
}
__device__ const uint64 operator+(uint64 const& rhs) const
{
uint64 res;
asm ("add.cc.u32 %0, %2, %4;\n\t"
"addc.cc.u32 %1, %3, %5;\n\t"
: "=r"(res.val.x), "=r"(res.val.y)
: "r"( val.x), "r"( val.y),
"r"(rhs.val.x), "r"(rhs.val.y));
return res;
}
__device__ const uint64 operator-(uint64 const& rhs) const
{
uint64 res;
asm ("sub.cc.u32 %0, %2, %4;\n\t"
"subc.cc.u32 %1, %3, %5;\n\t"
: "=r"(res.val.x), "=r"(res.val.y)
: "r"( val.x), "r"( val.y),
"r"(rhs.val.x), "r"(rhs.val.y));
return res;
}
__device__ const uint64 operator<<(int n) const
{
return uint64(unsigned long long(*this)<<n);
}
__device__ const uint64 operator>>(int n) const
{
return uint64(unsigned long long(*this)>>n);
}
__device__ operator unsigned long long() const
{
return MAKE_ULONGLONG(val.x, val.y);
}
uint2 val;
} uint64_t;
#endif
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// die Message it Padding zur Berechnung auf der GPU
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
#define SPH_C64(x) ((uint64_t)(x ## ULL))
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt
#if __CUDA_ARCH__ >= 350
__forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offset) {
uint2 result;
if(offset >= 32) {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
} else {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
}
return __double_as_longlong(__hiloint2double(result.y, result.x));
}
#else
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
#define SHL(x, n) ((x) << (n))
#define SHR(x, n) ((x) >> (n))
#define CONST_EXP2 q[i+0] + ROTL64(q[i+1], 5) + q[i+2] + ROTL64(q[i+3], 11) + \
q[i+4] + ROTL64(q[i+5], 27) + q[i+6] + ROTL64(q[i+7], 32) + \
q[i+8] + ROTL64(q[i+9], 37) + q[i+10] + ROTL64(q[i+11], 43) + \
q[i+12] + ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15])
__device__ void Compression512(uint64_t *msg, uint64_t *hash)
{
// Compression ref. implementation
uint64_t tmp;
uint64_t q[32];
tmp = (msg[ 5] ^ hash[ 5]) - (msg[ 7] ^ hash[ 7]) + (msg[10] ^ hash[10]) + (msg[13] ^ hash[13]) + (msg[14] ^ hash[14]);
q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ ROTL64(tmp, 4) ^ ROTL64(tmp, 37)) + hash[1];
tmp = (msg[ 6] ^ hash[ 6]) - (msg[ 8] ^ hash[ 8]) + (msg[11] ^ hash[11]) + (msg[14] ^ hash[14]) - (msg[15] ^ hash[15]);
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ ROTL64(tmp, 13) ^ ROTL64(tmp, 43)) + hash[2];
tmp = (msg[ 0] ^ hash[ 0]) + (msg[ 7] ^ hash[ 7]) + (msg[ 9] ^ hash[ 9]) - (msg[12] ^ hash[12]) + (msg[15] ^ hash[15]);
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ ROTL64(tmp, 19) ^ ROTL64(tmp, 53)) + hash[3];
tmp = (msg[ 0] ^ hash[ 0]) - (msg[ 1] ^ hash[ 1]) + (msg[ 8] ^ hash[ 8]) - (msg[10] ^ hash[10]) + (msg[13] ^ hash[13]);
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ ROTL64(tmp, 28) ^ ROTL64(tmp, 59)) + hash[4];
tmp = (msg[ 1] ^ hash[ 1]) + (msg[ 2] ^ hash[ 2]) + (msg[ 9] ^ hash[ 9]) - (msg[11] ^ hash[11]) - (msg[14] ^ hash[14]);
q[4] = (SHR(tmp, 1) ^ tmp) + hash[5];
tmp = (msg[ 3] ^ hash[ 3]) - (msg[ 2] ^ hash[ 2]) + (msg[10] ^ hash[10]) - (msg[12] ^ hash[12]) + (msg[15] ^ hash[15]);
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ ROTL64(tmp, 4) ^ ROTL64(tmp, 37)) + hash[6];
tmp = (msg[ 4] ^ hash[ 4]) - (msg[ 0] ^ hash[ 0]) - (msg[ 3] ^ hash[ 3]) - (msg[11] ^ hash[11]) + (msg[13] ^ hash[13]);
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ ROTL64(tmp, 13) ^ ROTL64(tmp, 43)) + hash[7];
tmp = (msg[ 1] ^ hash[ 1]) - (msg[ 4] ^ hash[ 4]) - (msg[ 5] ^ hash[ 5]) - (msg[12] ^ hash[12]) - (msg[14] ^ hash[14]);
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ ROTL64(tmp, 19) ^ ROTL64(tmp, 53)) + hash[8];
tmp = (msg[ 2] ^ hash[ 2]) - (msg[ 5] ^ hash[ 5]) - (msg[ 6] ^ hash[ 6]) + (msg[13] ^ hash[13]) - (msg[15] ^ hash[15]);
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ ROTL64(tmp, 28) ^ ROTL64(tmp, 59)) + hash[9];
tmp = (msg[ 0] ^ hash[ 0]) - (msg[ 3] ^ hash[ 3]) + (msg[ 6] ^ hash[ 6]) - (msg[ 7] ^ hash[ 7]) + (msg[14] ^ hash[14]);
q[9] = (SHR(tmp, 1) ^ tmp) + hash[10];
tmp = (msg[ 8] ^ hash[ 8]) - (msg[ 1] ^ hash[ 1]) - (msg[ 4] ^ hash[ 4]) - (msg[ 7] ^ hash[ 7]) + (msg[15] ^ hash[15]);
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ ROTL64(tmp, 4) ^ ROTL64(tmp, 37)) + hash[11];
tmp = (msg[ 8] ^ hash[ 8]) - (msg[ 0] ^ hash[ 0]) - (msg[ 2] ^ hash[ 2]) - (msg[ 5] ^ hash[ 5]) + (msg[ 9] ^ hash[ 9]);
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ ROTL64(tmp, 13) ^ ROTL64(tmp, 43)) + hash[12];
tmp = (msg[ 1] ^ hash[ 1]) + (msg[ 3] ^ hash[ 3]) - (msg[ 6] ^ hash[ 6]) - (msg[ 9] ^ hash[ 9]) + (msg[10] ^ hash[10]);
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ ROTL64(tmp, 19) ^ ROTL64(tmp, 53)) + hash[13];
tmp = (msg[ 2] ^ hash[ 2]) + (msg[ 4] ^ hash[ 4]) + (msg[ 7] ^ hash[ 7]) + (msg[10] ^ hash[10]) + (msg[11] ^ hash[11]);
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ ROTL64(tmp, 28) ^ ROTL64(tmp, 59)) + hash[14];
tmp = (msg[ 3] ^ hash[ 3]) - (msg[ 5] ^ hash[ 5]) + (msg[ 8] ^ hash[ 8]) - (msg[11] ^ hash[11]) - (msg[12] ^ hash[12]);
q[14] = (SHR(tmp, 1) ^ tmp) + hash[15];
tmp = (msg[12] ^ hash[12]) - (msg[ 4] ^ hash[ 4]) - (msg[ 6] ^ hash[ 6]) - (msg[ 9] ^ hash[ 9]) + (msg[13] ^ hash[13]);
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ ROTL64(tmp, 4) ^ ROTL64(tmp, 37)) + hash[0];
// Expand 1
#pragma unroll 2
for(int i=0;i<2;i++)
{
q[i+16] =
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ ROTL64(q[i], 13) ^ ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ ROTL64(q[i+1], 19) ^ ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ ROTL64(q[i+2], 28) ^ ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ ROTL64(q[i+3], 4) ^ ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ ROTL64(q[i+4], 13) ^ ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ ROTL64(q[i+5], 19) ^ ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ ROTL64(q[i+6], 28) ^ ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ ROTL64(q[i+7], 4) ^ ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ ROTL64(q[i+8], 13) ^ ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ ROTL64(q[i+9], 19) ^ ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ ROTL64(q[i+10], 28) ^ ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ ROTL64(q[i+11], 4) ^ ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ ROTL64(q[i+12], 13) ^ ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ ROTL64(q[i+13], 19) ^ ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ ROTL64(q[i+14], 28) ^ ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ ROTL64(q[i+15], 4) ^ ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + ROTL64(msg[i], i+1) +
ROTL64(msg[i+3], i+4) - ROTL64(msg[i+10], i+11) ) ^ hash[i+7]);
}
#pragma unroll 4
for(int i=2;i<6;i++) {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + ROTL64(msg[i], i+1) +
ROTL64(msg[i+3], i+4) - ROTL64(msg[i+10], i+11) ) ^ hash[i+7]);
}
#pragma unroll 3
for(int i=6;i<9;i++) {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + ROTL64(msg[i], i+1) +
ROTL64(msg[i+3], i+4) - ROTL64(msg[i-6], (i-6)+1) ) ^ hash[i+7]);
}
#pragma unroll 4
for(int i=9;i<13;i++) {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + ROTL64(msg[i], i+1) +
ROTL64(msg[i+3], i+4) - ROTL64(msg[i-6], (i-6)+1) ) ^ hash[i-9]);
}
#pragma unroll 3
for(int i=13;i<16;i++) {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + ROTL64(msg[i], i+1) +
ROTL64(msg[i-13], (i-13)+1) - ROTL64(msg[i-6], (i-6)+1) ) ^ hash[i-9]);
}
uint64_t XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23];
uint64_t XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
hash[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ msg[ 0]) + ( XL64 ^ q[24] ^ q[ 0]);
hash[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ msg[ 1]) + ( XL64 ^ q[25] ^ q[ 1]);
hash[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ msg[ 2]) + ( XL64 ^ q[26] ^ q[ 2]);
hash[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ msg[ 3]) + ( XL64 ^ q[27] ^ q[ 3]);
hash[4] = (SHR(XH64, 3) ^ q[20] ^ msg[ 4]) + ( XL64 ^ q[28] ^ q[ 4]);
hash[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ msg[ 5]) + ( XL64 ^ q[29] ^ q[ 5]);
hash[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ msg[ 6]) + ( XL64 ^ q[30] ^ q[ 6]);
hash[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ msg[ 7]) + ( XL64 ^ q[31] ^ q[ 7]);
hash[ 8] = ROTL64(hash[4], 9) + ( XH64 ^ q[24] ^ msg[ 8]) + (SHL(XL64,8) ^ q[23] ^ q[ 8]);
hash[ 9] = ROTL64(hash[5],10) + ( XH64 ^ q[25] ^ msg[ 9]) + (SHR(XL64,6) ^ q[16] ^ q[ 9]);
hash[10] = ROTL64(hash[6],11) + ( XH64 ^ q[26] ^ msg[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]);
hash[11] = ROTL64(hash[7],12) + ( XH64 ^ q[27] ^ msg[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]);
hash[12] = ROTL64(hash[0],13) + ( XH64 ^ q[28] ^ msg[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]);
hash[13] = ROTL64(hash[1],14) + ( XH64 ^ q[29] ^ msg[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]);
hash[14] = ROTL64(hash[2],15) + ( XH64 ^ q[30] ^ msg[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]);
hash[15] = ROTL64(hash[3],16) + ( XH64 ^ q[31] ^ msg[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]);
}
static __constant__ uint64_t d_constMem[16];
static uint64_t h_constMem[16] = {
SPH_C64(0x8081828384858687),
SPH_C64(0x88898A8B8C8D8E8F),
SPH_C64(0x9091929394959697),
SPH_C64(0x98999A9B9C9D9E9F),
SPH_C64(0xA0A1A2A3A4A5A6A7),
SPH_C64(0xA8A9AAABACADAEAF),
SPH_C64(0xB0B1B2B3B4B5B6B7),
SPH_C64(0xB8B9BABBBCBDBEBF),
SPH_C64(0xC0C1C2C3C4C5C6C7),
SPH_C64(0xC8C9CACBCCCDCECF),
SPH_C64(0xD0D1D2D3D4D5D6D7),
SPH_C64(0xD8D9DADBDCDDDEDF),
SPH_C64(0xE0E1E2E3E4E5E6E7),
SPH_C64(0xE8E9EAEBECEDEEEF),
SPH_C64(0xF0F1F2F3F4F5F6F7),
SPH_C64(0xF8F9FAFBFCFDFEFF)
};
__global__ void quark_bmw512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint64_t *inpHash = &g_hash[8 * hashPosition];
// Init
uint64_t h[16];
/*
h[ 0] = SPH_C64(0x8081828384858687);
h[ 1] = SPH_C64(0x88898A8B8C8D8E8F);
h[ 2] = SPH_C64(0x9091929394959697);
h[ 3] = SPH_C64(0x98999A9B9C9D9E9F);
h[ 4] = SPH_C64(0xA0A1A2A3A4A5A6A7);
h[ 5] = SPH_C64(0xA8A9AAABACADAEAF);
h[ 6] = SPH_C64(0xB0B1B2B3B4B5B6B7);
h[ 7] = SPH_C64(0xB8B9BABBBCBDBEBF);
h[ 8] = SPH_C64(0xC0C1C2C3C4C5C6C7);
h[ 9] = SPH_C64(0xC8C9CACBCCCDCECF);
h[10] = SPH_C64(0xD0D1D2D3D4D5D6D7);
h[11] = SPH_C64(0xD8D9DADBDCDDDEDF);
h[12] = SPH_C64(0xE0E1E2E3E4E5E6E7);
h[13] = SPH_C64(0xE8E9EAEBECEDEEEF);
h[14] = SPH_C64(0xF0F1F2F3F4F5F6F7);
h[15] = SPH_C64(0xF8F9FAFBFCFDFEFF);
*/
#pragma unroll 16
for(int i=0;i<16;i++)
h[i] = d_constMem[i];
// Nachricht kopieren (Achtung, die Nachricht hat 64 Byte,
// BMW arbeitet mit 128 Byte!!!
uint64_t message[16];
#pragma unroll 8
for(int i=0;i<8;i++)
message[i] = inpHash[i];
#pragma unroll 6
for(int i=9;i<15;i++)
message[i] = 0;
// Padding einfügen (Byteorder?!?)
message[8] = SPH_C64(0x80);
// Länge (in Bits, d.h. 64 Byte * 8 = 512 Bits
message[15] = SPH_C64(512);
// Compression 1
Compression512(message, h);
// Final
#pragma unroll 16
for(int i=0;i<16;i++)
message[i] = 0xaaaaaaaaaaaaaaa0ull + (uint64_t)i;
Compression512(h, message);
// fertig
uint64_t *outpHash = &g_hash[8 * hashPosition];
#pragma unroll 8
for(int i=0;i<8;i++)
outpHash[i] = message[i+8];
}
}
__global__ void quark_bmw512_gpu_hash_80(int threads, uint32_t startNounce, uint64_t *g_hash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread;
// Init
uint64_t h[16];
#pragma unroll 16
for(int i=0;i<16;i++)
h[i] = d_constMem[i];
// Nachricht kopieren (Achtung, die Nachricht hat 64 Byte,
// BMW arbeitet mit 128 Byte!!!
uint64_t message[16];
#pragma unroll 16
for(int i=0;i<16;i++)
message[i] = c_PaddedMessage80[i];
// die Nounce durch die thread-spezifische ersetzen
message[9] = REPLACE_HIWORD(message[9], cuda_swab32(nounce));
// Compression 1
Compression512(message, h);
// Final
#pragma unroll 16
for(int i=0;i<16;i++)
message[i] = 0xaaaaaaaaaaaaaaa0ull + (uint64_t)i;
Compression512(h, message);
// fertig
uint64_t *outpHash = &g_hash[8 * thread];
#pragma unroll 8
for(int i=0;i<8;i++)
outpHash[i] = message[i+8];
}
}
// Setup-Funktionen
__host__ void quark_bmw512_cpu_init(int thr_id, int threads)
{
// nix zu tun ;-)
// jetzt schon :D
cudaMemcpyToSymbol( d_constMem,
h_constMem,
sizeof(h_constMem),
0, cudaMemcpyHostToDevice);
}
// Bmw512 für 80 Byte grosse Eingangsdaten
__host__ void quark_bmw512_cpu_setBlock_80(void *pdata)
{
// Message mit Padding bereitstellen
// lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen.
unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 80);
memset(PaddedMessage+80, 0, 48);
uint64_t *message = (uint64_t*)PaddedMessage;
// Padding einfügen (Byteorder?!?)
message[10] = SPH_C64(0x80);
// Länge (in Bits, d.h. 80 Byte * 8 = 640 Bits
message[15] = SPH_C64(640);
// die Message zur Berechnung auf der GPU
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
__host__ void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
quark_bmw512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}
__host__ void quark_bmw512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
quark_bmw512_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
MyStreamSynchronize(NULL, order, thr_id);
}
#endif

6
quark/cuda_jh512.cu

@ -73,9 +73,11 @@ const unsigned char h_E8_bitslice_roundconstant[42][32]={
/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 32-bit x*/ /*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 32-bit x*/
#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0fUL) << 4) | (((x) & 0xf0f0f0f0UL) >> 4)); #define SWAP4(x) (x) = ((((x) & 0x0f0f0f0fUL) << 4) | (((x) & 0xf0f0f0f0UL) >> 4));
/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 32-bit x*/ /*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 32-bit x*/
#define SWAP8(x) (x) = ((((x) & 0x00ff00ffUL) << 8) | (((x) & 0xff00ff00UL) >> 8)); //#define SWAP8(x) (x) = ((((x) & 0x00ff00ffUL) << 8) | (((x) & 0xff00ff00UL) >> 8));
#define SWAP8(x) (x) = __byte_perm(x, x, 0x2301);
/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 32-bit x*/ /*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 32-bit x*/
#define SWAP16(x) (x) = ((((x) & 0x0000ffffUL) << 16) | (((x) & 0xffff0000UL) >> 16)); //#define SWAP16(x) (x) = ((((x) & 0x0000ffffUL) << 16) | (((x) & 0xffff0000UL) >> 16));
#define SWAP16(x) (x) = __byte_perm(x, x, 0x1032);
/*The MDS transform*/ /*The MDS transform*/
#define L(m0,m1,m2,m3,m4,m5,m6,m7) \ #define L(m0,m1,m2,m3,m4,m5,m6,m7) \

70
quark/cuda_quark_blake512.cu

@ -70,30 +70,6 @@ static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y)
return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y); return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
} }
/*
#define SWAP32(x) \
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
#define SWAP64(x) \
((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \
(((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \
(((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \
(((uint64_t)(x) & 0x000000ff00000000ULL) >> 8) | \
(((uint64_t)(x) & 0x00000000ff000000ULL) << 8) | \
(((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \
(((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \
(((uint64_t)(x) & 0x00000000000000ffULL) << 56)))
*/
/*
__device__ __forceinline__ void SWAP32(uint32_t *x)
{
// Input: 33221100
// Output: 00112233
x[0] = __byte_perm(x[0], 0, 0x0123);
}
*/
__device__ __forceinline__ uint64_t SWAP64(uint64_t x) __device__ __forceinline__ uint64_t SWAP64(uint64_t x)
{ {
// Input: 77665544 33221100 // Input: 77665544 33221100
@ -154,9 +130,9 @@ __device__ void quark_blake512_compress( uint64_t *h, const uint64_t *block, con
#pragma unroll 16 #pragma unroll 16
for( i = 0; i < 16; ++i ) for( i = 0; i < 16; ++i )
{ {
m[i] = SWAP64(block[i]); m[i] = SWAP64(block[i]);
} }
#pragma unroll 8 #pragma unroll 8
for( i = 0; i < 8; ++i ) v[i] = h[i]; for( i = 0; i < 8; ++i ) v[i] = h[i];
@ -197,11 +173,8 @@ __device__ void quark_blake512_compress( uint64_t *h, const uint64_t *block, con
static __device__ uint32_t cuda_swab32(uint32_t x) static __device__ uint32_t cuda_swab32(uint32_t x)
{ {
return __byte_perm(x, 0, 0x0123); return __byte_perm(x, 0, 0x0123);
/*
return (((x << 24) & 0xff000000u) | ((x << 8) & 0x00ff0000u)
| ((x >> 8) & 0x0000ff00u) | ((x >> 24) & 0x000000ffu));
*/
} }
/* /*
// Endian Drehung für 64 Bit Typen // Endian Drehung für 64 Bit Typen
static __device__ uint64_t cuda_swab64(uint64_t x) { static __device__ uint64_t cuda_swab64(uint64_t x) {
@ -234,7 +207,7 @@ static const uint64_t h_constHashPadding[8] = {
0, 0,
0x0002000000000000ull }; 0x0002000000000000ull };
__global__ void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash) __global__ __launch_bounds__(256, 2) void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
{ {
int thread = (blockDim.x * blockIdx.x + threadIdx.x); int thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -395,14 +368,14 @@ __host__ void quark_blake512_cpu_init(int thr_id, int threads)
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_constMem, cudaMemcpyToSymbol( d_constMem,
h_constMem, h_constMem,
sizeof(h_constMem), sizeof(h_constMem),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol( d_constHashPadding, cudaMemcpyToSymbol( d_constHashPadding,
h_constHashPadding, h_constHashPadding,
sizeof(h_constHashPadding), sizeof(h_constHashPadding),
0, cudaMemcpyHostToDevice); 0, cudaMemcpyHostToDevice);
} }
// Blake512 für 80 Byte grosse Eingangsdaten // Blake512 für 80 Byte grosse Eingangsdaten
@ -422,27 +395,6 @@ __host__ void quark_blake512_cpu_setBlock_80(void *pdata)
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
} }
#if 0
// Blake512 für 64 Byte grosse Eingangsdaten
// evtl. macht es gar keinen Sinn, das alles ins Constant Memory to schicken. Es sind hier sowieso
// nur die letzten 64 Bytes des Blocks konstant, und die meisten Bytes davon sind 0. Das kann mnan
// auch im Kernel initialisieren.
__host__ void quark_blake512_cpu_setBlock_64(void *pdata)
{
// Message mit Padding bereitstellen
unsigned char PaddedMessage[128];
memcpy(PaddedMessage, pdata, 64); // Hinweis: diese 64 Bytes sind nonce-spezifisch und ändern sich KOMPLETT für jede Nonce!
memset(PaddedMessage+64, 0, 64);
PaddedMessage[64] = 0x80;
PaddedMessage[111] = 1;
PaddedMessage[126] = 0x02;
PaddedMessage[127] = 0x00;
// die Message zur Berechnung auf der GPU
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
#endif
__host__ void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order) __host__ void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order)
{ {
const int threadsperblock = 256; const int threadsperblock = 256;

363
quark/cuda_quark_compactionTest.cu

@ -0,0 +1,363 @@
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "sm_30_intrinsics.h"
#include <stdio.h>
#include <memory.h>
#include <stdint.h>
// aus cpu-miner.c
extern "C" int device_map[8];
// diese Struktur wird in der Init Funktion angefordert
static cudaDeviceProp props[8];
static uint32_t *d_tempBranch1Nonces[8];
static uint32_t *d_numValid[8];
static uint32_t *h_numValid[8];
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// True/False tester
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
__device__ uint32_t QuarkTrueTest(uint32_t *inpHash)
{
return ((inpHash[0] & 0x08) == 0x08);
}
__device__ uint32_t QuarkFalseTest(uint32_t *inpHash)
{
return ((inpHash[0] & 0x08) == 0);
}
__device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_QuarkFalseFunction = QuarkFalseTest;
cuda_compactTestFunction_t h_QuarkTrueFunction[8], h_QuarkFalseFunction[8];
// Setup-Funktionen
__host__ void quark_compactTest_cpu_init(int thr_id, int threads)
{
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
cudaMemcpyFromSymbol(&h_QuarkTrueFunction[thr_id], d_QuarkTrueFunction, sizeof(cuda_compactTestFunction_t));
cudaMemcpyFromSymbol(&h_QuarkFalseFunction[thr_id], d_QuarkFalseFunction, sizeof(cuda_compactTestFunction_t));
// wir brauchen auch Speicherplatz auf dem Device
cudaMalloc(&d_tempBranch1Nonces[thr_id], sizeof(uint32_t) * threads * 2);
cudaMalloc(&d_numValid[thr_id], 2*sizeof(uint32_t));
cudaMallocHost(&h_numValid[thr_id], 2*sizeof(uint32_t));
uint32_t s1;
s1 = (threads / 256) * 2;
cudaMalloc(&d_partSum[0][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
}
// Die Summenfunktion (vom NVIDIA SDK)
__global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
{
extern __shared__ uint32_t sums[];
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
//int lane_id = id % warpSize;
int lane_id = id % width;
// determine a warp_id within a block
//int warp_id = threadIdx.x / warpSize;
int warp_id = threadIdx.x / width;
sums[lane_id] = 0;
// Below is the basic structure of using a shfl instruction
// for a scan.
// Record "value" as a variable - we accumulate it along the way
uint32_t value;
if(testFunc != NULL)
{
if (id < threads)
{
uint32_t *inpHash;
if(d_validNonceTable == NULL)
{
// keine Nonce-Liste
inpHash = &inpHashes[id<<4];
}else
{
// Nonce-Liste verfügbar
int nonce = d_validNonceTable[id] - startNounce;
inpHash = &inpHashes[nonce<<4];
}
value = (*testFunc)(inpHash);
}else
{
value = 0;
}
}else
{
value = data[id];
}
__syncthreads();
// Now accumulate in log steps up the chain
// compute sums, with another thread's value who is
// distance delta away (i). Note
// those threads where the thread 'i' away would have
// been out of bounds of the warp are unaffected. This
// creates the scan sum.
#pragma unroll
for (int i=1; i<=width; i*=2)
{
uint32_t n = __shfl_up((int)value, i, width);
if (lane_id >= i) value += n;
}
// value now holds the scan value for the individual thread
// next sum the largest values for each warp
// write the sum of the warp to smem
//if (threadIdx.x % warpSize == warpSize-1)
if (threadIdx.x % width == width-1)
{
sums[warp_id] = value;
}
__syncthreads();
//
// scan sum the warp sums
// the same shfl scan operation, but performed on warp sums
//
if (warp_id == 0)
{
uint32_t warp_sum = sums[lane_id];
for (int i=1; i<=width; i*=2)
{
uint32_t n = __shfl_up((int)warp_sum, i, width);
if (lane_id >= i) warp_sum += n;
}
sums[lane_id] = warp_sum;
}
__syncthreads();
// perform a uniform add across warps in the block
// read neighbouring warp's sum and add it to threads value
uint32_t blockSum = 0;
if (warp_id > 0)
{
blockSum = sums[warp_id-1];
}
value += blockSum;
// Now write out our result
data[id] = value;
// last thread has sum, write write out the block's sum
if (partial_sums != NULL && threadIdx.x == blockDim.x-1)
{
partial_sums[blockIdx.x] = value;
}
}
// Uniform add: add partial sums array
__global__ void quark_compactTest_gpu_ADD(uint32_t *data, uint32_t *partial_sums, int len)
{
__shared__ uint32_t buf;
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
if (id > len) return;
if (threadIdx.x == 0)
{
buf = partial_sums[blockIdx.x];
}
__syncthreads();
data[id] += buf;
}
// Der Scatter
__global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
{
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
uint32_t actNounce = id;
uint32_t value;
if (id < threads)
{
// uint32_t nounce = startNounce + id;
uint32_t *inpHash;
if(d_validNonceTable == NULL)
{
// keine Nonce-Liste
inpHash = &inpHashes[id<<4];
}else
{
// Nonce-Liste verfügbar
int nonce = d_validNonceTable[id] - startNounce;
actNounce = nonce;
inpHash = &inpHashes[nonce<<4];
}
value = (*testFunc)(inpHash);
}else
{
value = 0;
}
if( value )
{
int idx = sum[id];
if(idx > 0)
outp[idx-1] = startNounce + actNounce;
}
}
__host__ static uint32_t quark_compactTest_roundUpExp(uint32_t val)
{
if(val == 0)
return 0;
uint32_t mask = 0x80000000;
while( (val & mask) == 0 ) mask = mask >> 1;
if( (val & (~mask)) != 0 )
return mask << 1;
return mask;
}
__host__ void quark_compactTest_cpu_singleCompaction(int thr_id, int threads, uint32_t *nrm,
uint32_t *d_nonces1, cuda_compactTestFunction_t function,
uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
{
int orgThreads = threads;
threads = (int)quark_compactTest_roundUpExp((uint32_t)threads);
// threadsPerBlock ausrechnen
int blockSize = 256;
int nSummen = threads / blockSize;
int thr1 = (threads+blockSize-1) / blockSize;
int thr2 = threads / (blockSize*blockSize);
int blockSize2 = (nSummen < blockSize) ? nSummen : blockSize;
int thr3 = (nSummen + blockSize2-1) / blockSize2;
bool callThrid = (thr2 > 0) ? true : false;
// Erster Initialscan
quark_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(
d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable);
// weitere Scans
if(callThrid)
{
quark_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]);
quark_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2);
}else
{
quark_compactTest_gpu_SCAN<<<thr3,blockSize2, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2);
}
// Sync + Anzahl merken
cudaStreamSynchronize(NULL);
if(callThrid)
cudaMemcpy(nrm, &(d_partSum[1][thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
else
cudaMemcpy(nrm, &(d_partSum[0][thr_id])[nSummen-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
// Addieren
if(callThrid)
{
quark_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum[0][thr_id]+blockSize, d_partSum[1][thr_id], blockSize*thr2);
}
quark_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch1Nonces[thr_id]+blockSize, d_partSum[0][thr_id], threads);
// Scatter
quark_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch1Nonces[thr_id], d_nonces1,
function, orgThreads, startNounce, inpHashes, d_validNonceTable);
// Sync
cudaStreamSynchronize(NULL);
}
////// ACHTUNG: Diese funktion geht aktuell nur mit threads > 65536 (Am besten 256 * 1024 oder 256*2048)
__host__ void quark_compactTest_cpu_dualCompaction(int thr_id, int threads, uint32_t *nrm,
uint32_t *d_nonces1, uint32_t *d_nonces2,
uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
{
quark_compactTest_cpu_singleCompaction(thr_id, threads, &nrm[0], d_nonces1, h_QuarkTrueFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
quark_compactTest_cpu_singleCompaction(thr_id, threads, &nrm[1], d_nonces2, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
/*
// threadsPerBlock ausrechnen
int blockSize = 256;
int thr1 = threads / blockSize;
int thr2 = threads / (blockSize*blockSize);
// 1
quark_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(d_tempBranch1Nonces[thr_id], 32, d_partSum1[thr_id], h_QuarkTrueFunction[thr_id], threads, startNounce, inpHashes);
quark_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
quark_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
cudaStreamSynchronize(NULL);
cudaMemcpy(&nrm[0], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
quark_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
quark_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch1Nonces[thr_id]+blockSize, d_partSum1[thr_id], threads);
// 2
quark_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(d_tempBranch2Nonces[thr_id], 32, d_partSum1[thr_id], h_QuarkFalseFunction[thr_id], threads, startNounce, inpHashes);
quark_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
quark_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
cudaStreamSynchronize(NULL);
cudaMemcpy(&nrm[1], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
quark_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
quark_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch2Nonces[thr_id]+blockSize, d_partSum1[thr_id], threads);
// Hier ist noch eine Besonderheit: in d_tempBranch1Nonces sind die element von 1...nrm1 die Interessanten
// Schritt 3: Scatter
quark_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch1Nonces[thr_id], d_nonces1, h_QuarkTrueFunction[thr_id], threads, startNounce, inpHashes);
quark_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch2Nonces[thr_id], d_nonces2, h_QuarkFalseFunction[thr_id], threads, startNounce, inpHashes);
cudaStreamSynchronize(NULL);
*/
}
__host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
uint32_t *d_nonces2, size_t *nrm2,
int order)
{
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
quark_compactTest_cpu_dualCompaction(thr_id, threads,
h_numValid[thr_id], d_nonces1, d_nonces2,
startNounce, inpHashes, d_validNonceTable);
cudaStreamSynchronize(NULL); // Das original braucht zwar etwas CPU-Last, ist an dieser Stelle aber evtl besser
*nrm1 = (size_t)h_numValid[thr_id][0];
*nrm2 = (size_t)h_numValid[thr_id][1];
}
__host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
int order)
{
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
quark_compactTest_cpu_singleCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
cudaStreamSynchronize(NULL); // Das original braucht zwar etwas CPU-Last, ist an dieser Stelle aber evtl besser
*nrm1 = (size_t)h_numValid[thr_id][0];
}

182
quark/cuda_quark_keccak512.cu

@ -0,0 +1,182 @@
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <memory.h>
// Folgende Definitionen später durch header ersetzen
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#include "cuda_helper.h"
#define U32TO64_LE(p) \
(((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))
#define U64TO32_LE(p, v) \
*p = (uint32_t)((v)); *(p+1) = (uint32_t)((v) >> 32);
static const uint64_t host_keccak_round_constants[24] = {
0x0000000000000001ull, 0x0000000000008082ull,
0x800000000000808aull, 0x8000000080008000ull,
0x000000000000808bull, 0x0000000080000001ull,
0x8000000080008081ull, 0x8000000000008009ull,
0x000000000000008aull, 0x0000000000000088ull,
0x0000000080008009ull, 0x000000008000000aull,
0x000000008000808bull, 0x800000000000008bull,
0x8000000000008089ull, 0x8000000000008003ull,
0x8000000000008002ull, 0x8000000000000080ull,
0x000000000000800aull, 0x800000008000000aull,
0x8000000080008081ull, 0x8000000000008080ull,
0x0000000080000001ull, 0x8000000080008008ull
};
__constant__ uint64_t c_keccak_round_constants[24];
static __device__ __forceinline__ void
keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_constants) {
size_t i;
uint64_t t[5], u[5], v, w;
/* absorb input */
#pragma unroll 9
for (i = 0; i < 72 / 8; i++, in += 2)
s[i] ^= U32TO64_LE(in);
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
u[0] = t[4] ^ ROTL64(t[1], 1);
u[1] = t[0] ^ ROTL64(t[2], 1);
u[2] = t[1] ^ ROTL64(t[3], 1);
u[3] = t[2] ^ ROTL64(t[4], 1);
u[4] = t[3] ^ ROTL64(t[0], 1);
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[ 1];
s[ 1] = ROTL64(s[ 6], 44);
s[ 6] = ROTL64(s[ 9], 20);
s[ 9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[ 2], 62);
s[ 2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[ 4], 27);
s[ 4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[ 8], 55);
s[ 8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[ 5], 36);
s[ 5] = ROTL64(s[ 3], 28);
s[ 3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[ 7], 6);
s[ 7] = ROTL64(s[10], 3);
s[10] = ROTL64( v, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
}
__global__ void quark_keccak512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
int hashPosition = nounce - startNounce;
uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
// Nachricht kopieren
uint32_t message[18];
#pragma unroll 16
for(int i=0;i<16;i++)
message[i] = inpHash[i];
message[16] = 0x01;
message[17] = 0x80000000;
// State initialisieren
uint64_t keccak_gpu_state[25];
#pragma unroll 25
for (int i=0; i<25; i++)
keccak_gpu_state[i] = 0;
// den Block einmal gut durchschütteln
keccak_block(keccak_gpu_state, message, c_keccak_round_constants);
// das Hash erzeugen
uint32_t hash[16];
#pragma unroll 8
for (size_t i = 0; i < 64; i += 8) {
U64TO32_LE((&hash[i/4]), keccak_gpu_state[i / 8]);
}
// fertig
uint32_t *outpHash = (uint32_t*)&g_hash[8 * hashPosition];
#pragma unroll 16
for(int i=0;i<16;i++)
outpHash[i] = hash[i];
}
}
// Setup-Funktionen
__host__ void quark_keccak512_cpu_init(int thr_id, int threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( c_keccak_round_constants,
host_keccak_round_constants,
sizeof(host_keccak_round_constants),
0, cudaMemcpyHostToDevice);
}
__host__ void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const int threadsperblock = 256;
// berechne wie viele Thread Blocks wir brauchen
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
// Größe des dynamischen Shared Memory Bereichs
size_t shared_size = 0;
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
quark_keccak512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
MyStreamSynchronize(NULL, order, thr_id);
}

274
quark/quarkcoin.cu

@ -0,0 +1,274 @@
extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"
#include "miner.h"
}
#include <stdint.h>
// aus cpu-miner.c
extern int device_map[8];
// Speicher für Input/Output der verketteten Hashfunktionen
static uint32_t *d_hash[8];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
static uint32_t *d_quarkNonces[8];
static uint32_t *d_branch1Nonces[8];
static uint32_t *d_branch2Nonces[8];
static uint32_t *d_branch3Nonces[8];
extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_skein512_cpu_init(int thr_id, int threads);
extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_keccak512_cpu_init(int thr_id, int threads);
extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_jh512_cpu_init(int thr_id, int threads);
extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
uint32_t *d_nonces2, size_t *nrm2,
int order);
extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, size_t *nrm1,
int order);
// Original Quarkhash Funktion aus einem miner Quelltext
inline void quarkhash(void *state, const void *input)
{
sph_blake512_context ctx_blake;
sph_bmw512_context ctx_bmw;
sph_groestl512_context ctx_groestl;
sph_jh512_context ctx_jh;
sph_keccak512_context ctx_keccak;
sph_skein512_context ctx_skein;
unsigned char hash[64];
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, input, 80);
sph_blake512_close(&ctx_blake, (void*) hash);
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash);
if (hash[0] & 0x8)
{
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
}
else
{
sph_skein512_init(&ctx_skein);
// ZSKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
}
sph_groestl512_init(&ctx_groestl);
// ZGROESTL;
sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
sph_groestl512_close(&ctx_groestl, (void*) hash);
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
if (hash[0] & 0x8)
{
sph_blake512_init(&ctx_blake);
// ZBLAKE;
sph_blake512 (&ctx_blake, (const void*) hash, 64);
sph_blake512_close(&ctx_blake, (void*) hash);
}
else
{
sph_bmw512_init(&ctx_bmw);
// ZBMW;
sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
sph_bmw512_close(&ctx_bmw, (void*) hash);
}
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
sph_skein512_init(&ctx_skein);
// SKEIN;
sph_skein512 (&ctx_skein, (const void*) hash, 64);
sph_skein512_close(&ctx_skein, (void*) hash);
if (hash[0] & 0x8)
{
sph_keccak512_init(&ctx_keccak);
// ZKECCAK;
sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
sph_keccak512_close(&ctx_keccak, (void*) hash);
}
else
{
sph_jh512_init(&ctx_jh);
// ZJH;
sph_jh512 (&ctx_jh, (const void*) hash, 64);
sph_jh512_close(&ctx_jh, (void*) hash);
}
memcpy(state, hash, 32);
}
extern bool opt_benchmark;
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
// TODO: entfernen für eine Release! Ist nur zum Testen!
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff;
const uint32_t Htarg = ptarget[7];
const int throughput = 256*4096; // 100;
static bool init[8] = {0,0,0,0,0,0,0,0};
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput);
quark_skein512_cpu_init(thr_id, throughput);
quark_bmw512_cpu_init(thr_id, throughput);
quark_keccak512_cpu_init(thr_id, throughput);
quark_jh512_cpu_init(thr_id, throughput);
quark_check_cpu_init(thr_id, throughput);
quark_compactTest_cpu_init(thr_id, throughput);
cudaMalloc(&d_quarkNonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput);
cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*throughput);
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
quark_blake512_cpu_setBlock_80((void*)endiandata);
quark_check_cpu_setTarget(ptarget);
do {
int order = 0;
size_t nrm1=0, nrm2=0, nrm3=0;
// erstes Blake512 Hash mit CUDA
quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
// das ist der unbedingte Branch für BMW512
quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
quark_compactTest_single_false_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
d_branch3Nonces[thr_id], &nrm3,
order++);
// nur den Skein Branch weiterverfolgen
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Groestl512
quark_groestl512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// quarkNonces in branch1 und branch2 aufsplitten gemäss if (hash[0] & 0x8)
quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
d_branch1Nonces[thr_id], &nrm1,
d_branch2Nonces[thr_id], &nrm2,
order++);
// das ist der bedingte Branch für Blake512
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
// das ist der bedingte Branch für Bmw512
quark_bmw512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// das ist der unbedingte Branch für Skein512
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
// quarkNonces in branch1 und branch2 aufsplitten gemäss if (hash[0] & 0x8)
quark_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
d_branch1Nonces[thr_id], &nrm1,
d_branch2Nonces[thr_id], &nrm2,
order++);
// das ist der bedingte Branch für Keccak512
quark_keccak512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
// das ist der bedingte Branch für JH512
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
// Scan nach Gewinner Hashes auf der GPU
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
if (foundNonce != 0xffffffff)
{
uint32_t vhash64[8];
be32enc(&endiandata[19], foundNonce);
quarkhash(vhash64, endiandata);
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
pdata[19] = foundNonce;
*hashes_done = (foundNonce - first_nonce + 1)/2;
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
}
}
pdata[19] += throughput;
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = (pdata[19] - first_nonce + 1)/2;
return 0;
}
Loading…
Cancel
Save