Browse Source

m7, add sources from djm34 repo (v6+)

fix most part of indentation and headers

but ive copied whirlpool and x17 stuff in a new m7 folder
Tanguy Pruvot 10 years ago
parent
commit
bf55536ba6
  1. 14
      Makefile.am
  2. 7
      build.sh
  3. 601
      compat/Makefile.in
  4. 571
      compat/jansson/Makefile.in
  5. 8102
      configure
  6. 18
      configure.ac
  7. 6
      cpuminer-config.h
  8. 143
      cuda_helper.h
  9. 431
      m7/cuda_m7_haval256.cu
  10. 532
      m7/cuda_m7_sha256.cu
  11. 344
      m7/cuda_m7_sha512.cu
  12. 2870
      m7/cuda_m7_whirlpool.cu
  13. 276
      m7/cuda_mul.cu
  14. 468
      m7/cuda_mul2.cu
  15. 401
      m7/cuda_ripemd160.cu
  16. 795
      m7/cuda_tiger192.cu
  17. 359
      m7/m7.cu
  18. 287
      m7/m7_keccak512.cu
  19. 833
      sph/ripemd.c
  20. 273
      sph/sph_ripemd.h
  21. 691
      sph/sph_sha2.c
  22. 191
      sph/sph_tiger.h
  23. 698
      sph/tiger.c
  24. 4
      x15/cuda_x15_whirlpool.cu

14
Makefile.am

@@ -37,14 +37,18 @@ ccminer_SOURCES = elist.h miner.h compat.h \
sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
sph/sph_sha2.c sph/tiger.c sph/ripemd.c sph/sph_sha2.h sph/sph_tiger.h sph/sph_ripemd.h \
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu x15/whirlpool.cu \
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu
x17/x17.cu x17/cuda_x17_haval512.cu x17/cuda_x17_sha512.cu \
m7/m7.cu m7/cuda_m7_sha256.cu m7/cuda_tiger192.cu m7/cuda_ripemd160.cu m7/m7_keccak512.cu \
m7/cuda_m7_whirlpool.cu m7/cuda_m7_haval256.cu m7/cuda_m7_sha512.cu \
m7/cuda_mul.cu m7/cuda_mul2.cu
ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ @MPIRLIB@
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
nvcc_ARCH = -gencode=arch=compute_50,code=\"sm_50,compute_50\"
@@ -69,9 +73,15 @@ x11/cuda_x11_echo.o: x11/cuda_x11_echo.cu
x11/cuda_x11_shavite512.o: x11/cuda_x11_shavite512.cu
$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include @CFLAGS@ --maxrregcount=128 -o $@ -c $<
x15/cuda_x15_whirlpool.o: x15/cuda_x15_whirlpool.cu
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=96 -o $@ -c $<
x17/cuda_x17_sha512.o: x17/cuda_x17_sha512.cu
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=80 -o $@ -c $<
m7/cuda_tiger192.o: m7/cuda_tiger192.cu
$(NVCC) $(nvcc_FLAGS) -O2 --maxrregcount=64 -o $@ -c $<
# ABI requiring code modules
quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
$(NVCC) $(nvcc_FLAGS) -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" --maxrregcount=80 -o $@ -c $<

7
config.sh → build.sh

@@ -1,7 +1,6 @@
#!/bin/bash
# Simple script to create the Makefile
# then type 'make'
# Simple script to create the Makefile and build
# export PATH="$PATH:/usr/local/cuda/bin/"
@@ -11,4 +10,6 @@ rm -f Makefile.in
rm -f config.status
./autogen.sh || echo done
CC=/usr/local/bin/colorgcc.pl CFLAGS="-O2" ./configure
CFLAGS="-O2" ./configure --with-mpir-src=../mpir-2.6.0
make

601
compat/Makefile.in

@@ -1,601 +0,0 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = compat
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/cpuminer-config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = jansson
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CUDA_CFLAGS = @CUDA_CFLAGS@
CUDA_LDFLAGS = @CUDA_LDFLAGS@
CUDA_LIBS = @CUDA_LIBS@
CXX = @CXX@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JANSSON_LIBS = @JANSSON_LIBS@
LDFLAGS = @LDFLAGS@
LIBCURL = @LIBCURL@
LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LTLIBOBJS = @LTLIBOBJS@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
NVCC = @NVCC@
OBJEXT = @OBJEXT@
OPENMP_CFLAGS = @OPENMP_CFLAGS@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PTHREAD_FLAGS = @PTHREAD_FLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WS2_LIBS = @WS2_LIBS@
_libcurl_config = @_libcurl_config@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
@WANT_JANSSON_FALSE@SUBDIRS =
@WANT_JANSSON_TRUE@SUBDIRS = jansson
all: all-recursive
.SUFFIXES:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu compat/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu compat/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-generic mostlyclean-am
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
.MAKE: $(am__recursive_targets) install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic cscopelist-am ctags ctags-am \
distclean distclean-generic distclean-tags distdir dvi dvi-am \
html html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

571
compat/jansson/Makefile.in

@@ -1,571 +0,0 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = compat/jansson
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/cpuminer-config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LIBRARIES = $(noinst_LIBRARIES)
AR = ar
ARFLAGS = cru
AM_V_AR = $(am__v_AR_@AM_V@)
am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@)
am__v_AR_0 = @echo " AR " $@;
am__v_AR_1 =
libjansson_a_AR = $(AR) $(ARFLAGS)
libjansson_a_LIBADD =
am_libjansson_a_OBJECTS = dump.$(OBJEXT) hashtable.$(OBJEXT) \
load.$(OBJEXT) strbuffer.$(OBJEXT) utf.$(OBJEXT) \
value.$(OBJEXT)
libjansson_a_OBJECTS = $(am_libjansson_a_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libjansson_a_SOURCES)
DIST_SOURCES = $(libjansson_a_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CUDA_CFLAGS = @CUDA_CFLAGS@
CUDA_LDFLAGS = @CUDA_LDFLAGS@
CUDA_LIBS = @CUDA_LIBS@
CXX = @CXX@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JANSSON_LIBS = @JANSSON_LIBS@
LDFLAGS = @LDFLAGS@
LIBCURL = @LIBCURL@
LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LTLIBOBJS = @LTLIBOBJS@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
NVCC = @NVCC@
OBJEXT = @OBJEXT@
OPENMP_CFLAGS = @OPENMP_CFLAGS@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PTHREAD_FLAGS = @PTHREAD_FLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WS2_LIBS = @WS2_LIBS@
_libcurl_config = @_libcurl_config@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
noinst_LIBRARIES = libjansson.a
libjansson_a_SOURCES = \
config.h \
dump.c \
hashtable.c \
hashtable.h \
jansson.h \
jansson_private.h \
load.c \
strbuffer.c \
strbuffer.h \
utf.c \
utf.h \
util.h \
value.c
all: all-am
.SUFFIXES:
.SUFFIXES: .c .o .obj
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu compat/jansson/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu compat/jansson/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstLIBRARIES:
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
libjansson.a: $(libjansson_a_OBJECTS) $(libjansson_a_DEPENDENCIES) $(EXTRA_libjansson_a_DEPENDENCIES)
$(AM_V_at)-rm -f libjansson.a
$(AM_V_AR)$(libjansson_a_AR) libjansson.a $(libjansson_a_OBJECTS) $(libjansson_a_LIBADD)
$(AM_V_at)$(RANLIB) libjansson.a
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dump.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashtable.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strbuffer.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/value.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-noinstLIBRARIES cscopelist-am ctags ctags-am distclean \
distclean-compile distclean-generic distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-info install-info-am install-man install-pdf \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

8102
configure vendored

File diff suppressed because it is too large Load Diff

18
configure.ac

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
AC_INIT([ccminer], [2014.08.12])
AC_INIT([ccminer], [2014.08.25])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
@ -154,11 +154,23 @@ else @@ -154,11 +154,23 @@ else
CUDA_LDFLAGS="-L/usr/local/cuda/lib$SUFFIX"
NVCC="nvcc"
fi
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_LDFLAGS)
AC_SUBST(NVCC)
AC_ARG_WITH([mpir-src],
[ --with-mpir-src=PATH prefix for mpir src [sample=../mpir-2.6.0]])
if test -n "$with_mpir_src"
then
MPIRLIB="-lmpir"
CUDA_CFLAGS="$CUDA_CFLAGS -I$with_mpir_src"
CUDA_LDFLAGS="$CUDA_LDFLAGS -L$with_mpir_src"
fi
AC_SUBST(MPIRLIB)
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LDFLAGS)
AC_SUBST(OPENMP_CFLAGS)
AC_OUTPUT

6
cpuminer-config.h

@ -156,7 +156,7 @@ @@ -156,7 +156,7 @@
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ccminer 2014.08.12"
#define PACKAGE_STRING "ccminer 2014.08.25"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@ @@ -165,7 +165,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "2014.08.12"
#define PACKAGE_VERSION "2014.08.25"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@ @@ -188,7 +188,7 @@
#define USE_XOP 1
/* Version number of package */
#define VERSION "2014.08.12"
#define VERSION "2014.08.25"
/* Define curl_free() as free() if our version of curl lacks curl_free. */
/* #undef curl_free */

143
cuda_helper.h

@ -110,6 +110,9 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x) @@ -110,6 +110,9 @@ __device__ __forceinline__ uint64_t cuda_swab64(uint64_t x)
(((uint64_t)(x) & 0x00000000000000ffULL) << 56)))
#endif
#define cuda_swab32ll(u64) \
MAKE_ULONGLONG(cuda_swab32(_LOWORD(u64)), cuda_swab32(_HIWORD(u64)))
/*********************************************************************/
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
@ -132,8 +135,7 @@ do { \ @@ -132,8 +135,7 @@ do { \
#if USE_XOR_ASM_OPTS
// device asm for whirpool
__device__ __forceinline__
uint64_t xor1(uint64_t a, uint64_t b)
{
uint64_t xor1(uint64_t a, uint64_t b) {
uint64_t result;
asm("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(a), "l"(b));
return result;
@ -145,8 +147,7 @@ uint64_t xor1(uint64_t a, uint64_t b) @@ -145,8 +147,7 @@ uint64_t xor1(uint64_t a, uint64_t b)
#if USE_XOR_ASM_OPTS
// device asm for whirpool
__device__ __forceinline__
uint64_t xor3(uint64_t a, uint64_t b, uint64_t c)
{
uint64_t xor3(uint64_t a, uint64_t b, uint64_t c) {
uint64_t result;
asm("xor.b64 %0, %2, %3;\n\t"
"xor.b64 %0, %0, %1;\n\t"
@ -159,7 +160,78 @@ uint64_t xor3(uint64_t a, uint64_t b, uint64_t c) @@ -159,7 +160,78 @@ uint64_t xor3(uint64_t a, uint64_t b, uint64_t c)
#endif
#if USE_XOR_ASM_OPTS
// device asm for whirpool
// device asm 32 for m7_sha256
__device__ __forceinline__
uint32_t xor3b(uint32_t a, uint32_t b, uint32_t c) {
uint32_t result;
asm("xor.b32 %0, %2, %3;\n\t"
"xor.b32 %0, %0, %1;\n\t"
: "=r"(result) : "r"(a) ,"r"(b),"r"(c));
return result;
}
#else
#define xor3b(a,b,c) (a ^ b ^ c)
#endif
#if USE_XOR_ASM_OPTS
// device asm for m7_sha256
__device__ __forceinline__
uint64_t xor5(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e) {
uint64_t result;
asm("{\n\t"
" .reg .u64 t1,t2,t3;\n\t"
"xor.b64 t1, %1, %2;\n\t"
"xor.b64 t2, %3, %4;\n\t"
"xor.b64 t3, t1, t2;\n\t"
"xor.b64 %0, t3,%5;\n\t"
"}"
: "=l"(result) : "l"(a) ,"l"(b), "l"(c), "l"(d) ,"l"(e));
return result;
}
#else
#define xor5(a,b,c,d,e) (a ^ b ^ c ^ d ^ e)
#endif
#if USE_XOR_ASM_OPTS
// device asm for m7_ripemd160
__device__ __forceinline__
uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c)
{
uint64_t result;
asm("{\n\t"
".reg .u64 m,n;\n\t"
"not.b64 m,%2; \n\t"
"or.b64 n, %1,m;\n\t"
"xor.b64 %0, n,%3;\n\t"
"}"
: "=l"(result) : "l"(a), "l"(b), "l"(c));
return result;
}
#else
#define xornot64(a,b,c) (c ^ (a | ~b))
#endif
#if USE_XOR_ASM_OPTS
// device asm for m7_sha256
__device__ __forceinline__
uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c)
{
uint64_t result;
asm("{\n\t"
".reg .u64 m,n;\n\t"
"not.b64 m,%3; \n\t"
"or.b64 n, %2,m;\n\t"
"xor.b64 %0, %1,n;\n\t"
"}"
: "=l"(result) : "l"(a), "l"(b), "l"(c));
return result;
}
#else
#define xornt64(a,b,c) (a ^ (b | ~c))
#endif
#if USE_XOR_ASM_OPTS
// device asm for whirlpool
__device__ __forceinline__
uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t f,uint64_t g, uint64_t h)
{
@ -242,6 +314,39 @@ uint64_t shl_t64(uint64_t x, uint32_t n) @@ -242,6 +314,39 @@ uint64_t shl_t64(uint64_t x, uint32_t n)
return result;
}
// device asm for m7_sha256
__device__ __forceinline__
uint32_t andor32(uint32_t a, uint32_t b, uint32_t c)
{
uint32_t result;
asm("{\n\t"
".reg .u32 m,n;\n\t"
"and.b32 m, %1, %2;\n\t"
" or.b32 n, %1, %2;\n\t"
"and.b32 %0, n, %3;\n\t"
" or.b32 %0, %0, m ;\n\t"
"}\n"
: "=r"(result) : "r"(a), "r"(b), "r"(c));
return result;
}
// device asm for m7_sha256
__device__ __forceinline__
uint32_t shr_t32(uint32_t x,uint32_t n)
{
uint32_t result;
asm("shr.b32 %0,%1,%2;\n" : "=r"(result) : "r"(x), "r"(n));
return result;
}
// device asm for ?
__device__ __forceinline__
uint32_t shl_t32(uint32_t x,uint32_t n)
{
uint32_t result;
asm("shl.b32 %0,%1,%2;\n" : "=r"(result) : "r"(x), "r"(n));
return result;
}
// 64-bit ROTATE RIGHT
#if __CUDA_ARCH__ >= 350
@ -314,4 +419,32 @@ uint64_t ROTL64(const uint64_t x, const int offset) @@ -314,4 +419,32 @@ uint64_t ROTL64(const uint64_t x, const int offset)
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
__device__ __forceinline__
void muladd128(uint64_t &u,uint64_t &v,uint64_t a, uint64_t b,uint64_t &c,uint64_t &e)
{
asm("{\n\t"
".reg .b64 abl,abh; \n\t"
".reg .b32 abll,ablh,abhl,abhh,x1,x2,x3,x4; \n\t"
".reg .b32 cl,ch,el,eh; \n\t"
"mul.lo.u64 abl,%2,%3; \n\t"
"mul.hi.u64 abh,%2,%3; \n\t"
"mov.b64 {abll,ablh},abl; \n\t"
"mov.b64 {abhl,abhh},abh; \n\t"
"mov.b64 {cl,ch},%4; \n\t"
"mov.b64 {el,eh},%5; \n\t"
"add.cc.u32 x1,cl,el; \n\t"
"addc.cc.u32 x2,ch,eh; \n\t"
"addc.u32 x3,0,0; \n\t"
"add.cc.u32 x1,x1,abll; \n\t"
"addc.cc.u32 x2,x2,ablh; \n\t"
"addc.cc.u32 x3,x3,abhl; \n\t"
"addc.u32 x4,abhh,0; \n\t"
"mov.b64 %1,{x1,x2}; \n\t"
"mov.b64 %0,{x3,x4}; \n\t"
"}"
: "=l"(u), "=l"(v) : "l"(a) , "l"(b) , "l"(c) , "l"(e));
}
#endif // #ifndef CUDA_HELPER_H

431
m7/cuda_m7_haval256.cu

@ -0,0 +1,431 @@ @@ -0,0 +1,431 @@
/*
* Haval-256 for m7
*
* Built on cbuchner1's implementation, actual hashing code
* heavily based on phm's sgminer
*
*/
/*
* Haval-256 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
#include <stdio.h>
#include <memory.h>
#define USE_SHARED 1
#include "cuda_helper.h"
// 32-bit rotate-left/right helpers (SPH_T32/SPH_C32 come from cuda_helper.h).
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
// in heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// Device constant memory: the 122-byte M7 header padded to 32 words
// (uploaded by m7_haval256_setBlock_120) and the HAVAL initial state
// (uploaded by m7_haval256_cpu_init).
__constant__ static uint32_t c_PaddedMessage80[32];
__constant__ static uint32_t initVector[8];
// HAVAL-256 initial chaining values (host copy of the device initVector).
static const uint32_t c_initVector[8] = {
SPH_C32(0x243F6A88),
SPH_C32(0x85A308D3),
SPH_C32(0x13198A2E),
SPH_C32(0x03707344),
SPH_C32(0xA4093822),
SPH_C32(0x299F31D0),
SPH_C32(0x082EFA98),
SPH_C32(0xEC4E6C89)
};
/* PASS1..PASS5: the five passes of the 5-pass HAVAL compression
 * function.  Each pass applies STEP 32 times, rotating the roles of the
 * eight state words s0..s7 every line; pass 1 uses the message words in
 * natural order with zero additive constants. */
#define PASS1(n, in) { \
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x00000000)); \
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 1], SPH_C32(0x00000000)); \
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[ 2], SPH_C32(0x00000000)); \
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x00000000)); \
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x00000000)); \
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[ 5], SPH_C32(0x00000000)); \
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[ 6], SPH_C32(0x00000000)); \
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[ 7], SPH_C32(0x00000000)); \
\
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 8], SPH_C32(0x00000000)); \
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x00000000)); \
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[10], SPH_C32(0x00000000)); \
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[11], SPH_C32(0x00000000)); \
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[12], SPH_C32(0x00000000)); \
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[13], SPH_C32(0x00000000)); \
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[14], SPH_C32(0x00000000)); \
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x00000000)); \
\
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[16], SPH_C32(0x00000000)); \
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[17], SPH_C32(0x00000000)); \
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[18], SPH_C32(0x00000000)); \
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[19], SPH_C32(0x00000000)); \
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[20], SPH_C32(0x00000000)); \
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[21], SPH_C32(0x00000000)); \
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[22], SPH_C32(0x00000000)); \
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0x00000000)); \
\
STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x00000000)); \
STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[25], SPH_C32(0x00000000)); \
STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0x00000000)); \
STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[27], SPH_C32(0x00000000)); \
STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0x00000000)); \
STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[29], SPH_C32(0x00000000)); \
STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[30], SPH_C32(0x00000000)); \
STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[31], SPH_C32(0x00000000)); \
}
/* HAVAL pass 2: permuted message-word order and pass-2 additive
 * constants per the HAVAL specification. */
#define PASS2(n, in) { \
STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x452821E6)); \
STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0x38D01377)); \
STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0xBE5466CF)); \
STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[18], SPH_C32(0x34E90C6C)); \
STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[11], SPH_C32(0xC0AC29B7)); \
STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[28], SPH_C32(0xC97C50DD)); \
STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 7], SPH_C32(0x3F84D5B5)); \
STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[16], SPH_C32(0xB5470917)); \
\
STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x9216D5D9)); \
STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0x8979FB1B)); \
STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[20], SPH_C32(0xD1310BA6)); \
STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0x98DFB5AC)); \
STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0x2FFD72DB)); \
STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xD01ADFB7)); \
STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 4], SPH_C32(0xB8E1AFED)); \
STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 8], SPH_C32(0x6A267E96)); \
\
STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[30], SPH_C32(0xBA7C9045)); \
STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0xF12C7F99)); \
STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x24A19947)); \
STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[ 9], SPH_C32(0xB3916CF7)); \
STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x0801F2E2)); \
STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[24], SPH_C32(0x858EFC16)); \
STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[29], SPH_C32(0x636920D8)); \
STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 6], SPH_C32(0x71574E69)); \
\
STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0xA458FEA3)); \
STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[12], SPH_C32(0xF4933D7E)); \
STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[15], SPH_C32(0x0D95748F)); \
STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[13], SPH_C32(0x728EB658)); \
STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0x718BCD58)); \
STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0x82154AEE)); \
STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x7B54A41D)); \
STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0xC25A59B5)); \
}
/* HAVAL pass 3: permuted message-word order and pass-3 additive
 * constants per the HAVAL specification. */
#define PASS3(n, in) { \
STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x9C30D539)); \
STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x2AF26013)); \
STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 4], SPH_C32(0xC5D1B023)); \
STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0x286085F0)); \
STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0xCA417918)); \
STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[17], SPH_C32(0xB8DB38EF)); \
STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 8], SPH_C32(0x8E79DCB0)); \
STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[22], SPH_C32(0x603A180E)); \
\
STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[29], SPH_C32(0x6C9E0E8B)); \
STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0xB01E8A3E)); \
STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[25], SPH_C32(0xD71577C1)); \
STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[12], SPH_C32(0xBD314B27)); \
STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[24], SPH_C32(0x78AF2FDA)); \
STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[30], SPH_C32(0x55605C60)); \
STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0xE65525F3)); \
STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[26], SPH_C32(0xAA55AB94)); \
\
STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[31], SPH_C32(0x57489862)); \
STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[15], SPH_C32(0x63E81440)); \
STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 7], SPH_C32(0x55CA396A)); \
STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x2AAB10B6)); \
STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0xB4CC5C34)); \
STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[ 0], SPH_C32(0x1141E8CE)); \
STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[18], SPH_C32(0xA15486AF)); \
STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0x7C72E993)); \
\
STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[13], SPH_C32(0xB3EE1411)); \
STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x636FBC2A)); \
STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x2BA9C55D)); \
STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[10], SPH_C32(0x741831F6)); \
STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[23], SPH_C32(0xCE5C3E16)); \
STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x9B87931E)); \
STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 5], SPH_C32(0xAFD6BA33)); \
STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[ 2], SPH_C32(0x6C24CF5C)); \
}
/* HAVAL pass 4: permuted message-word order and pass-4 additive
 * constants per the HAVAL specification. */
#define PASS4(n, in) { \
STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x7A325381)); \
STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 4], SPH_C32(0x28958677)); \
STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 0], SPH_C32(0x3B8F4898)); \
STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[14], SPH_C32(0x6B4BB9AF)); \
STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0xC4BFE81B)); \
STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[ 7], SPH_C32(0x66282193)); \
STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x61D809CC)); \
STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0xFB21A991)); \
\
STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[26], SPH_C32(0x487CAC60)); \
STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x5DEC8032)); \
STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[30], SPH_C32(0xEF845D5D)); \
STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0xE98575B1)); \
STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDC262302)); \
STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0xEB651B88)); \
STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[19], SPH_C32(0x23893E81)); \
STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 3], SPH_C32(0xD396ACC5)); \
\
STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[22], SPH_C32(0x0F6D6FF3)); \
STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[11], SPH_C32(0x83F44239)); \
STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[31], SPH_C32(0x2E0B4482)); \
STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[21], SPH_C32(0xA4842004)); \
STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 8], SPH_C32(0x69C8F04A)); \
STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[27], SPH_C32(0x9E1F9B5E)); \
STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[12], SPH_C32(0x21C66842)); \
STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 9], SPH_C32(0xF6E96C9A)); \
\
STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[ 1], SPH_C32(0x670C9C61)); \
STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[29], SPH_C32(0xABD388F0)); \
STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 5], SPH_C32(0x6A51A0D2)); \
STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[15], SPH_C32(0xD8542F68)); \
STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x960FA728)); \
STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xAB5133A3)); \
STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0x6EEF0B6C)); \
STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[13], SPH_C32(0x137A3BE4)); \
}
/* HAVAL pass 5: permuted message-word order and pass-5 additive
 * constants per the HAVAL specification. */
#define PASS5(n, in) { \
STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[27], SPH_C32(0xBA3BF050)); \
STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0x7EFB2A98)); \
STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0xA1F1651D)); \
STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[26], SPH_C32(0x39AF0176)); \
STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x66CA593E)); \
STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x82430E88)); \
STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[20], SPH_C32(0x8CEE8619)); \
STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[29], SPH_C32(0x456F9FB4)); \
\
STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x7D84A5C3)); \
STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 0], SPH_C32(0x3B8B5EBE)); \
STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[12], SPH_C32(0xE06F75D8)); \
STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[ 7], SPH_C32(0x85C12073)); \
STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[13], SPH_C32(0x401A449F)); \
STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 8], SPH_C32(0x56C16AA6)); \
STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x4ED3AA62)); \
STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[10], SPH_C32(0x363F7706)); \
\
STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x1BFEDF72)); \
STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x429B023D)); \
STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[14], SPH_C32(0x37D0D724)); \
STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[30], SPH_C32(0xD00A1248)); \
STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDB0FEAD3)); \
STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 6], SPH_C32(0x49F1C09B)); \
STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x075372C9)); \
STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[24], SPH_C32(0x80991B7B)); \
\
STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 2], SPH_C32(0x25D479D8)); \
STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0xF6E8DEF7)); \
STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[16], SPH_C32(0xE3FE501A)); \
STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0xB6794C3B)); \
STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x976CE0BD)); \
STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 1], SPH_C32(0x04C006BA)); \
STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[25], SPH_C32(0xC1A94FB6)); \
STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x409F60C4)); \
}
/* F1..F5: the five HAVAL boolean (nonlinear) functions on seven inputs. */
#define F1(x6, x5, x4, x3, x2, x1, x0) \
(((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0))
#define F2(x6, x5, x4, x3, x2, x1, x0) \
(((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \
^ ((x4) & ((x1) ^ (x5))) ^ ((x3 & (x5)) ^ (x0)))
#define F3(x6, x5, x4, x3, x2, x1, x0) \
(((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \
^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0))
#define F4(x6, x5, x4, x3, x2, x1, x0) \
(((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \
^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \
^ ((x2) & (x6)) ^ (x0))
#define F5(x6, x5, x4, x3, x2, x1, x0) \
(((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \
^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)))
/* FP5_p: the 5-pass input permutations applied to F1..F5 (one per pass). */
#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
F1(x3, x4, x1, x0, x5, x2, x6)
#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
F2(x6, x2, x1, x0, x3, x4, x5)
#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
F3(x2, x6, x0, x4, x3, x1, x5)
#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
F4(x1, x5, x3, x2, x0, x4, x6)
#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
F5(x2, x5, x0, x6, x4, x3, x1)
/* One HAVAL step: x7 = rotr7(FPn_p(x6..x0)) + rotr11(x7) + word + constant.
 * n = total passes (5 here), p = current pass, w = message word, c = constant. */
#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) { \
uint32_t t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
(x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \
+ (w) + (c)); \
}
// HAVAL-256 (5 passes) over the 122-byte M7 block header, one nonce per
// thread.  Reads the padded header from c_PaddedMessage80 (set by
// m7_haval256_setBlock_120) and writes 4x uint64 of hash per thread into
// outputHash, strided by `threads` (word i of thread t at outputHash[i*threads+t]).
__global__
void m7_haval256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread;
union {
uint32_t h4[16];
uint64_t h8[8];
} hash;
// s0..s7: HAVAL state; u0..u7: copy for the feed-forward addition after each block.
uint32_t u0, u1, u2, u3, u4, u5, u6, u7;
uint32_t s0,s1,s2,s3,s4,s5,s6,s7;
uint32_t buf[32];
s0 = initVector[0];
s1 = initVector[1];
s2 = initVector[2];
s3 = initVector[3];
s4 = initVector[4];
s5 = initVector[5];
s6 = initVector[6];
s7 = initVector[7];
u0 = s0;
u1 = s1;
u2 = s2;
u3 = s3;
u4 = s4;
u5 = s5;
u6 = s6;
u7 = s7;
///////// first 128-byte block: header words 0..28, nonce in word 29 /////////
// NOTE(review): loop trip count is 29 but the pragma requests 28 - presumably
// intended as a full unroll; confirm before changing.
#pragma unroll 28
for (int i=0;i<29;i++) {
buf[i]=c_PaddedMessage80[i];
}
// Nonce occupies bytes 116..119 of the header (word 29).
buf[29]=nounce;
// Bytes 120..121 come from the header; "+0x00010000" drops HAVAL's 0x01 padding
// byte into byte 122 (assumes the upper bytes of word 30 are zero). //need to fix that
buf[30]=c_PaddedMessage80[30]+0x00010000;
buf[31]=0;
PASS1(5, buf);
PASS2(5, buf);
PASS3(5, buf);
PASS4(5, buf);
PASS5(5, buf);
// Feed-forward (Davies-Meyer style) addition of the previous state.
// NOTE(review): lowercase sph_t32 - presumably an alias of SPH_T32 pulled in
// via cuda_helper.h / sph headers; confirm it is defined.
s0 = sph_t32(s0 + u0);
s1 = sph_t32(s1 + u1);
s2 = sph_t32(s2 + u2);
s3 = sph_t32(s3 + u3);
s4 = sph_t32(s4 + u4);
s5 = sph_t32(s5 + u5);
s6 = sph_t32(s6 + u6);
s7 = sph_t32(s7 + u7);
u0 = s0;
u1 = s1;
u2 = s2;
u3 = s3;
u4 = s4;
u5 = s5;
u6 = s6;
u7 = s7;
///////// final block: all-zero message with the HAVAL trailer /////////
#pragma unroll 32
for (int i=0;i<32;i++) {buf[i]=0;}
// Trailer: byte 118 = version|passes<<3 = 0x29, byte 119 = output-words<<3 = 0x40.
buf[29]=0x40290000;
// Message bit length: 122 bytes * 8 = 976 = 0x3d0.
buf[30]=0x000003d0;
PASS1(5, buf);
PASS2(5, buf);
PASS3(5, buf);
PASS4(5, buf);
PASS5(5, buf);
s0 = sph_t32(s0 + u0);
s1 = sph_t32(s1 + u1);
s2 = sph_t32(s2 + u2);
s3 = sph_t32(s3 + u3);
s4 = sph_t32(s4 + u4);
s5 = sph_t32(s5 + u5);
s6 = sph_t32(s6 + u6);
s7 = sph_t32(s7 + u7);
//////////////////// emit the 256-bit digest ////////////////////
hash.h4[0]=s0;
hash.h4[1]=s1;
hash.h4[2]=s2;
hash.h4[3]=s3;
hash.h4[4]=s4;
hash.h4[5]=s5;
hash.h4[6]=s6;
hash.h4[7]=s7;
#pragma unroll 4
for (int i=0;i<4;i++) {
outputHash[i*threads+thread]=hash.h8[i];
}
} // threads
}
__host__
void m7_haval256_cpu_init(int thr_id, int threads)
{
	// Upload the HAVAL-256 initial chaining values into device constant
	// memory.  thr_id/threads are unused but kept so the signature
	// matches the other m7 *_cpu_init routines.
	// Fix: the cudaMemcpyToSymbol return value was previously ignored;
	// CUDA_SAFE_CALL (cuda_helper.h) reports and aborts on failure.
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(initVector, c_initVector, sizeof(c_initVector), 0, cudaMemcpyHostToDevice));
}
__host__
void m7_haval256_setBlock_120(void *pdata)
{
	// Upload the 122-byte M7 block header, zero-padded to the 128-byte
	// (32-word) constant buffer the kernel reads.  Words 29..31 are
	// overwritten per nonce on the device.
	unsigned char PaddedMessage[128];
	memcpy(PaddedMessage, pdata, 122);
	memset(PaddedMessage + 122, 0, 6);
	// Fix: check the CUDA API result instead of silently ignoring it.
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 32 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice));
}
// Host-side launcher: computes the grid size for `threads` nonces,
// launches m7_haval256_gpu_hash_120 and synchronizes via MyStreamSynchronize.
__host__
void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
const int threadsperblock = 256; // alignment with the mixtab size - do NOT change
// compute how many thread blocks we need
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);
size_t shared_size = 0;
m7_haval256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
MyStreamSynchronize(NULL, order, thr_id);
}

532
m7/cuda_m7_sha256.cu

@ -0,0 +1,532 @@ @@ -0,0 +1,532 @@
#include <stdio.h>
#include <memory.h>
#include "cuda_helper.h"
#include "sph/sph_types.h"
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
//#define SPH_C64(x) ((uint64_t)(x ## ULL))
//#define SPH_C32(x) ((uint32_t)(x ## U))
// Truncate to 32 bits (SPH_C32 comes from sph/sph_types.h).
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define ROTR SPH_ROTR32
// Host-side 32-bit byte swap (big<->little endian).
#define host_swab32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
// NOTE(review): presumably the share target and precomputed message words -
// they are written by host code outside this chunk; confirm their setters.
__constant__ uint32_t pTarget[8];
__constant__ uint32_t pbuf[8];
// Per-GPU result-nonce buffers (indexed by thr_id).
uint32_t *d_mnounce[8];
uint32_t *d_MNonce[8];
// Device copies of the SHA-256 initial state and round constants
// (host masters: cpu_H256 / cpu_K below).
static __constant__ uint32_t H256[8];
static __constant__ uint32_t K[64];
__constant__ uint32_t sha256_gpu_blockHeader[16]; // 2x512 bit message
__constant__ uint32_t sha256_gpu_register[8];
// SHA-256 initial hash values H0..H7 (FIPS 180-4, sec. 5.3.3).
static const uint32_t cpu_H256[8] = {
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
};
// SHA-256 round constants K0..K63 (FIPS 180-4, sec. 4.2.2).
static const uint32_t cpu_K[64] = {
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
};
// SHA-256 big Sigma0: ROTR(x,2) ^ ROTR(x,13) ^ ROTR(x,22) (FIPS 180-4).
static __device__ __forceinline__ uint32_t bsg2_0(uint32_t x)
{
	return xor3b(SPH_ROTR32(x, 2), SPH_ROTR32(x, 13), SPH_ROTR32(x, 22));
}
// SHA-256 big Sigma1: ROTR(x,6) ^ ROTR(x,11) ^ ROTR(x,25) (FIPS 180-4).
static __device__ __forceinline__ uint32_t bsg2_1(uint32_t x)
{
	return xor3b(SPH_ROTR32(x, 6), SPH_ROTR32(x, 11), SPH_ROTR32(x, 25));
}
// SHA-256 small sigma0: ROTR(x,7) ^ ROTR(x,18) ^ (x >> 3) (FIPS 180-4).
// Fix: intermediates were declared uint64_t although this is a pure 32-bit
// function; use uint32_t like bsg2_0/bsg2_1 (same result, no widening).
static __device__ __forceinline__ uint32_t ssg2_0(uint32_t x)
{
	uint32_t r1 = SPH_ROTR32(x,7);
	uint32_t r2 = SPH_ROTR32(x,18);
	uint32_t r3 = shr_t32(x,3);
	return xor3b(r1,r2,r3);
}
// SHA-256 small sigma1: ROTR(x,17) ^ ROTR(x,19) ^ (x >> 10) (FIPS 180-4).
// Fix: intermediates were declared uint64_t although this is a pure 32-bit
// function; use uint32_t like bsg2_0/bsg2_1 (same result, no widening).
static __device__ __forceinline__ uint32_t ssg2_1(uint32_t x)
{
	uint32_t r1 = SPH_ROTR32(x,17);
	uint32_t r2 = SPH_ROTR32(x,19);
	uint32_t r3 = shr_t32(x,10);
	return xor3b(r1,r2,r3);
}
/* One SHA-256 round with a precomputed schedule word `in` (rounds 0..15).
 * Updates d and h in place; callers rotate the register names. */
static __device__ __forceinline__ void sha2_step1(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
	uint32_t in,const uint32_t Kshared)
{
	const uint32_t temp1 = h + bsg2_1(e) + xandx(e, f, g) + Kshared + in;
	const uint32_t temp2 = bsg2_0(a) + andor32(a, b, c);
	d += temp1;
	h = temp1 + temp2;
}
/* CPU twin of sha2_step1: one SHA-256 round for rounds 0..15,
 * used when precomputing the midstate of the first header block. */
static __forceinline__ void sha2_step1_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
	uint32_t in,const uint32_t Kshared)
{
	const uint32_t ch   = (((f) ^ (g)) & (e)) ^ (g);                 // Ch(e,f,g)
	const uint32_t sig1 = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25);    // Sigma1(e)
	const uint32_t sig0 = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22);    // Sigma0(a)
	const uint32_t maj  = ((b) & (c)) | (((b) | (c)) & (a));         // Maj(a,b,c)
	const uint32_t temp1 = h + sig1 + ch + Kshared + in;
	const uint32_t temp2 = sig0 + maj;
	d += temp1;
	h = temp1 + temp2;
}
/* One SHA-256 round for rounds 16..63: expands the message schedule in place
 * (16-word ring buffer `in`, index pc) and then performs the round. */
static __device__ __forceinline__ void sha2_step2(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
	uint32_t* in,uint32_t pc,const uint32_t Kshared)
{
	uint32_t t1,t2;
	// ring-buffer indices of W[t-2], W[t-7], W[t-15]
	int pcidx1 = (pc-2) & 0xF;
	int pcidx2 = (pc-7) & 0xF;
	int pcidx3 = (pc-15) & 0xF;
	uint32_t inx0 = in[pc];
	uint32_t inx1 = in[pcidx1];
	uint32_t inx2 = in[pcidx2];
	uint32_t inx3 = in[pcidx3];
	uint32_t ssg21 = ssg2_1(inx1);
	uint32_t ssg20 = ssg2_0(inx3);
	uint32_t vxandx = xandx(e, f, g);
	uint32_t bsg21 =bsg2_1(e);
	uint32_t bsg20 =bsg2_0(a);
	uint32_t andorv =andor32(a,b,c);
	// W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
	in[pc] = ssg21+inx2+ssg20+inx0;
	t1 = h + bsg21 + vxandx + Kshared + in[pc];
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}
/* CPU twin of sha2_step2: SHA-256 round 16..63 with in-place message
 * schedule expansion over a 16-word ring buffer. */
static __forceinline__ void sha2_step2_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
	uint32_t* in,uint32_t pc,const uint32_t Kshared)
{
	uint32_t t1,t2;
	// ring-buffer indices of W[t-2], W[t-7], W[t-15]
	int pcidx1 = (pc-2) & 0xF;
	int pcidx2 = (pc-7) & 0xF;
	int pcidx3 = (pc-15) & 0xF;
	uint32_t inx0 = in[pc];
	uint32_t inx1 = in[pcidx1];
	uint32_t inx2 = in[pcidx2];
	uint32_t inx3 = in[pcidx3];
	uint32_t ssg21 = ROTR(inx1, 17) ^ ROTR(inx1, 19) ^ SPH_T32((inx1) >> 10); //ssg2_1(inx1);
	uint32_t ssg20 = ROTR(inx3, 7) ^ ROTR(inx3, 18) ^ SPH_T32((inx3) >> 3); //ssg2_0(inx3);
	uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g);
	uint32_t bsg21 =ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e);
	uint32_t bsg20 =ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); //bsg2_0(a);
	uint32_t andorv =((b) & (c)) | (((b) | (c)) & (a)); //andor32(a,b,c);
	// W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]
	in[pc] = ssg21+inx2+ssg20+inx0;
	t1 = h + bsg21 + vxandx + Kshared + in[pc];
	t2 = bsg20 + andorv;
	d = d + t1;
	h = t1 + t2;
}
/* Full SHA-256 compression of one 64-byte block.
 * in: 16 schedule words (modified in place by the round-16..63 expansion),
 * r:  8-word chaining state, updated in place (Davies-Meyer feed-forward),
 * Kshared: the 64 round constants. Rounds are fully unrolled; the register
 * rotation a..h is done by permuting the argument order of each step. */
static __device__ __forceinline__ void sha2_round_body(uint32_t* in, uint32_t* r,const uint32_t* Kshared)
{
	uint32_t a=r[0];
	uint32_t b=r[1];
	uint32_t c=r[2];
	uint32_t d=r[3];
	uint32_t e=r[4];
	uint32_t f=r[5];
	uint32_t g=r[6];
	uint32_t h=r[7];
	// rounds 0..15: schedule words used as-is
	sha2_step1(a,b,c,d,e,f,g,h,in[0],Kshared[0]);
	sha2_step1(h,a,b,c,d,e,f,g,in[1],Kshared[1]);
	sha2_step1(g,h,a,b,c,d,e,f,in[2],Kshared[2]);
	sha2_step1(f,g,h,a,b,c,d,e,in[3],Kshared[3]);
	sha2_step1(e,f,g,h,a,b,c,d,in[4],Kshared[4]);
	sha2_step1(d,e,f,g,h,a,b,c,in[5],Kshared[5]);
	sha2_step1(c,d,e,f,g,h,a,b,in[6],Kshared[6]);
	sha2_step1(b,c,d,e,f,g,h,a,in[7],Kshared[7]);
	sha2_step1(a,b,c,d,e,f,g,h,in[8],Kshared[8]);
	sha2_step1(h,a,b,c,d,e,f,g,in[9],Kshared[9]);
	sha2_step1(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha2_step1(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha2_step1(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha2_step1(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha2_step1(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha2_step1(b,c,d,e,f,g,h,a,in[15],Kshared[15]);
	// rounds 16..63: three batches of 16 with in-place schedule expansion
	#pragma unroll 3
	for (int i=0;i<3;i++) {
		sha2_step2(a,b,c,d,e,f,g,h,in,0,Kshared[16+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,1,Kshared[17+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,2,Kshared[18+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,3,Kshared[19+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,4,Kshared[20+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,5,Kshared[21+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,6,Kshared[22+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,7,Kshared[23+16*i]);
		sha2_step2(a,b,c,d,e,f,g,h,in,8,Kshared[24+16*i]);
		sha2_step2(h,a,b,c,d,e,f,g,in,9,Kshared[25+16*i]);
		sha2_step2(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha2_step2(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha2_step2(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha2_step2(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha2_step2(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha2_step2(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}
	// feed-forward into the chaining state
	r[0] = r[0] + a;
	r[1] = r[1] + b;
	r[2] = r[2] + c;
	r[3] = r[3] + d;
	r[4] = r[4] + e;
	r[5] = r[5] + f;
	r[6] = r[6] + g;
	r[7] = r[7] + h;
}
/* CPU twin of sha2_round_body: compresses one 64-byte block into the
 * 8-word state r (in place). Used to precompute the first-block midstate. */
static __forceinline__ void sha2_round_body_host(uint32_t* in, uint32_t* r,const uint32_t* Kshared)
{
	uint32_t a=r[0];
	uint32_t b=r[1];
	uint32_t c=r[2];
	uint32_t d=r[3];
	uint32_t e=r[4];
	uint32_t f=r[5];
	uint32_t g=r[6];
	uint32_t h=r[7];
	// rounds 0..15
	sha2_step1_host(a,b,c,d,e,f,g,h,in[0],Kshared[0]);
	sha2_step1_host(h,a,b,c,d,e,f,g,in[1],Kshared[1]);
	sha2_step1_host(g,h,a,b,c,d,e,f,in[2],Kshared[2]);
	sha2_step1_host(f,g,h,a,b,c,d,e,in[3],Kshared[3]);
	sha2_step1_host(e,f,g,h,a,b,c,d,in[4],Kshared[4]);
	sha2_step1_host(d,e,f,g,h,a,b,c,in[5],Kshared[5]);
	sha2_step1_host(c,d,e,f,g,h,a,b,in[6],Kshared[6]);
	sha2_step1_host(b,c,d,e,f,g,h,a,in[7],Kshared[7]);
	sha2_step1_host(a,b,c,d,e,f,g,h,in[8],Kshared[8]);
	sha2_step1_host(h,a,b,c,d,e,f,g,in[9],Kshared[9]);
	sha2_step1_host(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
	sha2_step1_host(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
	sha2_step1_host(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
	sha2_step1_host(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
	sha2_step1_host(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
	sha2_step1_host(b,c,d,e,f,g,h,a,in[15],Kshared[15]);
	// rounds 16..63
	#pragma unroll 3
	for (int i=0;i<3;i++) {
		sha2_step2_host(a,b,c,d,e,f,g,h,in,0,Kshared[16+16*i]);
		sha2_step2_host(h,a,b,c,d,e,f,g,in,1,Kshared[17+16*i]);
		sha2_step2_host(g,h,a,b,c,d,e,f,in,2,Kshared[18+16*i]);
		sha2_step2_host(f,g,h,a,b,c,d,e,in,3,Kshared[19+16*i]);
		sha2_step2_host(e,f,g,h,a,b,c,d,in,4,Kshared[20+16*i]);
		sha2_step2_host(d,e,f,g,h,a,b,c,in,5,Kshared[21+16*i]);
		sha2_step2_host(c,d,e,f,g,h,a,b,in,6,Kshared[22+16*i]);
		sha2_step2_host(b,c,d,e,f,g,h,a,in,7,Kshared[23+16*i]);
		sha2_step2_host(a,b,c,d,e,f,g,h,in,8,Kshared[24+16*i]);
		sha2_step2_host(h,a,b,c,d,e,f,g,in,9,Kshared[25+16*i]);
		sha2_step2_host(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]);
		sha2_step2_host(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]);
		sha2_step2_host(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]);
		sha2_step2_host(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]);
		sha2_step2_host(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]);
		sha2_step2_host(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]);
	}
	// feed-forward into the chaining state
	r[0] = r[0] + a;
	r[1] = r[1] + b;
	r[2] = r[2] + c;
	r[3] = r[3] + d;
	r[4] = r[4] + e;
	r[5] = r[5] + f;
	r[6] = r[6] + g;
	r[7] = r[7] + h;
}
/* SHA-256 of the 122-byte m7 block header, one nonce per thread.
 * The midstate of the first 64-byte block is precomputed on the host (pbuf);
 * each thread finishes blocks 2 and 3 with its own nonce substituted at
 * header word 29 (bytes 116..119). Output: 4 quadwords per thread, strided
 * as outputHash[word*threads + thread].
 * Fix: removed an unused 64-byte local union (dead since the strided output
 * path was adopted) and the stale commented-out code around it. */
__global__ void m7_sha256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;
		uint32_t buf[8];
		uint32_t in2[16]={0};
		uint32_t in3[16]={0};

		// block 2: header words 16..30, nonce at word 29; word 31 stays zero
		#pragma unroll 13
		for (int i=0;i<13;i++) {in2[i]= cuda_swab32(c_PaddedMessage80[i+16]);}
		in2[13]=cuda_swab32(nounce);
		in2[14]=cuda_swab32(c_PaddedMessage80[30]);

		// block 3 carries only the bit length: 0x3d0 = 122 bytes * 8
		in3[15]=0x3d0;

		// resume from the host-computed midstate of block 1
		#pragma unroll 8
		for (int i=0;i<8;i++) {buf[i]= pbuf[i];}

		sha2_round_body(in2,buf,K);
		sha2_round_body(in3,buf,K);

		// store the 256-bit digest, byte-swapped, strided across threads
		#pragma unroll 4
		for (int i=0;i<4;i++) {outputHash[i*threads+thread]=cuda_swab32ll(((uint64_t*)buf)[i]);}
	} // threads
}
/* Final SHA-256 over the big-integer product of the m7 sub-hashes
 * (up to ~300 bytes, stored strided in g_hash1 as [word*threads + thread]).
 * The last block is padded dynamically: the bit length starts at 0x968
 * (301*8) and is reduced by 8 per trailing zero byte of the product, then
 * the 0x80 padding marker is placed right after the last nonzero byte.
 * Threads whose digest is below pTarget record their nonce in resNounce.
 * g_nonceVector is unused here; kept for launcher API compatibility.
 * Fix: removed an unused 304-byte local union that only wasted local
 * memory / registers. */
__global__ void m7_sha256_gpu_hash_300(int threads, uint32_t startNounce, uint64_t *g_hash1, uint64_t *g_nonceVector, uint32_t *resNounce)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t in[16],buf[8];

		// block 1
		#pragma unroll 8
		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*i+thread]);}
		#pragma unroll 8
		for (int i=0;i<8;i++) {buf[i] = H256[i];}
		sha2_round_body(in,buf,K);

		// blocks 2..4
		#pragma unroll 8
		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+8)+thread]);}
		sha2_round_body(in,buf,K);
		#pragma unroll 8
		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+16)+thread]);}
		sha2_round_body(in,buf,K);
		#pragma unroll 8
		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+24)+thread]);}
		sha2_round_body(in,buf,K);

		// final partial block: 5 swapped quadwords plus one raw quadword that
		// may end in zero bytes (the big-int product has variable length)
		#pragma unroll 5
		for (int i=0;i<5;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+32)+thread]);}
		((uint64_t*)in)[5]= g_hash1[threads*(5+32)+thread];
		in[11]=0;
		in[12]=0;
		in[13]=0;
		in[14]=0;
		in[15]=0x968; // 301 bytes * 8 bits, trimmed below
		int it=0;
		do {
			in[15]-=8;
			it++;
		} while (((uint8_t*)in)[44-it]==0);
		((uint8_t*)in)[44-it+1]=0x80; // SHA padding marker after last nonzero byte
		((uint64_t*)in)[5]= cuda_swab32ll(((uint64_t*)in)[5]);
		sha2_round_body(in,buf,K);

		// target test: the highest differing 64-bit word (index 3 = most
		// significant) decides the comparison
		uint32_t nounce = startNounce +thread;
		bool rc = false;
		#pragma unroll 4
		for (int i = 0; i < 4; i++)
		{
			if (cuda_swab32ll(((uint64_t*)buf)[i]) != ((uint64_t*)pTarget)[i]) {
				rc = (cuda_swab32ll(((uint64_t*)buf)[i]) < ((uint64_t*)pTarget)[i]);
			}
		}
		if (rc)
		{
			// NOTE(review): non-atomic read-modify-write shared by all threads;
			// with multiple concurrent winners the lowest nonce is not
			// guaranteed to be kept (same behavior as the original code).
			if(resNounce[0] > nounce)
				resNounce[0] = nounce;
		}
	} // threads
}
/* Per-GPU init: upload the SHA-256 tables to constant memory and allocate
 * the device/host nonce result buffers for this thread id. */
__host__ void m7_sha256_cpu_init(int thr_id, int threads)
{
	// copy the hash tables into GPU constant memory
	cudaMemcpyToSymbol( H256,cpu_H256,sizeof(cpu_H256),0, cudaMemcpyHostToDevice );
	cudaMemcpyToSymbol( K,cpu_K,sizeof(cpu_K),0, cudaMemcpyHostToDevice );
	cudaMalloc(&d_MNonce[thr_id], sizeof(uint32_t));
	cudaMallocHost(&d_mnounce[thr_id], 1*sizeof(uint32_t));
}
/* Launch the final SHA-256 scan kernel and return the winning nonce,
 * or 0xffffffff when no thread beat the target. */
__host__ uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector,uint64_t *d_hash, int order)
{
	uint32_t result = 0xffffffff;
	cudaMemset(d_MNonce[thr_id], 0xff, sizeof(uint32_t));
	const int threadsperblock = 384; // block size tuned for this kernel - do not change
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);
	size_t shared_size = 0;
	m7_sha256_gpu_hash_300<<<grid, block, shared_size>>>(threads, startNounce, d_hash, d_nonceVector, d_MNonce[thr_id]);
	// synchronous copy: implicitly waits for the kernel on the default stream
	cudaMemcpy(d_mnounce[thr_id], d_MNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
	MyStreamSynchronize(NULL, order, thr_id);
	result = *d_mnounce[thr_id];
	return result;
}
/* Launch the 122-byte header SHA-256 kernel (one nonce per thread);
 * results land strided in d_outputHash. */
__host__ void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
	const int threadsperblock = 512; // block size tuned for this kernel - do not change
	// compute how many thread blocks are required
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);
	size_t shared_size = 0;
	m7_sha256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}
/* Upload the 122-byte block header (padded to 128 bytes) and the target to
 * constant memory, then precompute the SHA-256 midstate of the first
 * 64-byte block on the CPU and upload it as well (pbuf). */
__host__ void m7_sha256_setBlock_120(void *pdata,const void *ptarget)
{
	unsigned char PaddedMessage[128];
	memcpy(PaddedMessage, pdata, 122);
	PaddedMessage[122] = 0x80;          // SHA padding marker
	memset(&PaddedMessage[123], 0, 5);  // zero-fill the remainder of the buffer
	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);

	// midstate: compress header words 0..15 on the host
	uint32_t in[16], buf[8];
	const uint32_t *alt_data = (uint32_t*) PaddedMessage;
	for (int i = 0; i < 16; i++)
		in[i] = host_swab32(alt_data[i]);
	for (int i = 0; i < 8; i++)
		buf[i] = cpu_H256[i];
	sha2_round_body_host(in, buf, cpu_K);
	cudaMemcpyToSymbol( pbuf, buf, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
}

344
m7/cuda_m7_sha512.cu

@ -0,0 +1,344 @@ @@ -0,0 +1,344 @@
/**
* sha512 djm34
* (cleaned by tpruvot)
*/
/*
* sha-512 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
#include <stdio.h>

#define USE_SHARED 1
#include "cuda_helper.h"

// 64-bit byte swap (device helper from cuda_helper.h)
#define SWAP64(u64) cuda_swab64(u64)

// 32/64-bit rotate and truncation helpers (sphlib-style)
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))

// in heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint64_t c_PaddedMessage80[16]; // padded 122-byte header (128 bytes)

static __constant__ uint64_t H_512[8]; // device copy of the SHA-512 initial state

// SHA-512 initial hash values H0..H7 (FIPS 180-4, sec. 5.3.5)
static const uint64_t H512[8] = {
	SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
	SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
	SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
	SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
};

static __constant__ uint64_t K_512[80]; // device copy of the SHA-512 round constants

// SHA-512 round constants K0..K79 (FIPS 180-4, sec. 4.2.3)
static const uint64_t K512[80] = {
	SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
	SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
	SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
	SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
	SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
	SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
	SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
	SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
	SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
	SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
	SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
	SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
	SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
	SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
	SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
	SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
	SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
	SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
	SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
	SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
	SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
	SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
	SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
	SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
	SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
	SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
	SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
	SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
	SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
	SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
	SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
	SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
	SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
	SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
	SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
	SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
	SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
	SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
	SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
	SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
};
/* One SHA-512 round; `ord` (0..7) selects the register-rotation offset so the
 * unrolled caller never has to shuffle a..h explicitly. */
#define SHA3_STEP(ord,r,i) { \
	uint64_t T1, T2; \
	int a = 8-ord; \
	T1 = SPH_T64(r[(7+a)%8] + BSG5_1(r[(4+a)%8]) + CH(r[(4+a)%8], r[(5+a)%8], r[(6+a)%8]) + K_512[i] + W[i]); \
	T2 = SPH_T64(BSG5_0(r[(0+a)%8]) + MAJ(r[(0+a)%8], r[(1+a)%8], r[(2+a)%8])); \
	r[(3+a)%8] = SPH_T64(r[(3+a)%8] + T1); \
	r[(7+a)%8] = SPH_T64(T1 + T2); \
}
/* Same round, but the T1 term is computed by the Tone() helper with the
 * round constants passed via `truc`. */
#define SHA3_STEP2(truc,ord,r,i) { \
	uint64_t T1, T2; \
	int a = 8-ord; \
	T1 = Tone(truc,r,W,a,i); \
	T2 = SPH_T64(BSG5_0(r[(0+a)%8]) + MAJ(r[(0+a)%8], r[(1+a)%8], r[(2+a)%8])); \
	r[(3+a)%8] = SPH_T64(r[(3+a)%8] + T1); \
	r[(7+a)%8] = SPH_T64(T1 + T2); \
}
// SHA-512 sigma/choice/majority functions via the 3-input device helpers
//#define BSG5_0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
#define BSG5_0(x) xor3(ROTR64(x, 28),ROTR64(x, 34),ROTR64(x, 39))
//#define BSG5_1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
#define BSG5_1(x) xor3(ROTR64(x, 14),ROTR64(x, 18),ROTR64(x, 41))
//#define SSG5_0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7))
#define SSG5_0(x) xor3(ROTR64(x, 1),ROTR64(x, 8),shr_t64(x,7))
//#define SSG5_1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6))
#define SSG5_1(x) xor3(ROTR64(x, 19),ROTR64(x, 61),shr_t64(x,6))
//#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
#define CH(x, y, z) xandx(x,y,z)
//#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z)))
#define MAJ(x, y, z) andor(x,y,z)
/* T1 term of an SHA-512 round: h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i],
 * with the register rotation expressed through offset `a` (see SHA3_STEP2). */
__device__ __forceinline__
uint64_t Tone(const uint64_t* sharedMemory, uint64_t r[8], uint64_t W[80], uint32_t a, uint32_t i)
{
	const uint64_t e = r[(4+a)%8];
	const uint64_t bsig1 = xor3(ROTR64(e, 14), ROTR64(e, 18), ROTR64(e, 41)); // Sigma1(e)
	const uint64_t ch = xandx(e, r[(5+a)%8], r[(6+a)%8]);                     // Ch(e,f,g)
	return SPH_T64(r[(7+a)%8] + bsig1 + ch + sharedMemory[i] + W[i]);
}
/* Disabled (dead) code: a 64-byte-input SHA-512 kernel and its launcher,
 * kept for reference. Not compiled; the m7 pipeline uses the _120 variant. */
#if 0
__global__
void m7_sha512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
		int hashPosition = nounce - startNounce;
		uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
		union {
			uint8_t h1[64];
			uint32_t h4[16];
			uint64_t h8[8];
		} hash;
		#pragma unroll
		for (int i=0;i<16;i++) {
			hash.h4[i]= inpHash[i];
		}
		uint64_t W[80];
		uint64_t r[8];
		#pragma unroll 71
		for (int i=9;i<80;i++) {
			W[i]=0;
		}
		#pragma unroll
		for (int i = 0; i < 8; i ++) {
			W[i] = SWAP64(hash.h8[i]);
			r[i] = H_512[i];
		}
		W[8] = 0x8000000000000000;
		W[15]= 0x0000000000000200;
		#pragma unroll 64
		for (int i = 16; i < 80; i ++)
			W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7]
				+ SSG5_0(W[i - 15]) + W[i - 16]);
		#pragma unroll 10
		for (int i = 0; i < 80; i += 8) {
			#pragma unroll 8
			for (int ord=0;ord<8;ord++) {
				SHA3_STEP2(K_512,ord,r,i+ord);
			}
		}
		#pragma unroll 8
		for (int i = 0; i < 8; i++) {
			r[i] = SPH_T64(r[i] + H_512[i]);
		}
		#pragma unroll 8
		for(int i=0;i<8;i++) {
			hash.h8[i] = SWAP64(r[i]);
		}
		#pragma unroll 16
		for (int u = 0; u < 16; u ++) {
			inpHash[u] = hash.h4[u];
		}
	}
}

__host__
void m7_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
	const int threadsperblock = 256;
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);
	size_t shared_size =0;
	m7_sha512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
	MyStreamSynchronize(NULL, order, thr_id);
}
#endif
/* Upload the SHA-512 round constants and initial state to constant memory.
 * thr_id/threads are unused; kept for API symmetry with the other inits.
 * Fix: use sizeof(K512) like the H_512 line instead of a hand-written
 * 80*sizeof(uint64_t) (same value, but consistent and typo-proof). */
__host__
void m7_sha512_cpu_init(int thr_id, int threads)
{
	cudaMemcpyToSymbol(K_512,K512,sizeof(K512),0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(H_512,H512,sizeof(H512),0, cudaMemcpyHostToDevice);
}
/* SHA-512 of the 122-byte m7 header, one nonce per thread.
 * Block 1 carries the header with the nonce spliced into the upper 32 bits
 * of quadword 14 (bytes 116..119); block 2 carries only the bit length
 * (0x3d0 = 122*8). Output: 8 quadwords per thread, strided as
 * outputHash[word*threads + thread]. */
__global__
void m7_sha512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;
		uint64_t W[80];
		uint64_t r[8];
#pragma unroll 8
		for (int i = 0; i < 8; i ++) {
			r[i] = H_512[i];
		}
#pragma unroll 14
		for (int i = 0; i < 14; i ++) {
			W[i] = cuda_swab64(c_PaddedMessage80[i]);
		}
		// splice the per-thread nonce into header bytes 116..119
		W[14] = cuda_swab64(REPLACE_HIWORD(c_PaddedMessage80[14],nounce));
		W[15] = cuda_swab64(c_PaddedMessage80[15]);

		// message schedule expansion for block 1
#pragma unroll 64
		for (int i = 16; i < 80; i ++)
			W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]);

#if __CUDA_ARCH__ < 500 // go figure...
#pragma unroll 10
#endif
		for (int i = 0; i < 10; i ++) {
#pragma unroll 8
			for (int ord=0;ord<8;ord++) {
				SHA3_STEP2(K_512,ord,r,8*i+ord);
			}
		}
		// feed-forward of block 1
#pragma unroll 8
		for (int i = 0; i < 8; i++) {
			r[i] = SPH_T64(r[i] + H_512[i]);
		}
		// save the midstate for the final feed-forward
		uint64_t tempr[8];
#pragma unroll 8
		for (int i=0;i<8;i++) {
			tempr[i]=r[i];
		}
		// block 2: all zero except the 128-bit length field (976 bits)
#pragma unroll 15
		for (int i = 0; i < 15; i ++) {
			W[i] = 0;
		}
		W[15]=0x3d0;
#pragma unroll 64
		for (int i = 16; i < 80; i ++)
			W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]);
#if __CUDA_ARCH__ < 500 // go figure...
#pragma unroll 10
#endif
		for (int i = 0; i < 10; i ++) {
#pragma unroll 8
			for (int ord=0;ord<8;ord++) {SHA3_STEP2(K_512,ord,r,8*i+ord);}
		}
		// final feed-forward, byte-swapped, strided across threads
#pragma unroll 8
		for(int i=0;i<8;i++) {
			outputHash[i*threads+thread] = cuda_swab64(SPH_T64(r[i] + tempr[i]));
		}
	} // thread
}
/* Upload the 122-byte block header, padded with 0x80 and zero-filled to
 * 128 bytes, into constant memory for the SHA-512 kernel. */
__host__
void m7_sha512_setBlock_120(void *pdata)
{
	unsigned char PaddedMessage[128];
	memcpy(PaddedMessage, pdata, 122);
	PaddedMessage[122] = 0x80;          // SHA padding marker
	memset(&PaddedMessage[123], 0, 5);  // zero-fill the remainder of the buffer
	cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
/* Launch the SHA-512 header kernel, one nonce per thread. */
__host__
void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);
	m7_sha512_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}

2870
m7/cuda_m7_whirlpool.cu

File diff suppressed because it is too large Load Diff

276
m7/cuda_mul.cu

@ -0,0 +1,276 @@ @@ -0,0 +1,276 @@
/*
 * big integer multiplication (m7) djm34
*
*/
/*
 * big integer multiplication kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
//#include <stdio.h>
#include <memory.h>
#include "cuda_helper.h"

#define HIWORD _HIWORD
#define LOWORD _LOWORD

/* Disabled (dead) code: simple CUDA error-check helper, kept for debugging. */
#if 0
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
	if (code != cudaSuccess)
	{
		fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
		if (abort) exit(code);
	}
}
#endif

extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
/* Schoolbook multiply of two little-endian big integers whose limbs are
 * strided across threads: limb k of an operand lives at [k*threads + thread].
 * Writes sizea+sizeb result limbs into w (same strided layout).
 * Fix: `threads` was a hard-coded launch size (256*256*8*2) inside the
 * function; it is now a defaulted trailing parameter, so existing callers
 * are unchanged while other launch configurations can pass the real count. */
__device__ __forceinline__
void bigmul(uint64_t *w, uint64_t* am, uint64_t* bm, int sizea, int sizeb, int thread, int threads = 256*256*8*2)
{
	#pragma unroll
	for (int i=0;i<sizea+sizeb;i++) {w[i*threads+thread]=0;}
	#pragma unroll
	for (int i=0;i<sizeb;i++)
	{
		uint64_t c=0;      // carry limb (high half of the 128-bit product)
		uint64_t u=0,v=0;
		#pragma unroll
		for (int j=0;j<sizea;j++) {
			// (u:v) = am[j]*bm[i] + w[i+j] + c
			muladd128(u,v,am[j*threads+thread],bm[i*threads+thread],w[(i+j)*threads+thread],c);
			w[(i+j)*threads+thread]=v;
			c=u;
		}
		w[(i+sizea)*threads+thread]=u;
	}
}
/* Generic schoolbook big-integer multiply, one product per thread.
 * Operand/result limbs are strided across threads: limb k at [k*threads+thread]. */
__global__
void m7_bigmul1_gpu(int threads, int sizea, int sizeb, uint64_t* am, uint64_t* bm, uint64_t *w)
{
	const int thread = blockDim.x * blockIdx.x + threadIdx.x;
	if (thread >= threads)
		return;

	#pragma unroll
	for (int k = 0; k < sizea + sizeb; k++)
		w[k*threads + thread] = 0;

	#pragma unroll
	for (int bi = 0; bi < sizeb; bi++)
	{
		uint64_t carry = 0;
		uint64_t hi = 0, lo = 0;
		#pragma unroll
		for (int ai = 0; ai < sizea; ai++) {
			// (hi:lo) = am[ai]*bm[bi] + w[ai+bi] + carry
			muladd128(hi, lo, am[ai*threads+thread], bm[bi*threads+thread], w[(ai+bi)*threads+thread], carry);
			w[(ai+bi)*threads+thread] = lo;
			carry = hi;
		}
		w[(bi+sizea)*threads+thread] = hi;
	}
}
/* Specialized multiply: 3-limb operand (am) times 32-limb operand (bm),
 * strided layout [limb*threads + thread]. The outer loop is only unrolled
 * below CC 5.0 (empirically faster there). */
__global__
void m7_bigmul_unroll1_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		#pragma unroll 32
		for (int i=0;i<32;i++) {
			w[i*threads + thread]=0;
		}
		#if __CUDA_ARCH__ < 500
		#pragma unroll 32
		#endif
		for (int i=0;i<32;i++)
		{
			uint64_t c=0;      // carry limb
			uint64_t u=0,v=0;
			#pragma unroll 3
			for (int j=0;j<3;j++) {
				// (u:v) = am[j]*bm[i] + w[i+j] + c
				muladd128(u,v,am[j*threads+thread],bm[i*threads+thread],w[(i+j)*threads+thread],c);
				w[(i+j)*threads+thread]=v;
				c=u;
			}
			w[(i+3)*threads+thread]=u;
		}
	} // threads
}
/* Same 3x32-limb multiply as m7_bigmul_unroll1_gpu, but with contiguous
 * per-thread layout: am at stride 8, bm and w at stride 38 quadwords per
 * thread. NOTE(review): the strides (8/38) must match the buffers the
 * caller allocates - verify against the launcher in m7.cu. */
__global__
void m7_bigmul_unroll1_gpu_std(int threads, uint64_t* amg, uint64_t* bmg, uint64_t *wg)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint64_t * am = amg + 8*thread;
		uint64_t * bm = bmg + 38*thread;
		uint64_t * w = wg + 38*thread;
		#pragma unroll 32
		for (int i=0;i<32;i++) {
			w[i]=0;
		}
		#if __CUDA_ARCH__ < 500
		#pragma unroll 32
		#endif
		for (int i=0;i<32;i++)
		{
			uint64_t c=0;      // carry limb
			uint64_t u=0,v=0;
			#pragma unroll 3
			for (int j=0;j<3;j++) {
				// (u:v) = am[j]*bm[i] + w[i+j] + c
				muladd128(u,v,am[j],bm[i],w[(i+j)],c);
				w[(i+j)]=v;
				c=u;
			}
			w[(i+3)]=u;
		}
	} // threads
}
/* Specialized multiply: 3-limb operand (am) times 35-limb operand (bm),
 * 38-limb result, strided layout [limb*threads + thread]. Unrolling is
 * restricted to CC < 5.0 (empirically faster there). */
__global__
void m7_bigmul_unroll2_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		#if __CUDA_ARCH__ < 500
		#pragma unroll
		#endif
		for (int i=0;i<38;i++) {
			w[i*threads+thread]=0;
		}
		#if __CUDA_ARCH__ < 500
		#pragma unroll
		#endif
		for (int i=0;i<35;i++)
		{
			uint64_t c=0;      // carry limb
			uint64_t u=0,v=0;
			#if __CUDA_ARCH__ < 500
			#pragma unroll
			#endif
			for (int j=0;j<3;j++) {
				// (u:v) = am[j]*bm[i] + w[i+j] + c
				muladd128(u,v,am[j*threads+thread],bm[i*threads+thread],w[(i+j)*threads+thread],c);
				w[(i+j)*threads+thread]=v;
				c=u;
			}
			w[(i+3)*threads+thread]=u;
		}
	} // thread
}
/* Same 3x35-limb multiply as m7_bigmul_unroll2_gpu, but with contiguous
 * per-thread layout: am at stride 8, bm and w at stride 38 quadwords per
 * thread. NOTE(review): strides must match the caller's buffers. */
__global__
void m7_bigmul_unroll2_gpu_std(int threads, uint64_t* amg, uint64_t* bmg, uint64_t *wg)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint64_t * am = amg + 8*thread;
		uint64_t * bm = bmg + 38*thread;
		uint64_t * w = wg + 38*thread;
		#if __CUDA_ARCH__ < 500
		#pragma unroll
		#endif
		for (int i=0;i<38;i++) {
			w[i]=0;
		}
		#if __CUDA_ARCH__ < 500
		#pragma unroll
		#endif
		for (int i=0;i<35;i++)
		{
			uint64_t c=0;      // carry limb
			uint64_t u=0,v=0;
			#if __CUDA_ARCH__ < 500
			#pragma unroll
			#endif
			for (int j=0;j<3;j++) {
				// (u:v) = am[j]*bm[i] + w[i+j] + c
				muladd128(u,v,am[j],bm[i],w[(i+j)],c);
				w[(i+j)]=v;
				c=u;
			}
			w[(i+3)]=u;
		}
	} // thread
}
/* Launch the generic strided big-multiply kernel (len1 x len2 limbs).
 * Deliberately not synchronized here; ordering is handled by the caller. */
__host__ void m7_bigmul1_cpu(int thr_id, int threads,int len1,int len2,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);
	m7_bigmul1_gpu<<<grid, block, 0>>>(threads, len1, len2, Hash1, Hash2, finalHash);
}
/* Launch the specialized 3x32-limb multiply kernel. */
__host__ void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);
	m7_bigmul_unroll1_gpu<<<grid, block, 0>>>(threads, Hash1, Hash2, finalHash);
}
/* Launch the specialized 3x35-limb multiply kernel. */
__host__ void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order)
{
	const int tpb = 256;
	const dim3 block(tpb);
	const dim3 grid((threads + tpb - 1) / tpb);
	m7_bigmul_unroll2_gpu<<<grid, block, 0>>>(threads, Hash1, Hash2, finalHash);
}
/* No device state to initialize for the multiply kernels; kept as an empty
 * placeholder so the m7 setup code can treat all algorithms uniformly. */
__host__ void m7_bigmul_init(int thr_id, int threads)
{
}

468
m7/cuda_mul2.cu

@ -0,0 +1,468 @@ @@ -0,0 +1,468 @@
/*
 * big integer multiplication, 128/256-bit primitives djm34, catia
*
*/
/*
 * big integer multiplication kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
#undef _GLIBCXX_ATOMIC_BUILTINS
#undef _GLIBCXX_USE_INT128

#include <stdio.h>
#include <memory.h>
//#include "uint256.h"
#include "cuda_helper.h"

extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
//#include "cuPrintf.cu"

// 128-bit value as two 64-bit halves (used by the wide-multiply helpers)
typedef struct t4_t{
	uint64_t high,low;
} t4_t;
/* Full 64x64 -> 128-bit unsigned multiply built from 32-bit mad/madc PTX
 * instructions (carry-chained). Returns the low 64 bits in .x and the high
 * 64 bits in .y. Left as inline asm on purpose - do not restyle. */
__device__ __forceinline__
ulonglong2 umul64wide (unsigned long long int a,
	unsigned long long int b)
{
	ulonglong2 res;
	asm ("{\n\t"
		".reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi;\n\t"
		"mov.b64 {alo,ahi}, %2; \n\t"
		"mov.b64 {blo,bhi}, %3; \n\t"
		"mul.lo.u32 r0, alo, blo; \n\t"
		"mul.hi.u32 r1, alo, blo; \n\t"
		"mad.lo.cc.u32 r1, alo, bhi, r1;\n\t"
		"madc.hi.u32 r2, alo, bhi, 0;\n\t"
		"mad.lo.cc.u32 r1, ahi, blo, r1;\n\t"
		"madc.hi.cc.u32 r2, ahi, blo, r2;\n\t"
		"madc.hi.u32 r3, ahi, bhi, 0;\n\t"
		"mad.lo.cc.u32 r2, ahi, bhi, r2;\n\t"
		"addc.u32 r3, r3, 0; \n\t"
		"mov.b64 %0, {r0,r1}; \n\t"
		"mov.b64 %1, {r2,r3}; \n\t"
	"}"
	: "=l"(res.x), "=l"(res.y)
	: "l"(a), "l"(b));
	return res;
}
// GMP-style helper: (h,l) = full 128-bit product of the 64-bit values m and n.
#define umul_ppmm(h,l,m,n) \
{ \
ulonglong2 foom = umul64wide(m,n); \
h = foom.y; \
l = foom.x; \
}
// 128x128 -> 256-bit unsigned multiply: (*h,*l) = m * n, each operand and
// each result half being a two-leg t4_t. Implemented as a single PTX block
// so the long 32-bit mad carry chains stay explicit. Operands %6, %7, %10
// and %11 are bound to 0ULL below — the register layout reserves space for
// wider operands, but only the four real input legs (i8..i11 = m,
// i16..i19 = n) participate in the products.
__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n)
{
asm ("{\n\t"
".reg .u32 o0, o1, o2, o3, o4; \n\t"
".reg .u32 o5, o6, o7, i8, i9; \n\t"
".reg .u32 i10, i11, i12, i13; \n\t"
".reg .u32 i14, i15, i16, i17; \n\t"
".reg .u32 i18, i19, i20, i21; \n\t"
".reg .u32 i22, i23; \n\t"
"mov.b64 { i8, i9}, %4; \n\t"
"mov.b64 {i10,i11}, %5; \n\t"
"mov.b64 {i12,i13}, %6; \n\t"
"mov.b64 {i14,i15}, %7; \n\t"
"mov.b64 {i16,i17}, %8; \n\t"
"mov.b64 {i18,i19}, %9; \n\t"
"mov.b64 {i20,i21},%10; \n\t"
"mov.b64 {i22,i23},%11; \n\t"
"mul.lo.u32 o0, i8, i16; \n\t"
"mul.hi.u32 o1, i8, i16; \n\t"
"mad.lo.cc.u32 o1, i8, i17, o1;\n\t"
"madc.hi.u32 o2, i8, i17, 0;\n\t"
"mad.lo.cc.u32 o1, i9, i16, o1;\n\t"
"madc.hi.cc.u32 o2, i9, i16, o2;\n\t"
"madc.hi.u32 o3, i8, i18, 0;\n\t"
"mad.lo.cc.u32 o2, i8, i18, o2;\n\t"
"madc.hi.cc.u32 o3, i9, i17, o3;\n\t"
"madc.hi.u32 o4, i8, i19, 0;\n\t"
"mad.lo.cc.u32 o2, i9, i17, o2;\n\t"
"madc.hi.cc.u32 o3, i10, i16, o3;\n\t"
"madc.hi.cc.u32 o4, i9, i18, o4;\n\t"
"addc.u32 o5, 0, 0;\n\t"
"mad.lo.cc.u32 o2, i10, i16, o2;\n\t"
"madc.lo.cc.u32 o3, i8, i19, o3;\n\t"
"madc.hi.cc.u32 o4, i10, i17, o4;\n\t"
"madc.hi.cc.u32 o5, i9, i19, o5;\n\t"
"addc.u32 o6, 0, 0;\n\t"
"mad.lo.cc.u32 o3, i9, i18, o3;\n\t"
"madc.hi.cc.u32 o4, i11, i16, o4;\n\t"
"madc.hi.cc.u32 o5, i10, i18, o5;\n\t"
"addc.u32 o6, 0, o6;\n\t"
"mad.lo.cc.u32 o3, i10, i17, o3;\n\t"
"addc.u32 o4, 0, o4;\n\t"
"mad.hi.cc.u32 o5, i11, i17, o5;\n\t"
"madc.hi.cc.u32 o6, i10, i19, o6;\n\t"
"addc.u32 o7, 0, 0;\n\t"
"mad.lo.cc.u32 o3, i11, i16, o3;\n\t"
"madc.lo.cc.u32 o4, i9, i19, o4;\n\t"
"addc.u32 o5, 0, o5;\n\t"
"mad.hi.cc.u32 o6, i11, i18, o6;\n\t"
"addc.u32 o7, 0, o7;\n\t"
"mad.lo.cc.u32 o4, i10, i18, o4;\n\t"
"addc.u32 o5, 0, o5;\n\t"
"mad.hi.u32 o7, i11, i19, o7;\n\t"
"mad.lo.cc.u32 o4, i11, i17, o4;\n\t"
"addc.u32 o5, 0, o5;\n\t"
"mad.lo.cc.u32 o5, i10, i19, o5;\n\t"
"addc.u32 o6, 0, o6;\n\t"
"mad.lo.cc.u32 o5, i11, i18, o5;\n\t"
"addc.u32 o6, 0, o6;\n\t"
"mad.lo.cc.u32 o6, i11, i19, o6;\n\t"
"addc.u32 o7, 0, o7;\n\t"
"mov.b64 %0, {o0,o1}; \n\t"
"mov.b64 %1, {o2,o3}; \n\t"
"mov.b64 %2, {o4,o5}; \n\t"
"mov.b64 %3, {o6,o7}; \n\t"
"}"
: "=l"(l->low), "=l"(l->high), "=l"(h->low), "=l"(h->high)
: "l"(m.low), "l"(m.high), "l"(0ULL), "l"(0ULL),
"l"(n.low), "l"(n.high), "l"(0ULL), "l"(0ULL));
}
// Disabled C reference implementation of umul_ppmmT4 built from four
// umul_ppmm calls plus explicit carry propagation. Kept for documentation
// and as a fallback should the PTX version above need to be validated.
#if 0
__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n){
uint64_t th,tl;
uint32_t c,c2;
umul_ppmm(l->high,l->low,m.low,n.low);
umul_ppmm(th,tl,m.high,n.low);
l->high += tl;
c = (l->high < tl);
h->low = th + c;
c = (h->low < c);
h->high = c;
//Second word
umul_ppmm(th,tl,m.low,n.high);
l->high += tl;
c = l->high < tl;
h->low += th;
c2 = h->low < th;
h->low += c;
c2 += h->low < c;
h->high += c2;
umul_ppmm(th,tl,m.high,n.high);
h->low += tl;
c = h->low < tl;
h->high += th + c;
}
#endif
// Load the idx-th 128-bit leg of this thread's big number from the strided
// global array g (64-bit word i of a thread lives at g[i*threads + thread],
// so leg idx occupies words 2*idx and 2*idx+1).
__device__ __forceinline__ t4_t T4(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g){
	t4_t leg;
	leg.low  = g[(2 * idx) * threads + thread];
	leg.high = g[(2 * idx + 1) * threads + thread];
	return leg;
}
// Store one 128-bit leg of this thread's big number into the strided global
// array g (inverse of T4(): words 2*idx and 2*idx+1 of the thread's column).
__device__ __forceinline__ void T4_store(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g, t4_t val){
	g[(2 * idx) * threads + thread]     = val.low;
	g[(2 * idx + 1) * threads + thread] = val.high;
}
// Initialize a 128-bit value from a 64-bit one (zero-extension).
__device__ __forceinline__ void T4_set(t4_t *d, uint64_t v){
	d->low  = v;
	d->high = 0;
}
// 128-bit addition, wrapping modulo 2^128. Carry out of the low leg is
// detected through the unsigned wrap-around test (sum < addend).
__device__ __forceinline__ t4_t T4_add(t4_t a, t4_t b){
	t4_t sum;
	sum.low = a.low + b.low;
	uint32_t carry = (sum.low < a.low) ? 1u : 0u;
	sum.high = a.high + b.high + carry;
	return sum;
}
// Mixed-width addition: 64-bit a plus 128-bit b, wrapping modulo 2^128.
__device__ __forceinline__ t4_t T4_add(uint64_t a, t4_t b){
	t4_t sum;
	sum.low = a + b.low;
	uint32_t carry = (sum.low < a) ? 1u : 0u;
	sum.high = b.high + carry;
	return sum;
}
// Unsigned 128-bit comparison: returns 1 when a < b, 0 otherwise.
__device__ __forceinline__ uint32_t T4_lt(t4_t a, t4_t b){
	if (a.high != b.high)
		return (a.high < b.high) ? 1u : 0u;
	return (a.low < b.low) ? 1u : 0u;
}
// Returns 1 when the 128-bit value a is strictly greater than the 64-bit
// value b. Any non-zero high leg decides immediately, since b < 2^64.
__device__ __forceinline__ uint32_t T4_gt(t4_t a, uint64_t b){
	if (a.high != 0)
		return 1;
	return (a.low > b) ? 1u : 0u;
}
// g_p = g_v * sml, where g_v is 'len' 128-bit legs in the strided layout
// (see T4/T4_store). The final carry leg is written at index 'len', and
// *size reports the product length: len, or len+1 when that carry leg is
// non-zero. Equivalent of GMP's mpn_mul_1 on 128-bit limbs.
__device__ void mulScalarT4(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, t4_t sml, uint32_t *size){
t4_t ul, cl, hpl, lpl;
uint32_t i;
T4_set(&cl,0);
for(i=0; i < len; i++) {
ul = T4(thread,threads,i,g_v);
umul_ppmmT4 (&hpl, &lpl, ul, sml);
lpl = T4_add(lpl,cl); // low product + incoming carry
cl = T4_add(T4_lt(lpl,cl),hpl); // next carry = high product + add overflow
T4_store(thread,threads,i,g_p,lpl);
}
T4_store(thread,threads,len,g_p,cl);
*size = len + T4_gt(cl,0);
}
// 64-bit limb version of mulScalarT4: g_p = g_v * sml over 'len' limbs in
// the strided layout, carry limb stored at index 'len', *size = len or
// len+1 depending on whether the carry limb is non-zero (mpn_mul_1).
__device__ void mulScalar(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, uint64_t sml, uint32_t *size){
uint64_t ul, cl, hpl, lpl;
uint32_t i;
cl = 0;
for(i=0; i < len; i++) {
ul = g_v[i*threads + thread];
umul_ppmm (hpl, lpl, ul, sml);
lpl += cl; // low product + incoming carry
cl = (lpl < cl) + hpl; // next carry = high product + add overflow
g_p[i*threads + thread] = lpl;
}
g_p[len*threads + thread] = cl;
*size = len + (cl != 0);
}
// sum[sofst .. sofst+xsz-1] += x * a over 64-bit limbs in the strided
// layout; returns the carry limb to be stored just above the touched
// range. Equivalent of GMP's mpn_addmul_1.
uint64_t __device__ addmul_1g (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, uint64_t a){
uint64_t carry=0;
uint32_t i;
uint64_t ul,lpl,hpl,rl;
for(i=0; i < xsz; i++){
ul = x[i*threads + thread];
umul_ppmm (hpl, lpl, ul, a);
lpl += carry; // low product + incoming carry
carry = (lpl < carry) + hpl; // overflow of that add + high product
rl = sum[(i+sofst) * threads + thread];
lpl = rl + lpl; // accumulate into the running sum
carry += lpl < rl; // overflow of the accumulate
sum[(i+sofst)*threads + thread] = lpl;
}
return carry;
}
// 128-bit leg version of addmul_1g: sum[sofst .. sofst+xsz-1] += x * a,
// returning the 128-bit carry leg for the caller to store above the range.
t4_t __device__ addmul_1gT4 (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, t4_t a){
t4_t carry;
uint32_t i;
t4_t ul,lpl,hpl,rl;
T4_set(&carry,0);
for(i=0; i < xsz; i++){
ul = T4(thread,threads,i,x);
umul_ppmmT4 (&hpl, &lpl, ul, a);
lpl = T4_add(lpl,carry); // low product + incoming carry
carry = T4_add(T4_lt(lpl,carry), hpl); // overflow of that add + high product
rl = T4(thread,threads,i+sofst,sum);
lpl = T4_add(rl,lpl); // accumulate into the running sum
carry = T4_add(T4_lt(lpl,rl),carry); // overflow of the accumulate
T4_store(thread,threads,i+sofst,sum,lpl);
}
return carry;
}
// Schoolbook long multiplication g_p = g_u * g_v with 64-bit limbs, one
// complete product per GPU thread (limb i of a thread's number lives at
// g[i*threads + thread]). ulegs/vlegs are the operands' limb counts.
// Expects a 1-D launch with at least 'threads' total threads.
__global__ void gpu_mul(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
// Swap so that the outer loop below runs over the shorter operand.
if(ulegs < vlegs){
uint64_t t1=ulegs;
ulegs = vlegs;
vlegs = t1;
uint64_t *t2 = g_u;
g_u = g_v;
g_v = t2;
}
uint32_t vofst=1,rofst=1,psize=0;
// Row 0: g_p = g_u * v[0].
mulScalar(thread,threads,ulegs,g_p,g_u,g_v[thread],&psize);
#if 1
// Rows 1..vlegs-1: shift-and-add one scaled row per remaining v limb.
while (vofst < vlegs) {
//clear high word //TODO: right
// printf("Size: %d\n", rp->size[tid]);
g_p[(psize+0)*threads+thread] = 0;
g_p[(ulegs+rofst)*threads + thread] = addmul_1g (thread, threads, g_p ,rofst , g_u, ulegs, g_v[vofst*threads+thread]);
vofst++; rofst++;
psize++;
}
// if(D_REF(rp->d,up->size[tid] + vp->size[tid] - 1,tid) != (uint64_t)0)
// rp->size[tid]++;
#endif
}
}
// Same schoolbook multiply as gpu_mul but operating on 128-bit legs
// (t4_t), halving the number of outer iterations. ulegs/vlegs arrive as
// 64-bit limb counts and are converted to leg counts by the >>= 1 below,
// so both operand lengths are assumed even.
__global__ void gpu_mulT4(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
// Swap so the outer loop runs over the shorter operand.
if(ulegs < vlegs){ ///everything written the other way around... are you kidding me ?!
uint64_t t1=ulegs;
ulegs = vlegs;
vlegs = t1;
uint64_t *t2 = g_u;
g_u = g_v;
g_v = t2;
}
// Convert 64-bit limb counts to 128-bit leg counts.
ulegs >>= 1; vlegs >>= 1;
if(thread == 0){
// cuPrintf("U: %d V: %d\n", ulegs, vlegs);
}
uint32_t vofst=1,rofst=1,psize=0;
// Row 0: g_p = g_u * v[0].
mulScalarT4(thread,threads,ulegs,g_p,g_u,T4(thread,threads,0,g_v),&psize);
#if 1
t4_t zero;
T4_set(&zero,0);
// while (vofst < vlegs) {
#pragma unroll
// Rows 1..vlegs-1: shift-and-add one scaled row per remaining v leg.
for (vofst=1;vofst<vlegs;vofst++) {
T4_store(thread,threads,psize,g_p,zero);
T4_store(thread,threads,ulegs+rofst,g_p,addmul_1gT4 (thread, threads, g_p ,rofst , g_u, ulegs,T4(thread,threads,vofst,g_v)));
// vofst++;
rofst++;
psize++;
}
#endif
}
}
// Wrap any CUDA runtime call: gpuErrchk(cudaMalloc(...)).
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
/*
 * Host-side error check behind gpuErrchk: print the CUDA error string with
 * the call site's file/line, and optionally terminate the process.
 *
 * code  - result of a CUDA runtime call
 * file  - call-site file name (pass __FILE__)
 * line  - call-site line number (pass __LINE__)
 * abort - when true (default), exit() with the error code
 *
 * Fix: 'file' is now const char* — passing the __FILE__ string literal to a
 * plain char* is a deprecated/ill-formed conversion in C++.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
	if (code != cudaSuccess)
	{
		fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
		if (abort) exit(code);
	}
}
// Host launcher for the 64-bit-limb big multiply kernel: g_p = g_a * g_b,
// one product per thread. 1-D grid, 512 threads per block, no shared
// memory. 'thr_id' and 'order' are unused, kept for launcher symmetry.
__host__ void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p,int order)
{
	const int tpb = 512; // do not change: tuned block size

	dim3 grid((threads + tpb - 1) / tpb); // ceil-div so every thread is covered
	dim3 block(tpb);

	gpu_mul<<<grid, block, 0>>>(threads, alegs, blegs, g_a, g_b, g_p);
}
// Host launcher for the 128-bit-leg multiply kernel. Note the operands are
// deliberately passed swapped (b first) — gpu_mulT4 re-orders internally so
// the shorter operand drives the outer loop. 'thr_id'/'order' are unused.
__host__ void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order)
{
	const int tpb = 256; // better occupancy (for both 780 and 750 ti's)

	dim3 grid((threads + tpb - 1) / tpb); // ceil-div so every thread is covered
	dim3 block(tpb);

	gpu_mulT4<<<grid, block, 0>>>(threads, blegs, alegs, g_b, g_a, g_p);
}
// Nothing to set up for the multiply kernels; kept so the init path can
// call a uniform *_init entry point for every CUDA module.
__host__ void mul_init(){
}

401
m7/cuda_ripemd160.cu

@ -0,0 +1,401 @@ @@ -0,0 +1,401 @@
/*
* ripemd-160 djm34
*
*/
/*
* ripemd-160 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
#include <stdio.h>
#include <memory.h>
#include "cuda_helper.h"
//#define SPH_C64(x) ((uint64_t)(x ## ULL))
//#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define ROTL SPH_ROTL32
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
// Device copy of the RIPEMD-160 initial chaining value (set in ripemd160_cpu_init).
static __constant__ uint32_t gpu_IV[5];
// Midstate after the first 64-byte message block, precomputed on the host
// in ripemd160_setBlock_120 so every thread can skip that compression.
static __constant__ uint32_t bufo[5];
// RIPEMD-160 initial chaining value (host copy).
static const uint32_t IV[5] = {
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
};
/*
 * Round functions for RIPEMD-128 and RIPEMD-160. These are the plain C
 * versions; the device kernel below #undef's and redefines F1..F5 in
 * terms of cuda_helper intrinsics before expanding the round body.
 */
#define F1(x, y, z) ((x) ^ (y) ^ (z))
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
/*
 * Round constants for RIPEMD-160: K1x drive the left line (ROUND1),
 * K2x the right line (ROUND2).
 */
#define K11 SPH_C32(0x00000000)
#define K12 SPH_C32(0x5A827999)
#define K13 SPH_C32(0x6ED9EBA1)
#define K14 SPH_C32(0x8F1BBCDC)
#define K15 SPH_C32(0xA953FD4E)
#define K21 SPH_C32(0x50A28BE6)
#define K22 SPH_C32(0x5C4DD124)
#define K23 SPH_C32(0x6D703EF3)
#define K24 SPH_C32(0x7A6D76E9)
#define K25 SPH_C32(0x00000000)
// One RIPEMD step: a = rotl(a + f(b,c,d) + msg_word + K, s) + e, then
// c is rotated by 10 as required by the specification.
#define RR(a, b, c, d, e, f, s, r, k) { \
a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
c = ROTL(c, 10); \
}
// ROUND1/ROUND2 pick the '1' (left) or '2' (right) register set and the
// matching K1x/K2x constant group.
#define ROUND1(a, b, c, d, e, f, s, r, k) \
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
#define ROUND2(a, b, c, d, e, f, s, r, k) \
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
/*
 * Full RIPEMD-160 compression function as a macro: mixes the sixteen
 * 32-bit words of 'in' into the five-word chaining value 'h'. It runs
 * two parallel lines (register suffix 1 = left, suffix 2 = right) of
 * five 16-step rounds each, then combines both lines into 'h' with the
 * standard rotated recombination. The expansion uses whatever F1..F5
 * are #defined at the point of use (host macros above, or the device
 * intrinsic versions defined inside the kernel).
 */
#define RIPEMD160_ROUND_BODY(in, h) { \
uint32_t A1, B1, C1, D1, E1; \
uint32_t A2, B2, C2, D2, E2; \
uint32_t tmp; \
\
A1 = A2 = (h)[0]; \
B1 = B2 = (h)[1]; \
C1 = C2 = (h)[2]; \
D1 = D2 = (h)[3]; \
E1 = E2 = (h)[4]; \
\
ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \
ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \
ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \
ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \
ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \
ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \
ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \
ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \
ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \
ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \
ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \
ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \
ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \
ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \
ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \
ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \
\
ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \
ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \
ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \
ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \
ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \
ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \
ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \
ROUND1(C, D, E, A, B, F2, 15, in[ 3], 2); \
ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \
ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \
ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \
ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \
ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \
ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \
ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \
ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \
\
ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \
ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \
ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \
ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \
ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \
ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \
ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \
ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \
ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \
ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \
ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \
ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \
ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \
ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \
ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \
ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \
\
ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \
ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \
ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \
ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \
ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \
ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \
ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \
ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \
ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \
ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \
ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \
ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \
ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \
ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \
ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \
ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \
\
ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \
ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \
ROUND1(E, A, B, C, D, F5, 5, in[ 5], 5); \
ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \
ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \
ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \
ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \
ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \
ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \
ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \
ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \
ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \
ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \
ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \
ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \
ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \
\
ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \
ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \
ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \
ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \
ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \
ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \
ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \
ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \
ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \
ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \
ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \
ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \
ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \
ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \
ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \
ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \
\
ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \
ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \
ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \
ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \
ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \
ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \
ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \
ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \
ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \
ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \
ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \
ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \
ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \
ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \
ROUND2(A, B, C, D, E, F4, 13, in[ 1], 2); \
ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \
\
ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \
ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \
ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \
ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \
ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \
ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \
ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \
ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \
ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \
ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \
ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \
ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \
ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \
ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \
ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \
ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \
\
ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \
ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \
ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \
ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \
ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \
ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \
ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \
ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \
ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \
ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \
ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \
ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \
ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \
ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \
ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \
ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \
\
ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \
ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \
ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \
ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \
ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \
ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \
ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \
ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \
ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \
ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \
ROUND2(B, C, D, E, A, F1, 6, in[13], 5); \
ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \
ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \
ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \
ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \
ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \
\
tmp = SPH_T32((h)[1] + C1 + D2); \
(h)[1] = SPH_T32((h)[2] + D1 + E2); \
(h)[2] = SPH_T32((h)[3] + E1 + A2); \
(h)[3] = SPH_T32((h)[4] + A1 + B2); \
(h)[4] = SPH_T32((h)[0] + B1 + C2); \
(h)[0] = tmp; \
}
// RIPEMD-160 over the 122-byte m7 header, one nonce per thread.
// The first 64-byte block is identical for all nonces, so its midstate is
// precomputed on the host (ripemd160_setBlock_120 -> constant 'bufo') and
// each thread only compresses blocks 2 (with its nonce injected) and 3
// (the padding/length block). The 160-bit digest is written as three
// 64-bit limbs strided across threads (limb i at outputHash[i*threads+thread]).
__global__ void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread ;
union {
uint8_t h1[64];
uint32_t h4[16];
uint64_t h8[8];
} hash;
// Swap the host round functions for cuda_helper device intrinsics before
// expanding RIPEMD160_ROUND_BODY below.
#undef F1
#undef F2
#undef F3
#undef F4
#undef F5
#define F1(x, y, z) xor3(x,y,z)
#define F2(x, y, z) xandx(x,y,z)
#define F3(x, y, z) xornot64(x,y,z)
#define F4(x, y, z) xandx(z,x,y)
#define F5(x, y, z) xornt64(x,y,z)
uint32_t in2[16],in3[16];
uint32_t in[16],buf[5];
// #pragma unroll 16
// for (int i=0;i<16;i++) {in[i]= c_PaddedMessage80[i];}
// Block 2: message words 16..30 with the per-thread nonce replacing word 29
// (bytes 116..119); word 30 carries the 0x80 padding byte, the rest is zero.
#pragma unroll 16
for (int i=0;i<16;i++) {if ((i+16)<29) {in2[i]= c_PaddedMessage80[i+16];}
else if ((i+16)==29) {in2[i]= nounce;}
else if ((i+16)==30) {in2[i]= c_PaddedMessage80[i+16];}
else {in2[i]= 0;}}
// Block 3: all-zero except the little-endian bit length 0x3d0 = 976 = 122*8.
#pragma unroll 16
for (int i=0;i<16;i++) {in3[i]=0;}
in3[14]=0x3d0;
// #pragma unroll 5
// for (int i=0;i<5;i++) {buf[i]=gpu_IV[i];}
// Start from the host-precomputed midstate of block 1.
#pragma unroll 5
for (int i=0;i<5;i++) {buf[i]=bufo[i];}
// RIPEMD160_ROUND_BODY(in, buf); //no need to calculate it several time (need to moved)
RIPEMD160_ROUND_BODY(in2, buf);
RIPEMD160_ROUND_BODY(in3, buf);
// Zero word 5 so limb 2 (h8[2]) has a clean upper half above the 160-bit digest.
hash.h4[5]=0;
#pragma unroll 5
for (int i=0;i<5;i++)
{hash.h4[i]=buf[i];
}
//uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
//#pragma unroll 3
//for (int i=0;i<3;i++) {outHash[i]=hash.h8[i];}
#pragma unroll 3
for (int i=0;i<3;i++) {outputHash[i*threads+thread]=hash.h8[i];}
//#pragma unroll 8
//for (int i=0;i<8;i++) { if (i<3) {outputHash[i*threads+thread]=hash.h8[i];} else {outputHash[i*threads+thread]=0;}}
}
}
// One-time setup: copy the RIPEMD-160 initial chaining value into the
// device constant gpu_IV. 'thr_id' and 'threads' are unused, kept for
// init-function symmetry with the other CUDA modules.
void ripemd160_cpu_init(int thr_id, int threads)
{
	cudaMemcpyToSymbol(gpu_IV, IV, sizeof(IV), 0, cudaMemcpyHostToDevice);
}
// Upload the 122-byte m7 header (plus 0x80 padding byte) to the device and
// precompute, on the host, the RIPEMD-160 midstate of the first 64-byte
// block into the device constant 'bufo' — that block is nonce-independent,
// so the kernel never needs to compress it.
__host__ void ripemd160_setBlock_120(void *pdata)
{
unsigned char PaddedMessage[128];
uint8_t ending =0x80;
memcpy(PaddedMessage, pdata, 122);
memset(PaddedMessage+122,ending,1);
memset(PaddedMessage+123, 0, 5); //useless
cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
// Restore the host (plain C) round functions for the host-side
// RIPEMD160_ROUND_BODY expansion below — the kernel redefined them.
#undef F1
#undef F2
#undef F3
#undef F4
#undef F5
#define F1(x, y, z) ((x) ^ (y) ^ (z))
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
uint32_t* alt_data =(uint32_t*)pdata;
uint32_t in[16],buf[5];
for (int i=0;i<16;i++) {in[i]= alt_data[i];}
for (int i=0;i<5;i++) {buf[i]=IV[i];}
RIPEMD160_ROUND_BODY(in, buf); //no need to calculate it several time (need to moved)
cudaMemcpyToSymbol(bufo, buf, 5*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
}
// Host launcher for the RIPEMD-160 m7 kernel: one thread per nonce,
// 1-D grid, 256 threads per block, no shared memory. Synchronizes on the
// miner's per-thread stream ordering via MyStreamSynchronize.
__host__ void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
	const int tpb = 256; // do not change: tuned block size

	dim3 grid((threads + tpb - 1) / tpb); // ceil-div so every nonce is covered
	dim3 block(tpb);

	m7_ripemd160_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}

795
m7/cuda_tiger192.cu

@ -0,0 +1,795 @@ @@ -0,0 +1,795 @@
/*
* tiger-192 djm34
*
*/
/*
* tiger-192 kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 djm34
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author phm <phm@inbox.com>
*/
#include <stdio.h>
#include <memory.h>
#include "cuda_helper.h"
//#define SPH_C64(x) ((uint64_t)(x ## ULL))
//#define SPH_C32(x) ((uint32_t)(x ## U))
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define ROTL SPH_ROTL32
//#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_T64(x) (x)
// from heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
// Midstate after the nonce-independent part of the message, precomputed on
// the host (setBlock code is outside this chunk — TODO confirm).
__constant__ uint64_t bufo[3];
// Device copy of the Tiger initial chaining value III below.
static __constant__ uint64_t gpu_III[3];
// The four Tiger S-boxes in device constant memory (uploaded from the
// cpu_T* host tables; upload code not visible in this chunk).
static __constant__ uint64_t T1[256];
static __constant__ uint64_t T2[256];
static __constant__ uint64_t T3[256];
static __constant__ uint64_t T4[256];
// Tiger initial chaining value (host copy).
static const uint64_t III[3] = {
SPH_C64(0x0123456789ABCDEF),SPH_C64(0xFEDCBA9876543210),SPH_C64(0xF096A5B4C3B2E187)
};
// First of the four 256-entry Tiger S-boxes (standard Tiger/192 tables).
// Host-side copy; presumably uploaded into the __constant__ T1[] by an
// init function outside this chunk — TODO confirm.
static const uint64_t cpu_T1[256] = {
SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC),
SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A),
SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2),
SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870),
SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C),
SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C),
SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A),
SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E),
SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7),
SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92),
SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D),
SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E),
SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42),
SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04),
SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC),
SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F),
SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B),
SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3),
SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1),
SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066),
SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B),
SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05),
SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28),
SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169),
SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410),
SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B),
SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607),
SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD),
SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5),
SPH_C64(0xC4B17AD28BE23A4C), SPH_C64(0xCAB2F0FC6A3E6A2E),
SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446),
SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE),
SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7),
SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853),
SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201),
SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14),
SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119),
SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2),
SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329),
SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67),
SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D),
SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062),
SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9),
SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C),
SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B),
SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F),
SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC),
SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556),
SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4),
SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881),
SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A),
SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779),
SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC),
SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4),
SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF),
SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96),
SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D),
SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808),
SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE),
SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4),
SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1),
SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017),
SPH_C64(0x0BD7BA3EBB5DC726), SPH_C64(0xE54BCA55B86ADB39),
SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD),
SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194),
SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2),
SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436),
SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8),
SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645),
SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49),
SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F),
SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5),
SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC),
SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA),
SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A),
SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530),
SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631),
SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143),
SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8),
SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD),
SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1),
SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A),
SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06),
SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB),
SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641),
SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40),
SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D),
SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D),
SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8),
SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D),
SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8),
SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886),
SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03),
SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65),
SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6),
SPH_C64(0x155BAED099047718), SPH_C64(0x68755E4C3D50BAA6),
SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4),
SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450),
SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7),
SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B),
SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5),
SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE),
SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9),
SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024),
SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111),
SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7),
SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1),
SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952),
SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0),
SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77),
SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C),
SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89),
SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2),
SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF),
SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4),
SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34),
SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE),
SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C),
SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A),
SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895),
SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38),
SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74),
SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187),
SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90),
SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648),
SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB),
SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D),
SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A)
};
/* Tiger S-box T2: 256 fixed 64-bit constants from the Tiger reference
 * implementation. Host-side copy; uploaded to the device symbol T2 by
 * tiger192_cpu_init() and also used directly by the CPU midstate
 * computation in tiger192_setBlock_120(). Do not edit the values. */
static const uint64_t cpu_T2[256] = {
	SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98),
	SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5),
	SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5),
	SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99),
	SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16),
	SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3),
	SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E),
	SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2),
	SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717),
	SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2),
	SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9),
	SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD),
	SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76),
	SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA),
	SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA),
	SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B),
	SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC),
	SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85),
	SPH_C64(0xD33525782A7974AA), SPH_C64(0xA7E25D09078AC09B),
	SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70),
	SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3),
	SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D),
	SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC),
	SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C),
	SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C),
	SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822),
	SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258),
	SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5),
	SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06),
	SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E),
	SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665),
	SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762),
	SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE),
	SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96),
	SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4),
	SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063),
	SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3),
	SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE),
	SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E),
	SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8),
	SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD),
	SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5),
	SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24),
	SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B),
	SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F),
	SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A),
	SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556),
	SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A),
	SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D),
	SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4),
	SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F),
	SPH_C64(0xB1BD6F1B4D502843), SPH_C64(0x1C761BA38FFF4012),
	SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A),
	SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621),
	SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B),
	SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97),
	SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8),
	SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB),
	SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30),
	SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0),
	SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972),
	SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C),
	SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8),
	SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7),
	SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635),
	SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C),
	SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3),
	SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61),
	SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132),
	SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0),
	SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31),
	SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF),
	SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3),
	SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB),
	SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC),
	SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475),
	SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967),
	SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE),
	SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7),
	SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1),
	SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF),
	SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40),
	SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D),
	SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9),
	SPH_C64(0xFA9E400DC4889995), SPH_C64(0xD1823ECB45721568),
	SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB),
	SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0),
	SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE),
	SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1),
	SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E),
	SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D),
	SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183),
	SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09),
	SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D),
	SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784),
	SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB),
	SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B),
	SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23),
	SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81),
	SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F),
	SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4),
	SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9),
	SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60),
	SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF),
	SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E),
	SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A),
	SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615),
	SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839),
	SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2),
	SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552),
	SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC),
	SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746),
	SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A),
	SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F),
	SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F),
	SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054),
	SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289),
	SPH_C64(0x3EFF6DDA9E3100DB), SPH_C64(0xB38DC39FDFCC8847),
	SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642),
	SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6),
	SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB),
	SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C),
	SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11),
	SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF),
	SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A),
	SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86),
	SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371),
	SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E)
};
/* Tiger S-box T3: 256 fixed 64-bit constants from the Tiger reference
 * implementation. Host-side copy; uploaded to the device symbol T3 by
 * tiger192_cpu_init() and used by the CPU midstate in
 * tiger192_setBlock_120(). Do not edit the values. */
static const uint64_t cpu_T3[256] = {
	SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281),
	SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63),
	SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66),
	SPH_C64(0xA103E279970E93D4), SPH_C64(0xBECDEC77B0E45E71),
	SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017),
	SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579),
	SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172),
	SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41),
	SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5),
	SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47),
	SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491),
	SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8),
	SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE),
	SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902),
	SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D),
	SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE),
	SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067),
	SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F),
	SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7),
	SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB),
	SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854),
	SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851),
	SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4),
	SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2),
	SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A),
	SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74),
	SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D),
	SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA),
	SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07),
	SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840),
	SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F),
	SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB),
	SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9),
	SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30),
	SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9),
	SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88),
	SPH_C64(0xE41433083F820801), SPH_C64(0xFD0DFE409A1AF9B5),
	SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252),
	SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09),
	SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA),
	SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0),
	SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6),
	SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF),
	SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559),
	SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773),
	SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5),
	SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778),
	SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B),
	SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E),
	SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6),
	SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC),
	SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749),
	SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C),
	SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE),
	SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3),
	SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D),
	SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15),
	SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E),
	SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC),
	SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A),
	SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199),
	SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80),
	SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77),
	SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF),
	SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503),
	SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B),
	SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C),
	SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76),
	SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F),
	SPH_C64(0x691BC30EC5FAA427), SPH_C64(0xFF246311CC327143),
	SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396),
	SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536),
	SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1),
	SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D),
	SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8),
	SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D),
	SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB),
	SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1),
	SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A),
	SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32),
	SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2),
	SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28),
	SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F),
	SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044),
	SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC),
	SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C),
	SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8),
	SPH_C64(0xE0BE477656A2389F), SPH_C64(0x4D289B5E67AB2DA0),
	SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365),
	SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8),
	SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14),
	SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00),
	SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208),
	SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4),
	SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1),
	SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF),
	SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5),
	SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6),
	SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7),
	SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13),
	SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9),
	SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA),
	SPH_C64(0x59E86297D87F5733), SPH_C64(0x298EF221898DB0E7),
	SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45),
	SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42),
	SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A),
	SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4),
	SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534),
	SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29),
	SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C),
	SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7),
	SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1),
	SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C),
	SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2),
	SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA),
	SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50),
	SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE),
	SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E),
	SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7),
	SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25),
	SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6),
	SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68),
	SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B),
	SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95),
	SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05),
	SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9),
	SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C),
	SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E)
};
/* Tiger S-box T4: 256 fixed 64-bit constants from the Tiger reference
 * implementation. Host-side copy; uploaded to the device symbol T4 by
 * tiger192_cpu_init() and used by the CPU midstate in
 * tiger192_setBlock_120(). Do not edit the values. */
static const uint64_t cpu_T4[256] = {
	SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5),
	SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7),
	SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45),
	SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF),
	SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F),
	SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0),
	SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069),
	SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7),
	SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2),
	SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB),
	SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94),
	SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893),
	SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C),
	SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D),
	SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281),
	SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1),
	SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F),
	SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C),
	SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847),
	SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04),
	SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877),
	SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4),
	SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06),
	SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262),
	SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE),
	SPH_C64(0xA679B9D9D72BBA20), SPH_C64(0x53841C0D912D43A5),
	SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF),
	SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742),
	SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495),
	SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E),
	SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570),
	SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1),
	SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C),
	SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B),
	SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567),
	SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D),
	SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D),
	SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF),
	SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0),
	SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6),
	SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE),
	SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208),
	SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309),
	SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27),
	SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F),
	SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD),
	SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7),
	SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C),
	SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12),
	SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216),
	SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8),
	SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17),
	SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA),
	SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64),
	SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC),
	SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013),
	SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7),
	SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476),
	SPH_C64(0xFEB2FBDA3D03C12D), SPH_C64(0xAE0BCED2EE43889A),
	SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D),
	SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C),
	SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638),
	SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD),
	SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9),
	SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C),
	SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3),
	SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78),
	SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453),
	SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074),
	SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD),
	SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397),
	SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951),
	SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE),
	SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C),
	SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9),
	SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80),
	SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310),
	SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B),
	SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C),
	SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1),
	SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC),
	SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC),
	SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B),
	SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE),
	SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D),
	SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D),
	SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633),
	SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68),
	SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4),
	SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782),
	SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6),
	SPH_C64(0x6F83AEB0317AC588), SPH_C64(0x01E6CA4A86381F21),
	SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB),
	SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE),
	SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA),
	SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58),
	SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2),
	SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC),
	SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F),
	SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB),
	SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63),
	SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457),
	SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF),
	SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99),
	SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A),
	SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A),
	SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01),
	SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829),
	SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2),
	SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0),
	SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8),
	SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966),
	SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B),
	SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503),
	SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A),
	SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0),
	SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A),
	SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9),
	SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0),
	SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B),
	SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90),
	SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4),
	SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA),
	SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7),
	SPH_C64(0xBF6C70E5F776CBB1), SPH_C64(0x411218F2EF552BED),
	SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044),
	SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065),
	SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F)
};
/* One Tiger pass: applies ROUND to each of the eight message words X0..X7,
 * rotating the roles of the three state words a/b/c between rounds.
 * ROUND itself is (re)defined separately for the GPU and CPU code paths. */
#define PASS(a, b, c, mul) { \
	ROUND(a, b, c, X0, mul); \
	ROUND(b, c, a, X1, mul); \
	ROUND(c, a, b, X2, mul); \
	ROUND(a, b, c, X3, mul); \
	ROUND(b, c, a, X4, mul); \
	ROUND(c, a, b, X5, mul); \
	ROUND(a, b, c, X6, mul); \
	ROUND(b, c, a, X7, mul); \
}
/* Per-pass state multipliers (5, 7, 9), truncated to 64 bits by SPH_T64. */
#define MUL5(x) SPH_T64((x) * SPH_C64(5))
#define MUL7(x) SPH_T64((x) * SPH_C64(7))
#define MUL9(x) SPH_T64((x) * SPH_C64(9))
/* Alternative untruncated forms, kept for reference (disabled):
#define MUL5(x) ((x) * SPH_C64(5))
#define MUL7(x) ((x) * SPH_C64(7))
#define MUL9(x) ((x) * SPH_C64(9))
*/
/* Tiger key schedule: remixes the eight message words between passes. */
#define KSCHED { \
	X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); \
	X1 ^= X0; \
	X2 = SPH_T64(X2 + X1); \
	X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \
	X4 ^= X3; \
	X5 = SPH_T64(X5 + X4); \
	X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \
	X7 ^= X6; \
	X0 = SPH_T64(X0 + X7); \
	X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \
	X2 ^= X1; \
	X3 = SPH_T64(X3 + X2); \
	X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \
	X5 ^= X4; \
	X6 = SPH_T64(X6 + X5); \
	X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \
}
/* One full Tiger compression over a 64-byte block `in` (eight 64-bit words):
 * three passes (multipliers 5, 7, 9) separated by key schedules, then the
 * Tiger feed-forward (xor / subtract / add) into the 3-word state r[]. */
#define TIGER_ROUND_BODY(in, r) { \
	uint64_t A, B, C; \
	uint64_t X0, X1, X2, X3, X4, X5, X6, X7; \
	\
	A = (r)[0]; \
	B = (r)[1]; \
	C = (r)[2]; \
	\
	X0 = (in[0]); \
	X1 = (in[1]); \
	X2 = (in[2]); \
	X3 = (in[3]); \
	X4 = (in[4]); \
	X5 = (in[5]); \
	X6 = (in[6]); \
	X7 = (in[7]); \
	PASS(A, B, C, MUL5); \
	KSCHED; \
	PASS(C, A, B, MUL7); \
	KSCHED; \
	PASS(B, C, A, MUL9); \
	\
	(r)[0] ^= A; \
	(r)[1] = SPH_T64(B - (r)[1]); \
	(r)[2] = SPH_T64(C + (r)[2]); \
}
__global__ void m7_tiger192_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
	// Tiger-192 tail for the 122-byte M7 header. The host precomputed the
	// chaining state after the first 64-byte block (symbol `bufo`, see
	// tiger192_setBlock_120); each thread patches its nonce into the second
	// block, compresses it plus the final length block, and stores a 3-word
	// digest zero-extended to 8 words, column-major: word i of thread t at
	// outputHash[i*threads + t].
	//
	// The four 256-entry S-boxes are staged into shared memory:
	// T1 at [0..255], T2 at [256..511], T3 at [512..767], T4 at [768..1023].
	// Requires blockDim.x >= 256 (the host wrapper launches 640).
	__shared__ uint64_t sharedMem[1024];
	if(threadIdx.x < 256)
	{
		sharedMem[threadIdx.x]     = T1[threadIdx.x];
		sharedMem[threadIdx.x+256] = T2[threadIdx.x];
		sharedMem[threadIdx.x+512] = T3[threadIdx.x];
		sharedMem[threadIdx.x+768] = T4[threadIdx.x];
	}
	__syncthreads();

	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;

// GPU variants of the Tiger round macros. PASS is token-identical to the
// file-scope definition (a legal redefinition); ROUND reads the S-boxes
// from shared memory, where +256/+512/+768 select T2/T3/T4 in the same
// order as the host-side cpu_T1..cpu_T4 lookups.
#define PASS(a, b, c, mul) { \
	ROUND(a, b, c, X0, mul); \
	ROUND(b, c, a, X1, mul); \
	ROUND(c, a, b, X2, mul); \
	ROUND(a, b, c, X3, mul); \
	ROUND(b, c, a, X4, mul); \
	ROUND(c, a, b, X5, mul); \
	ROUND(a, b, c, X6, mul); \
	ROUND(b, c, a, X7, mul); \
}

#define ROUND(a, b, c, x, mul) { \
	c ^= x; \
	a = SPH_T64(a - (sharedMem[c & 0xFF] ^ sharedMem[((c >> 16) & 0xFF)+256] \
		^ sharedMem[((c >> 32) & 0xFF)+512] ^ sharedMem[((c >> 48) & 0xFF)+768])); \
	b = SPH_T64(b + (sharedMem[((c >> 8) & 0xFF)+768] ^ sharedMem[((c >> 24) & 0xFF)+512] \
		^ sharedMem[((c >> 40) & 0xFF)+256] ^ sharedMem[(c >> 56) & 0xFF])); \
	b = mul(b); \
}

		uint64_t buf[3];
		uint64_t in2[8], in3[8];

		// Second message block = padded header bytes 64..127; 32-bit word 13
		// of this block (header bytes 116..119) carries the per-thread nonce.
		#pragma unroll 8
		for (int i=0;i<8;i++) {in2[i]= c_PaddedMessage80[i+8];}
		uint32_t* Mess = (uint32_t*)in2;
		Mess[13]=nounce;

		// Final block: all zero except the 64-bit bit length (122*8 = 0x3d0).
		#pragma unroll 8
		for (int i=0;i<8;i++) {in3[i]=0;}
		in3[7]=0x3d0;

		// Resume from the host-computed midstate over the first 64 bytes.
		#pragma unroll 3
		for (int i=0;i<3;i++) {buf[i]=bufo[i];}

		TIGER_ROUND_BODY(in2, buf);
		TIGER_ROUND_BODY(in3, buf);

		// Tiger yields 192 bits; pad words 3..7 with zero so downstream
		// stages see a full 512-bit operand.
		#pragma unroll 8
		for (int i=0;i<8;i++) { if (i<3) {outputHash[i*threads+thread]=buf[i];} else {outputHash[i*threads+thread]=0;}}
	} //// threads
}
void tiger192_cpu_init(int thr_id, int threads)
{
	// One-time upload of the four 256-entry Tiger S-boxes and the initial
	// state III to their device symbols. The copies are independent, so
	// ordering does not matter. thr_id/threads are unused here; the
	// parameters exist for interface parity with the other *_cpu_init hooks.
	cudaMemcpyToSymbol(T1, cpu_T1, sizeof(cpu_T1), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T2, cpu_T2, sizeof(cpu_T2), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T3, cpu_T3, sizeof(cpu_T3), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T4, cpu_T4, sizeof(cpu_T4), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(gpu_III, III, sizeof(III), 0, cudaMemcpyHostToDevice);
}
__host__ void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
	// One GPU thread per nonce candidate. 640 threads per block: this must
	// stay >= 256 so the kernel's S-box staging step fills shared memory.
	const int tpb = 640;

	dim3 block(tpb);
	dim3 grid((threads + tpb - 1) / tpb);

	// No dynamic shared memory: the kernel uses a static __shared__ array.
	m7_tiger192_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
	MyStreamSynchronize(NULL, order, thr_id);
}
__host__ void tiger192_setBlock_120(void *pdata)
{
	// Pad the 122-byte M7 header to two 64-byte Tiger blocks (0x01 marker
	// byte at offset 122, zeros through 127) and upload it for the kernel.
	// Then compute the Tiger chaining state over the first 64 bytes on the
	// CPU -- those bytes never contain the nonce (it lives at bytes
	// 116..119, i.e. in the second block) -- and upload it as `bufo`.
	unsigned char PaddedMessage[128];
	uint8_t ending =0x01;
	memcpy(PaddedMessage, pdata, 122);
	memset(PaddedMessage+122,ending,1);
	memset(PaddedMessage+123, 0, 5); //useless
	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);

// Rebind the Tiger macros to their CPU forms so TIGER_ROUND_BODY below
// expands against the host-side cpu_T1..cpu_T4 tables. The MULx macros
// drop SPH_T64 here (unsigned 64-bit wraparound is implicit).
#undef ROUND
#undef MUL5
#undef MUL7
#undef MUL9
#define MUL5(x) ((x) * SPH_C64(5))
#define MUL7(x) ((x) * SPH_C64(7))
#define MUL9(x) ((x) * SPH_C64(9))
#define ROUND(a, b, c, x, mul) { \
	c ^= x; \
	a = SPH_T64(a - (cpu_T1[c & 0xFF] ^ cpu_T2[(c >> 16) & 0xFF] \
		^ cpu_T3[(c >> 32) & 0xFF] ^ cpu_T4[(c >> 48) & 0xFF])); \
	b = SPH_T64(b + (cpu_T4[(c >> 8) & 0xFF] ^ cpu_T3[(c >> 24) & 0xFF] \
		^ cpu_T2[(c >> 40) & 0xFF] ^ cpu_T1[(c >> 56) & 0xFF])); \
	b = mul(b); \
}

	// Midstate: one compression over the first block starting from the
	// Tiger initial values III.
	uint64_t* alt_data = (uint64_t*) pdata;
	uint64_t in[8],buf[3];
	for (int i=0;i<8;i++) {in[i]= alt_data[i];}
	for (int i=0;i<3;i++) {buf[i]=III[i];}
	TIGER_ROUND_BODY(in, buf)
	cudaMemcpyToSymbol( bufo, buf, 3*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}

359
m7/m7.cu

@ -0,0 +1,359 @@ @@ -0,0 +1,359 @@
/*
* m7 algorithm
*
*/
extern "C"
{
#include "sph/sph_sha2.h"
#include "sph/sph_keccak.h"
#include "sph/sph_ripemd.h"
#include "sph/sph_haval.h"
#include "sph/sph_tiger.h"
#include "sph/sph_whirlpool.h"
#include "sph/sph_blake.h"
#include "miner.h"
}
#include "cuda_helper.h"
// configure with --with-mpir-src=...
#include "mpir.h"
// from cpu-miner.c
extern int device_map[8];
extern bool opt_benchmark;

// Per-GPU device buffers, indexed by thr_id (up to 8 GPUs).
// NOTE(review): only d_hash is visibly used in this chunk; the buffers
// below appear to hold the per-algorithm hash outputs and the partial
// products of the big-number multiply -- confirm against the scanhash
// routine further down the file.
static uint64_t *d_hash[8];
static uint64_t *FinalHash[8];
static uint64_t *KeccakH[8];
static uint64_t *WhirlpoolH[8];
static uint64_t *Sha512H[8];
static uint64_t *Sha256H[8];
static uint64_t *HavalH[8];
static uint64_t *TigerH[8];
static uint64_t *RipemdH[8];
static uint64_t *d_prod0[8];
static uint64_t *d_prod1[8];

extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#if 0
/* Disabled 256-bit mpz helpers; the live code path only needs the 512-bit
 * importer below. NOTE(review): mpz_get_uint256 nulls its output pointer
 * (`u=0`) before calling mpz_export, so if re-enabled it would export to a
 * NULL destination -- fix before use. */
static void mpz_set_uint256(mpz_t r, uint8_t *u)
{
	mpz_import(r, 32 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u);
}

static void mpz_get_uint256(mpz_t r, uint8_t *u)
{
	u=0;
	mpz_export(u, 0, -1, sizeof(unsigned long), -1, 0, r);
}
#endif
/* Import a 64-byte buffer into the big integer r: least-significant word
 * first (order -1), native endianness within each unsigned long word,
 * no nails. Used to feed each 512-bit hash into the M7 product. */
static void mpz_set_uint512(mpz_t r, uint8_t *u)
{
	mpz_import(r, 64 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u);
}
/* If the low 32 bytes of hash512 are all zero, force the first byte to 1
 * (a zero factor would collapse the M7 hash product). Bytes 32..63 are
 * intentionally not inspected. */
static void set_one_if_zero(uint8_t *hash512) {
	int idx = 0;
	while (idx < 32 && hash512[idx] == 0)
		++idx;
	if (idx == 32)
		hash512[0] = 1;
}
extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector, uint64_t *d_hash, int order);
extern void m7_sha256_setBlock_120(void *data,const void *ptarget);
extern void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
extern void m7_sha256_cpu_init(int thr_id, int threads);
extern void m7_sha512_cpu_init(int thr_id, int threads);
extern void m7_sha512_setBlock_120(void *pdata);
extern void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void ripemd160_cpu_init(int thr_id, int threads);
extern void ripemd160_setBlock_120(void *pdata);
extern void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void tiger192_cpu_init(int thr_id, int threads);
extern void tiger192_setBlock_120(void *pdata);
extern void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void m7_bigmul_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t* Hash3, uint64_t* Hash4,
uint64_t *Hash5, uint64_t* Hash6, uint64_t *Hash7, uint32_t foundNonce, uint32_t StartNonce,int order);
extern void m7_bigmul1_cpu(int thr_id, int threads, int len1, int len2, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order);
extern void m7_bigmul_init(int thr_id, int threads);
extern void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
extern void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
extern void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
extern void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
extern void mul_init();
extern void m7_keccak512_setBlock_120(void *pdata);
extern void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
extern void m7_keccak512_cpu_init(int thr_id, int threads);
extern void m7_whirlpool512_cpu_init(int thr_id, int threads, int flag);
extern void m7_whirlpool512_setBlock_120(void *pdata);
extern void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
extern void m7_haval256_cpu_init(int thr_id, int threads);
extern void m7_haval256_setBlock_120(void *data);
extern void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
extern void cuda_check_cpu_init(int thr_id, int threads);
extern void cuda_check_cpu_setTarget(const void *ptarget);
// m7 Hashfunktion
extern "C" void m7_hash(void *state, const void *input, uint32_t TheNonce, int debug)
{
	/*
	 * M7 proof-of-work hash:
	 *   sha256( sha256(h) * sha512(h) * keccak512(h) * whirlpool(h)
	 *           * haval256(h) * tiger192(h) * ripemd160(h) )
	 * where h is the 122-byte block header. The seven digests are read
	 * as 512-bit little-endian integers (forced non-zero), multiplied
	 * together with GMP, and the product bytes are sha256'd.
	 *
	 * state    out: 32-byte final sha256 digest
	 * input    in:  122-byte block header
	 * TheNonce nonce, written into header word 29 (byte offset 116)
	 * debug    when 1, dump all seven intermediate digests via applog
	 *
	 * Fixes vs. the previous version: bns[] was mpz_init'd twice (leak),
	 * `product` was init'd but never used or cleared (leak), bdata was
	 * never freed (leak), and several locals were unused.
	 */
	const int M7_MIDSTATE_LEN = 116;

	uint32_t data[32];
	uint32_t *data_p64 = data + (M7_MIDSTATE_LEN / sizeof(data[0]));
	uint8_t bhash[7][64];
	uint32_t hash[8];
	uint8_t *bdata = NULL;
	mpz_t bns[7];

	memcpy(data, input, 122);

	/* init each big number exactly once */
	for (int i = 0; i < 7; i++)
		mpz_init(bns[i]);

	/* midstate over the first 116 bytes (independent of the nonce) */
	sph_sha256_context ctx_sha256;
	sph_sha512_context ctx_sha512;
	sph_keccak512_context ctx_keccak;
	sph_whirlpool_context ctx_whirlpool;
	sph_haval256_5_context ctx_haval;
	sph_tiger_context ctx_tiger;
	sph_ripemd160_context ctx_ripemd;

	sph_sha256_init(&ctx_sha256);
	sph_sha256(&ctx_sha256, data, M7_MIDSTATE_LEN);
	sph_sha512_init(&ctx_sha512);
	sph_sha512(&ctx_sha512, data, M7_MIDSTATE_LEN);
	sph_keccak512_init(&ctx_keccak);
	sph_keccak512(&ctx_keccak, data, M7_MIDSTATE_LEN);
	sph_whirlpool_init(&ctx_whirlpool);
	sph_whirlpool(&ctx_whirlpool, data, M7_MIDSTATE_LEN);
	sph_haval256_5_init(&ctx_haval);
	sph_haval256_5(&ctx_haval, data, M7_MIDSTATE_LEN);
	sph_tiger_init(&ctx_tiger);
	sph_tiger(&ctx_tiger, data, M7_MIDSTATE_LEN);
	sph_ripemd160_init(&ctx_ripemd);
	sph_ripemd160(&ctx_ripemd, data, M7_MIDSTATE_LEN);

	/* the nonce sits in the 6-byte tail hashed below */
	data[29] = TheNonce;
	memset(bhash, 0, 7 * 64);

	sph_sha256_context ctx2_sha256 = ctx_sha256;
	sph_sha256(&ctx2_sha256, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_sha256_close(&ctx2_sha256, (void*)(bhash[0]));

	sph_sha512_context ctx2_sha512 = ctx_sha512;
	sph_sha512(&ctx2_sha512, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_sha512_close(&ctx2_sha512, (void*)(bhash[1]));

	sph_keccak512_context ctx2_keccak = ctx_keccak;
	sph_keccak512(&ctx2_keccak, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_keccak512_close(&ctx2_keccak, (void*)(bhash[2]));

	sph_whirlpool_context ctx2_whirlpool = ctx_whirlpool;
	sph_whirlpool(&ctx2_whirlpool, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_whirlpool_close(&ctx2_whirlpool, (void*)(bhash[3]));

	sph_haval256_5_context ctx2_haval = ctx_haval;
	sph_haval256_5(&ctx2_haval, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_haval256_5_close(&ctx2_haval, (void*)(bhash[4]));

	sph_tiger_context ctx2_tiger = ctx_tiger;
	sph_tiger(&ctx2_tiger, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_tiger_close(&ctx2_tiger, (void*)(bhash[5]));

	sph_ripemd160_context ctx2_ripemd = ctx_ripemd;
	sph_ripemd160(&ctx2_ripemd, data_p64, 122 - M7_MIDSTATE_LEN);
	sph_ripemd160_close(&ctx2_ripemd, (void*)(bhash[6]));

	if (debug == 1) {
		for (int i=0; i<16; i++) {
			applog(LOG_INFO,"sha256[%d]=%02x %02x %02x %02x sha512[%d]=%02x %02x %02x %02x keccak[%d]=%02x %02x %02x %02x whirlpool[2][%d]=%02x %02x %02x %02x "
				"haval[%d]=%02x %02x %02x %02x tiger[%d]=%02x %02x %02x %02x ripemd[%d]=%02x %02x %02x %02x\n",
				i,bhash[0][4*i+3],bhash[0][4*i+2],bhash[0][4*i+1],bhash[0][4*i+0],
				i,bhash[1][4*i+3],bhash[1][4*i+2],bhash[1][4*i+1],bhash[1][4*i+0],
				i,bhash[2][4*i+3],bhash[2][4*i+2],bhash[2][4*i+1],bhash[2][4*i+0],
				i,bhash[3][4*i+3],bhash[3][4*i+2],bhash[3][4*i+1],bhash[3][4*i+0],
				i,bhash[4][4*i+3],bhash[4][4*i+2],bhash[4][4*i+1],bhash[4][4*i+0],
				i,bhash[5][4*i+3],bhash[5][4*i+2],bhash[5][4*i+1],bhash[5][4*i+0],
				i,bhash[6][4*i+3],bhash[6][4*i+2],bhash[6][4*i+1],bhash[6][4*i+0]
			);
		}
	}

	/* bns[i] <- digest i as a 512-bit integer, forced non-zero so the
	 * product can never collapse to zero */
	for (int i = 0; i < 7; i++) {
		set_one_if_zero(bhash[i]);
		mpz_set_uint512(bns[i], bhash[i]);
	}

	/* fold the seven factors into bns[0] */
	for (int i = 6; i > 0; i--)
		mpz_mul(bns[i-1], bns[i-1], bns[i]);

	/* export the product as little-endian bytes and sha256 it */
	int bytes = (int) mpz_sizeinbase(bns[0], 256);
	bdata = (uint8_t *) malloc(bytes);
	mpz_export((void *)bdata, NULL, -1, 1, 0, 0, bns[0]);

	sph_sha256_context ctx_final_sha256;
	sph_sha256_init(&ctx_final_sha256);
	sph_sha256(&ctx_final_sha256, bdata, bytes);
	sph_sha256_close(&ctx_final_sha256, (void*)(hash));

	memcpy(state, hash, 32);

	/* release everything (previously leaked on every call) */
	free(bdata);
	for (int i = 0; i < 7; i++)
		mpz_clear(bns[i]);
}
/*
 * Scan nonces for the M7 algorithm on GPU `thr_id`.
 * pdata is the 122-byte header as 32-bit words; the nonce is pdata[29]
 * (byte offset 116). Returns 1 (and stores the winning nonce in
 * pdata[29]) when a share validates on the CPU, 0 when the nonce space
 * is exhausted or a restart is requested. *hashes_done receives the
 * number of nonces covered.
 */
extern "C" int scanhash_m7(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce,
unsigned long *hashes_done)
{
const int throughput = 256*256*8*2; /* 2^20 nonces per GPU pass */
const uint32_t FirstNonce = pdata[29];
static bool init[8] = {0,0,0,0,0,0,0,0};
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000ff; /* benchmark: relax target (const cast is deliberate) */
if (!init[thr_id])
{
/* one-time per-GPU setup: digest buffers (8 x 64-bit words each) and
 * two ping-pong product buffers sized for up to 38 64-bit legs.
 * NOTE(review): cudaMalloc return codes are not checked — an
 * allocation failure here would surface later as a kernel fault. */
cudaSetDevice(device_map[thr_id]);
cudaMalloc(&d_prod0[thr_id], 38 *sizeof(uint64_t) * throughput);
cudaMalloc(&d_prod1[thr_id], 38 *sizeof(uint64_t) * throughput);
cudaMalloc(&FinalHash[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&KeccakH[thr_id], 38 *sizeof(uint64_t) * throughput);
cudaMalloc(&WhirlpoolH[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&Sha256H[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&Sha512H[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&HavalH[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&RipemdH[thr_id], 8 *sizeof(uint64_t) * throughput);
cudaMalloc(&TigerH[thr_id], 8 *sizeof(uint64_t) * throughput);
m7_sha256_cpu_init(thr_id, throughput);
m7_sha512_cpu_init(thr_id, throughput);
m7_keccak512_cpu_init(thr_id, throughput);
m7_haval256_cpu_init(thr_id, throughput);
tiger192_cpu_init(thr_id, throughput);
m7_whirlpool512_cpu_init(thr_id, throughput,0);
ripemd160_cpu_init(thr_id, throughput);
cuda_check_cpu_init(thr_id, throughput);
m7_bigmul_init(thr_id, throughput);
//mul_init();
init[thr_id] = true;
}
uint32_t Htarg = ptarget[7];
/* upload the header/midstates for all seven hash kernels */
m7_whirlpool512_setBlock_120((void*)pdata);
m7_sha256_setBlock_120((void*)pdata, ptarget);
m7_sha512_setBlock_120((void*)pdata);
m7_haval256_setBlock_120((void*)pdata);
m7_keccak512_setBlock_120((void*)pdata);
ripemd160_setBlock_120((void*)pdata);
tiger192_setBlock_120((void*)pdata);
cuda_check_cpu_setTarget(ptarget);
uint32_t TheNonce = pdata[29]; /* NOTE(review): unused below (foundNonce is used instead) */
do {
int order = 0;
uint32_t foundNonce;
/* seven digests per nonce, then the big multiply chain:
 * sha512*keccak (8x8 legs), *whirlpool (8x16), *sha256 (4x24),
 * *haval (4x28), then tiger and ripemd via the unrolled kernels;
 * the final sha256 over the product does the target check. */
m7_sha256_cpu_hash_120(thr_id, throughput, pdata[29], Sha256H[thr_id], order++);
m7_sha512_cpu_hash_120(thr_id, throughput, pdata[29], Sha512H[thr_id], order++);
m7_keccak512_cpu_hash(thr_id, throughput, pdata[29], KeccakH[thr_id], order++);
m7_haval256_cpu_hash_120(thr_id, throughput, pdata[29], HavalH[thr_id], order++);
m7_tiger192_cpu_hash_120(thr_id, throughput, pdata[29], TigerH[thr_id], order++);
m7_ripemd160_cpu_hash_120(thr_id, throughput, pdata[29], RipemdH[thr_id], order++);
m7_whirlpool512_cpu_hash_120(thr_id, throughput, pdata[29], WhirlpoolH[thr_id], order++);
cpu_mulT4(0, throughput, 8, 8, Sha512H[thr_id], KeccakH[thr_id], d_prod0[thr_id],order); //64
MyStreamSynchronize(0,order++,thr_id);
cpu_mulT4(0, throughput,8, 16, WhirlpoolH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //128
MyStreamSynchronize(0,order++,thr_id);
cpu_mulT4(0, throughput, 4, 24, Sha256H[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); //96
MyStreamSynchronize(0,order++,thr_id);
cpu_mulT4(0, throughput, 4, 28, HavalH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //112
MyStreamSynchronize(0,order++,thr_id);
m7_bigmul_unroll1_cpu(0, throughput, TigerH[thr_id], d_prod1[thr_id], d_prod0[thr_id],order);
MyStreamSynchronize(0,order++,thr_id);
m7_bigmul_unroll2_cpu(0, throughput, RipemdH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order);
MyStreamSynchronize(0,order++,thr_id);
foundNonce = m7_sha256_cpu_hash_300(thr_id, throughput, pdata[29], NULL, d_prod1[thr_id], order);
if (foundNonce != 0xffffffff)
{
/* re-validate the candidate on the CPU before reporting it */
uint32_t vhash64[8];
m7_hash(vhash64, pdata, foundNonce, 0);
if (vhash64[7] <= Htarg)
{
pdata[29] = foundNonce;
*hashes_done = foundNonce - FirstNonce + 1;
return 1;
} else {
applog(LOG_INFO, "GPU #%d: result for nonce %08x does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce, vhash64[7], Htarg);
////////////////////////////////////////////
// m7_bigmul_cpu(thr_id,throughput,Sha256H[thr_id],Sha512H[thr_id],KeccakH[thr_id],WhirlpoolH[thr_id],HavalH[thr_id],TigerH[thr_id],RipemdH[thr_id],foundNonce,FirstNonce,order++);
// m7_hash(vhash64, pdata, foundNonce, 1);
////////////////////////////////////////////
}
}
pdata[29] += throughput;
} while (pdata[29] < max_nonce && !work_restart[thr_id].restart);
*hashes_done = pdata[29] - FirstNonce + 1;
return 0;
}

287
m7/m7_keccak512.cu

@ -0,0 +1,287 @@ @@ -0,0 +1,287 @@
#include <stdio.h>
#include <memory.h>
#include "cuda_helper.h"
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
/* Keccak midstate after the first 72-byte (9-lane) block of the 122-byte
 * header has been absorbed on the host; written by m7_keccak512_setBlock_120(). */
static __constant__ uint64_t stateo[25];
/* Device copy of the 24 Keccak-f[1600] round constants,
 * uploaded by m7_keccak512_cpu_init(). */
static __constant__ uint64_t RC[24];
/* Host master copy of the Keccak-f[1600] iota round constants. */
static const uint64_t cpu_RC[24] = {
0x0000000000000001ull, 0x0000000000008082ull,
0x800000000000808aull, 0x8000000080008000ull,
0x000000000000808bull, 0x0000000080000001ull,
0x8000000080008081ull, 0x8000000000008009ull,
0x000000000000008aull, 0x0000000000000088ull,
0x0000000080008009ull, 0x000000008000000aull,
0x000000008000808bull, 0x800000000000008bull,
0x8000000000008089ull, 0x8000000000008003ull,
0x8000000000008002ull, 0x8000000000000080ull,
0x000000000000800aull, 0x800000008000000aull,
0x8000000080008081ull, 0x8000000000008080ull,
0x0000000080000001ull, 0x8000000080008008ull
};
/*
 * Keccak-f[1600] permutation (device): applies all 24 rounds
 * (theta, rho+pi, chi, iota) in place to the 25-lane state s[].
 * keccak_round_constants must point to the 24 iota constants (RC).
 */
static __device__ __forceinline__ void keccak_block(uint64_t *s, const uint64_t *keccak_round_constants) {
size_t i;
uint64_t t[5], u[5], v, w;
/* absorb input */
//#pragma unroll
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
uint64_t temp0,temp1,temp2,temp3,temp4;
temp0 = ROTL64(t[0], 1);
temp1 = ROTL64(t[1], 1);
temp2 = ROTL64(t[2], 1);
temp3 = ROTL64(t[3], 1);
temp4 = ROTL64(t[4], 1);
u[0] = xor1(t[4],temp1);
u[1] = xor1(t[0],temp2);
u[2] = xor1(t[1],temp3);
u[3] = xor1(t[2],temp4);
u[4] = xor1(t[3],temp0);
/*
u[0] = t[4] ^ ROTL64(t[1], 1);
u[1] = t[0] ^ ROTL64(t[2], 1);
u[2] = t[1] ^ ROTL64(t[3], 1);
u[3] = t[2] ^ ROTL64(t[4], 1);
u[4] = t[3] ^ ROTL64(t[0], 1);
*/
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[ 1];
s[ 1] = ROTL64(s[ 6], 44);
s[ 6] = ROTL64(s[ 9], 20);
s[ 9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[ 2], 62);
s[ 2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[ 4], 27);
s[ 4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[ 8], 55);
s[ 8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[ 5], 36);
s[ 5] = ROTL64(s[ 3], 28);
s[ 3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[ 7], 6);
s[ 7] = ROTL64(s[10], 3);
s[10] = ROTL64( v, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
// chi(s[0],s[1],s[2],s[3],s[4]);
// chi(s[5],s[6],s[7],s[8],s[9]);
// chi(s[10],s[11],s[12],s[13],s[14]);
// chi(s[15],s[16],s[17],s[18],s[19]);
// chi(s[20],s[21],s[22],s[23],s[24]);
v = s[ 0]; w = s[ 1];
s[ 0] ^= (~w) & s[ 2];
s[ 1] ^= (~s[ 2]) & s[ 3];
s[ 2] ^= (~s[ 3]) & s[ 4];
s[ 3] ^= (~s[ 4]) & v;
s[ 4] ^= (~v) & w;
v = s[ 5]; w = s[ 6];
s[ 5] ^= (~w) & s[ 7];
s[ 6] ^= (~s[ 7]) & s[ 8];
s[ 7] ^= (~s[ 8]) & s[ 9];
s[ 8] ^= (~s[ 9]) & v;
s[ 9] ^= (~v) & w;
v = s[10]; w = s[11];
s[10] ^= (~w) & s[12];
s[11] ^= (~s[12]) & s[13];
s[12] ^= (~s[13]) & s[14];
s[13] ^= (~s[14]) & v;
s[14] ^= (~v) & w;
v = s[15]; w = s[16];
s[15] ^= (~w) & s[17];
s[16] ^= (~s[17]) & s[18];
s[17] ^= (~s[18]) & s[19];
s[18] ^= (~s[19]) & v;
s[19] ^= (~v) & w;
v = s[20]; w = s[21];
s[20] ^= (~w) & s[22];
s[21] ^= (~s[22]) & s[23];
s[22] ^= (~s[23]) & s[24];
s[23] ^= (~s[24]) & v;
s[24] ^= (~v) & w;
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
}
/*
 * Host-side mirror of keccak_block(): the same 24-round Keccak-f[1600]
 * permutation applied in place to s[25]. Used once per work unit by
 * m7_keccak512_setBlock_120() to precompute the first-block midstate.
 */
static __forceinline__ void keccak_block_host(uint64_t *s, const uint64_t *keccak_round_constants) {
size_t i;
uint64_t t[5], u[5], v, w;
/* absorb input */
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
u[0] = t[4] ^ ROTL64(t[1], 1);
u[1] = t[0] ^ ROTL64(t[2], 1);
u[2] = t[1] ^ ROTL64(t[3], 1);
u[3] = t[2] ^ ROTL64(t[4], 1);
u[4] = t[3] ^ ROTL64(t[0], 1);
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[ 1];
s[ 1] = ROTL64(s[ 6], 44);
s[ 6] = ROTL64(s[ 9], 20);
s[ 9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[ 2], 62);
s[ 2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[ 4], 27);
s[ 4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[ 8], 55);
s[ 8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[ 5], 36);
s[ 5] = ROTL64(s[ 3], 28);
s[ 3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[ 7], 6);
s[ 7] = ROTL64(s[10], 3);
s[10] = ROTL64( v, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
}
__constant__ uint64_t c_PaddedMessage80[16]; // 128-byte padded message: 122 header bytes + 0x01 pad byte + zeros (the "80" in the name is presumably a leftover from an 80-byte template)
/*
 * One thread per nonce. Resumes from the host-precomputed midstate
 * `stateo` (first 72-byte block already absorbed) and xors in the
 * second rate block: message words 9..15 from c_PaddedMessage80, with
 * the nonce replacing the high 32 bits of word 14; message word 16 is
 * all-zero padding so lane 7 is copied unchanged, and lane 8 receives
 * the final 0x80 padding byte (top byte of the 72-byte rate). One
 * keccak_block() then yields the 512-bit digest, written column-major:
 * hash word i of this nonce lands at outputHash[i*threads + thread].
 */
__global__ void m7_keccak512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t nounce = startNounce + thread;
uint64_t state[25];
/* lanes 9..24 are untouched by the second block: copy from midstate */
#pragma unroll 16
for (int i=9;i<25;i++) {state[i]=stateo[i];}
state[0] = xor1(stateo[0],c_PaddedMessage80[9]);
state[1] = xor1(stateo[1],c_PaddedMessage80[10]);
state[2] = xor1(stateo[2],c_PaddedMessage80[11]);
state[3] = xor1(stateo[3],c_PaddedMessage80[12]);
state[4] = xor1(stateo[4],c_PaddedMessage80[13]);
state[5] = xor1(stateo[5],REPLACE_HIWORD(c_PaddedMessage80[14],nounce));
state[6] = xor1(stateo[6],c_PaddedMessage80[15]);
state[7] = stateo[7];
state[8] = xor1(stateo[8],0x8000000000000000);
keccak_block(state,RC);
#pragma unroll 8
for (int i=0;i<8;i++) {outputHash[i*threads+thread]=state[i];}
} //thread
}
void m7_keccak512_cpu_init(int thr_id, int threads)
{
	/* Upload the 24 Keccak-f[1600] round constants into device constant
	 * memory. thr_id/threads are unused; the signature matches the other
	 * *_cpu_init helpers. */
	cudaMemcpyToSymbol(RC, cpu_RC, 24 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
__host__ void m7_keccak512_setBlock_120(void *pdata)
{
	/* Build the 128-byte padded message: 122 header bytes, one 0x01
	 * keccak padding byte, then zeros, and upload it. */
	unsigned char PaddedMessage[128];
	memcpy(PaddedMessage, pdata, 122);
	PaddedMessage[122] = 0x01;
	memset(PaddedMessage + 123, 0, 5);
	cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);

	/* Absorb the first 9 words (72 bytes = one keccak-512 rate block) on
	 * the CPU; the GPU kernel resumes from this midstate. */
	uint64_t state[25];
	const uint64_t *alt_data = (const uint64_t*) pdata;
	for (int i = 0; i < 25; i++)
		state[i] = 0;
	for (int i = 0; i < 9; i++)
		state[i] ^= alt_data[i];
	keccak_block_host(state, cpu_RC);
	cudaMemcpyToSymbol(stateo, state, 25 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
__host__ void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order)
{
	/* Launch one thread per nonce, 256 threads per block, no shared
	 * memory, then wait on the project's per-order stream sync. */
	const int tpb = 256;
	dim3 grid((threads + tpb - 1) / tpb);
	dim3 block(tpb);

	m7_keccak512_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_hash);
	MyStreamSynchronize(NULL, order, thr_id);
}

833
sph/ripemd.c

@ -0,0 +1,833 @@ @@ -0,0 +1,833 @@
/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */
/*
* RIPEMD-160 implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_ripemd.h"
/*
 * Round functions for RIPEMD (original).
 */
#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
/* IV for original RIPEMD (4-word state).
 * NOTE(review): declared with 5 slots but only 4 initializers — the 5th
 * element is zero-initialized; sph_ripemd_init copies only
 * sizeof sc->val bytes from it. */
static const sph_u32 oIV[5] = {
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
};
/*
 * Round functions for RIPEMD-128 and RIPEMD-160.
 */
#define F1(x, y, z) ((x) ^ (y) ^ (z))
#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#define F3(x, y, z) (((x) | ~(y)) ^ (z))
#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
#define F5(x, y, z) ((x) ^ ((y) | ~(z)))
/* IV shared by RIPEMD-128 (first 4 words) and RIPEMD-160 (all 5). */
static const sph_u32 IV[5] = {
SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
};
#define ROTL SPH_ROTL32
/* ===================================================================== */
/*
 * RIPEMD (original hash, deprecated).
 */
/* Step macros for the two parallel lines: add round function + message
 * word + round constant, then rotate left by s. */
#define FF1(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \
(A) = ROTL(tmp, (s)); \
} while (0)
#define GG1(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + G(B, C, D) \
+ (X) + SPH_C32(0x5A827999)); \
(A) = ROTL(tmp, (s)); \
} while (0)
#define HH1(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
+ (X) + SPH_C32(0x6ED9EBA1)); \
(A) = ROTL(tmp, (s)); \
} while (0)
#define FF2(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + F(B, C, D) \
+ (X) + SPH_C32(0x50A28BE6)); \
(A) = ROTL(tmp, (s)); \
} while (0)
#define GG2(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \
(A) = ROTL(tmp, (s)); \
} while (0)
#define HH2(A, B, C, D, X, s) do { \
sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
+ (X) + SPH_C32(0x5C4DD124)); \
(A) = ROTL(tmp, (s)); \
} while (0)
/*
 * Compression function body for original RIPEMD: two parallel 4-word
 * lines (suffix 1 and 2) run 48 steps each, then a cross-wise
 * feed-forward merges them back into the 4-word state h[].
 */
#define RIPEMD_ROUND_BODY(in, h) do { \
sph_u32 A1, B1, C1, D1; \
sph_u32 A2, B2, C2, D2; \
sph_u32 tmp; \
\
A1 = A2 = (h)[0]; \
B1 = B2 = (h)[1]; \
C1 = C2 = (h)[2]; \
D1 = D2 = (h)[3]; \
\
FF1(A1, B1, C1, D1, in( 0), 11); \
FF1(D1, A1, B1, C1, in( 1), 14); \
FF1(C1, D1, A1, B1, in( 2), 15); \
FF1(B1, C1, D1, A1, in( 3), 12); \
FF1(A1, B1, C1, D1, in( 4), 5); \
FF1(D1, A1, B1, C1, in( 5), 8); \
FF1(C1, D1, A1, B1, in( 6), 7); \
FF1(B1, C1, D1, A1, in( 7), 9); \
FF1(A1, B1, C1, D1, in( 8), 11); \
FF1(D1, A1, B1, C1, in( 9), 13); \
FF1(C1, D1, A1, B1, in(10), 14); \
FF1(B1, C1, D1, A1, in(11), 15); \
FF1(A1, B1, C1, D1, in(12), 6); \
FF1(D1, A1, B1, C1, in(13), 7); \
FF1(C1, D1, A1, B1, in(14), 9); \
FF1(B1, C1, D1, A1, in(15), 8); \
\
GG1(A1, B1, C1, D1, in( 7), 7); \
GG1(D1, A1, B1, C1, in( 4), 6); \
GG1(C1, D1, A1, B1, in(13), 8); \
GG1(B1, C1, D1, A1, in( 1), 13); \
GG1(A1, B1, C1, D1, in(10), 11); \
GG1(D1, A1, B1, C1, in( 6), 9); \
GG1(C1, D1, A1, B1, in(15), 7); \
GG1(B1, C1, D1, A1, in( 3), 15); \
GG1(A1, B1, C1, D1, in(12), 7); \
GG1(D1, A1, B1, C1, in( 0), 12); \
GG1(C1, D1, A1, B1, in( 9), 15); \
GG1(B1, C1, D1, A1, in( 5), 9); \
GG1(A1, B1, C1, D1, in(14), 7); \
GG1(D1, A1, B1, C1, in( 2), 11); \
GG1(C1, D1, A1, B1, in(11), 13); \
GG1(B1, C1, D1, A1, in( 8), 12); \
\
HH1(A1, B1, C1, D1, in( 3), 11); \
HH1(D1, A1, B1, C1, in(10), 13); \
HH1(C1, D1, A1, B1, in( 2), 14); \
HH1(B1, C1, D1, A1, in( 4), 7); \
HH1(A1, B1, C1, D1, in( 9), 14); \
HH1(D1, A1, B1, C1, in(15), 9); \
HH1(C1, D1, A1, B1, in( 8), 13); \
HH1(B1, C1, D1, A1, in( 1), 15); \
HH1(A1, B1, C1, D1, in(14), 6); \
HH1(D1, A1, B1, C1, in( 7), 8); \
HH1(C1, D1, A1, B1, in( 0), 13); \
HH1(B1, C1, D1, A1, in( 6), 6); \
HH1(A1, B1, C1, D1, in(11), 12); \
HH1(D1, A1, B1, C1, in(13), 5); \
HH1(C1, D1, A1, B1, in( 5), 7); \
HH1(B1, C1, D1, A1, in(12), 5); \
\
FF2(A2, B2, C2, D2, in( 0), 11); \
FF2(D2, A2, B2, C2, in( 1), 14); \
FF2(C2, D2, A2, B2, in( 2), 15); \
FF2(B2, C2, D2, A2, in( 3), 12); \
FF2(A2, B2, C2, D2, in( 4), 5); \
FF2(D2, A2, B2, C2, in( 5), 8); \
FF2(C2, D2, A2, B2, in( 6), 7); \
FF2(B2, C2, D2, A2, in( 7), 9); \
FF2(A2, B2, C2, D2, in( 8), 11); \
FF2(D2, A2, B2, C2, in( 9), 13); \
FF2(C2, D2, A2, B2, in(10), 14); \
FF2(B2, C2, D2, A2, in(11), 15); \
FF2(A2, B2, C2, D2, in(12), 6); \
FF2(D2, A2, B2, C2, in(13), 7); \
FF2(C2, D2, A2, B2, in(14), 9); \
FF2(B2, C2, D2, A2, in(15), 8); \
\
GG2(A2, B2, C2, D2, in( 7), 7); \
GG2(D2, A2, B2, C2, in( 4), 6); \
GG2(C2, D2, A2, B2, in(13), 8); \
GG2(B2, C2, D2, A2, in( 1), 13); \
GG2(A2, B2, C2, D2, in(10), 11); \
GG2(D2, A2, B2, C2, in( 6), 9); \
GG2(C2, D2, A2, B2, in(15), 7); \
GG2(B2, C2, D2, A2, in( 3), 15); \
GG2(A2, B2, C2, D2, in(12), 7); \
GG2(D2, A2, B2, C2, in( 0), 12); \
GG2(C2, D2, A2, B2, in( 9), 15); \
GG2(B2, C2, D2, A2, in( 5), 9); \
GG2(A2, B2, C2, D2, in(14), 7); \
GG2(D2, A2, B2, C2, in( 2), 11); \
GG2(C2, D2, A2, B2, in(11), 13); \
GG2(B2, C2, D2, A2, in( 8), 12); \
\
HH2(A2, B2, C2, D2, in( 3), 11); \
HH2(D2, A2, B2, C2, in(10), 13); \
HH2(C2, D2, A2, B2, in( 2), 14); \
HH2(B2, C2, D2, A2, in( 4), 7); \
HH2(A2, B2, C2, D2, in( 9), 14); \
HH2(D2, A2, B2, C2, in(15), 9); \
HH2(C2, D2, A2, B2, in( 8), 13); \
HH2(B2, C2, D2, A2, in( 1), 15); \
HH2(A2, B2, C2, D2, in(14), 6); \
HH2(D2, A2, B2, C2, in( 7), 8); \
HH2(C2, D2, A2, B2, in( 0), 13); \
HH2(B2, C2, D2, A2, in( 6), 6); \
HH2(A2, B2, C2, D2, in(11), 12); \
HH2(D2, A2, B2, C2, in(13), 5); \
HH2(C2, D2, A2, B2, in( 5), 7); \
HH2(B2, C2, D2, A2, in(12), 5); \
\
tmp = SPH_T32((h)[1] + C1 + D2); \
(h)[1] = SPH_T32((h)[2] + D1 + A2); \
(h)[2] = SPH_T32((h)[3] + A1 + B2); \
(h)[3] = SPH_T32((h)[0] + B1 + C2); \
(h)[0] = tmp; \
} while (0)
/*
 * One round of RIPEMD. The data must be aligned for 32-bit access.
 */
static void
ripemd_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST
/* little-endian fast path: decode message words straight from data */
#define RIPEMD_IN(x) sph_dec32le_aligned(data + (4 * (x)))
#else
/* generic path: decode the 16 words once into a local buffer */
sph_u32 X_var[16];
int i;
for (i = 0; i < 16; i ++)
X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD_IN(x) X_var[x]
#endif
RIPEMD_ROUND_BODY(RIPEMD_IN, r);
#undef RIPEMD_IN
}
/* see sph_ripemd.h */
void
sph_ripemd_init(void *cc)
{
	sph_ripemd_context *sc = cc;

	/* Load the original-RIPEMD IV and reset the message bit counter. */
	memcpy(sc->val, oIV, sizeof sc->val);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}
/* Instantiate the generic little-endian Merkle-Damgard wrapper from
 * md_helper.c for original RIPEMD: this generates the ripemd()/
 * ripemd_close() helpers (the latter is used by sph_ripemd_close below)
 * around the ripemd_round compression function. */
#define RFUN ripemd_round
#define HASH ripemd
#define LE32 1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32
/* see sph_ripemd.h */
void
sph_ripemd_close(void *cc, void *dst)
{
/* pad, finalize and emit 4 output words (generated md_helper close),
 * then reset the context for reuse */
ripemd_close(cc, dst, 4);
sph_ripemd_init(cc);
}
/* see sph_ripemd.h */
void
sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
{
/* apply one compression over a caller-decoded 16-word block */
#define RIPEMD_IN(x) msg[x]
RIPEMD_ROUND_BODY(RIPEMD_IN, val);
#undef RIPEMD_IN
}
/* ===================================================================== */
/*
 * RIPEMD-128.
 */
/*
 * Round constants for RIPEMD-128.
 */
#define sK11 SPH_C32(0x00000000)
#define sK12 SPH_C32(0x5A827999)
#define sK13 SPH_C32(0x6ED9EBA1)
#define sK14 SPH_C32(0x8F1BBCDC)
#define sK21 SPH_C32(0x50A28BE6)
#define sK22 SPH_C32(0x5C4DD124)
#define sK23 SPH_C32(0x6D703EF3)
#define sK24 SPH_C32(0x00000000)
/* One RIPEMD-128 step: add round function, message word and constant,
 * rotate left by s; sROUND1/sROUND2 select line 1 / line 2 registers. */
#define sRR(a, b, c, d, f, s, r, k) do { \
a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \
} while (0)
#define sROUND1(a, b, c, d, f, s, r, k) \
sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k)
#define sROUND2(a, b, c, d, f, s, r, k) \
sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k)
/*
 * This macro defines the body for a RIPEMD-128 compression function
 * implementation. The "in" parameter should evaluate, when applied to a
 * numerical input parameter from 0 to 15, to an expression which yields
 * the corresponding input block. The "h" parameter should evaluate to
 * an array or pointer expression designating the array of 4 words which
 * contains the input and output of the compression function.
 */
/* Two 4-word lines, 64 steps each; line 2 uses the round functions in
 * reverse order (F4..F1) with its own constants, then both lines are
 * folded cross-wise into h[]. */
#define RIPEMD128_ROUND_BODY(in, h) do { \
sph_u32 A1, B1, C1, D1; \
sph_u32 A2, B2, C2, D2; \
sph_u32 tmp; \
\
A1 = A2 = (h)[0]; \
B1 = B2 = (h)[1]; \
C1 = C2 = (h)[2]; \
D1 = D2 = (h)[3]; \
\
sROUND1(A, B, C, D, F1, 11, in( 0), 1); \
sROUND1(D, A, B, C, F1, 14, in( 1), 1); \
sROUND1(C, D, A, B, F1, 15, in( 2), 1); \
sROUND1(B, C, D, A, F1, 12, in( 3), 1); \
sROUND1(A, B, C, D, F1, 5, in( 4), 1); \
sROUND1(D, A, B, C, F1, 8, in( 5), 1); \
sROUND1(C, D, A, B, F1, 7, in( 6), 1); \
sROUND1(B, C, D, A, F1, 9, in( 7), 1); \
sROUND1(A, B, C, D, F1, 11, in( 8), 1); \
sROUND1(D, A, B, C, F1, 13, in( 9), 1); \
sROUND1(C, D, A, B, F1, 14, in(10), 1); \
sROUND1(B, C, D, A, F1, 15, in(11), 1); \
sROUND1(A, B, C, D, F1, 6, in(12), 1); \
sROUND1(D, A, B, C, F1, 7, in(13), 1); \
sROUND1(C, D, A, B, F1, 9, in(14), 1); \
sROUND1(B, C, D, A, F1, 8, in(15), 1); \
\
sROUND1(A, B, C, D, F2, 7, in( 7), 2); \
sROUND1(D, A, B, C, F2, 6, in( 4), 2); \
sROUND1(C, D, A, B, F2, 8, in(13), 2); \
sROUND1(B, C, D, A, F2, 13, in( 1), 2); \
sROUND1(A, B, C, D, F2, 11, in(10), 2); \
sROUND1(D, A, B, C, F2, 9, in( 6), 2); \
sROUND1(C, D, A, B, F2, 7, in(15), 2); \
sROUND1(B, C, D, A, F2, 15, in( 3), 2); \
sROUND1(A, B, C, D, F2, 7, in(12), 2); \
sROUND1(D, A, B, C, F2, 12, in( 0), 2); \
sROUND1(C, D, A, B, F2, 15, in( 9), 2); \
sROUND1(B, C, D, A, F2, 9, in( 5), 2); \
sROUND1(A, B, C, D, F2, 11, in( 2), 2); \
sROUND1(D, A, B, C, F2, 7, in(14), 2); \
sROUND1(C, D, A, B, F2, 13, in(11), 2); \
sROUND1(B, C, D, A, F2, 12, in( 8), 2); \
\
sROUND1(A, B, C, D, F3, 11, in( 3), 3); \
sROUND1(D, A, B, C, F3, 13, in(10), 3); \
sROUND1(C, D, A, B, F3, 6, in(14), 3); \
sROUND1(B, C, D, A, F3, 7, in( 4), 3); \
sROUND1(A, B, C, D, F3, 14, in( 9), 3); \
sROUND1(D, A, B, C, F3, 9, in(15), 3); \
sROUND1(C, D, A, B, F3, 13, in( 8), 3); \
sROUND1(B, C, D, A, F3, 15, in( 1), 3); \
sROUND1(A, B, C, D, F3, 14, in( 2), 3); \
sROUND1(D, A, B, C, F3, 8, in( 7), 3); \
sROUND1(C, D, A, B, F3, 13, in( 0), 3); \
sROUND1(B, C, D, A, F3, 6, in( 6), 3); \
sROUND1(A, B, C, D, F3, 5, in(13), 3); \
sROUND1(D, A, B, C, F3, 12, in(11), 3); \
sROUND1(C, D, A, B, F3, 7, in( 5), 3); \
sROUND1(B, C, D, A, F3, 5, in(12), 3); \
\
sROUND1(A, B, C, D, F4, 11, in( 1), 4); \
sROUND1(D, A, B, C, F4, 12, in( 9), 4); \
sROUND1(C, D, A, B, F4, 14, in(11), 4); \
sROUND1(B, C, D, A, F4, 15, in(10), 4); \
sROUND1(A, B, C, D, F4, 14, in( 0), 4); \
sROUND1(D, A, B, C, F4, 15, in( 8), 4); \
sROUND1(C, D, A, B, F4, 9, in(12), 4); \
sROUND1(B, C, D, A, F4, 8, in( 4), 4); \
sROUND1(A, B, C, D, F4, 9, in(13), 4); \
sROUND1(D, A, B, C, F4, 14, in( 3), 4); \
sROUND1(C, D, A, B, F4, 5, in( 7), 4); \
sROUND1(B, C, D, A, F4, 6, in(15), 4); \
sROUND1(A, B, C, D, F4, 8, in(14), 4); \
sROUND1(D, A, B, C, F4, 6, in( 5), 4); \
sROUND1(C, D, A, B, F4, 5, in( 6), 4); \
sROUND1(B, C, D, A, F4, 12, in( 2), 4); \
\
sROUND2(A, B, C, D, F4, 8, in( 5), 1); \
sROUND2(D, A, B, C, F4, 9, in(14), 1); \
sROUND2(C, D, A, B, F4, 9, in( 7), 1); \
sROUND2(B, C, D, A, F4, 11, in( 0), 1); \
sROUND2(A, B, C, D, F4, 13, in( 9), 1); \
sROUND2(D, A, B, C, F4, 15, in( 2), 1); \
sROUND2(C, D, A, B, F4, 15, in(11), 1); \
sROUND2(B, C, D, A, F4, 5, in( 4), 1); \
sROUND2(A, B, C, D, F4, 7, in(13), 1); \
sROUND2(D, A, B, C, F4, 7, in( 6), 1); \
sROUND2(C, D, A, B, F4, 8, in(15), 1); \
sROUND2(B, C, D, A, F4, 11, in( 8), 1); \
sROUND2(A, B, C, D, F4, 14, in( 1), 1); \
sROUND2(D, A, B, C, F4, 14, in(10), 1); \
sROUND2(C, D, A, B, F4, 12, in( 3), 1); \
sROUND2(B, C, D, A, F4, 6, in(12), 1); \
\
sROUND2(A, B, C, D, F3, 9, in( 6), 2); \
sROUND2(D, A, B, C, F3, 13, in(11), 2); \
sROUND2(C, D, A, B, F3, 15, in( 3), 2); \
sROUND2(B, C, D, A, F3, 7, in( 7), 2); \
sROUND2(A, B, C, D, F3, 12, in( 0), 2); \
sROUND2(D, A, B, C, F3, 8, in(13), 2); \
sROUND2(C, D, A, B, F3, 9, in( 5), 2); \
sROUND2(B, C, D, A, F3, 11, in(10), 2); \
sROUND2(A, B, C, D, F3, 7, in(14), 2); \
sROUND2(D, A, B, C, F3, 7, in(15), 2); \
sROUND2(C, D, A, B, F3, 12, in( 8), 2); \
sROUND2(B, C, D, A, F3, 7, in(12), 2); \
sROUND2(A, B, C, D, F3, 6, in( 4), 2); \
sROUND2(D, A, B, C, F3, 15, in( 9), 2); \
sROUND2(C, D, A, B, F3, 13, in( 1), 2); \
sROUND2(B, C, D, A, F3, 11, in( 2), 2); \
\
sROUND2(A, B, C, D, F2, 9, in(15), 3); \
sROUND2(D, A, B, C, F2, 7, in( 5), 3); \
sROUND2(C, D, A, B, F2, 15, in( 1), 3); \
sROUND2(B, C, D, A, F2, 11, in( 3), 3); \
sROUND2(A, B, C, D, F2, 8, in( 7), 3); \
sROUND2(D, A, B, C, F2, 6, in(14), 3); \
sROUND2(C, D, A, B, F2, 6, in( 6), 3); \
sROUND2(B, C, D, A, F2, 14, in( 9), 3); \
sROUND2(A, B, C, D, F2, 12, in(11), 3); \
sROUND2(D, A, B, C, F2, 13, in( 8), 3); \
sROUND2(C, D, A, B, F2, 5, in(12), 3); \
sROUND2(B, C, D, A, F2, 14, in( 2), 3); \
sROUND2(A, B, C, D, F2, 13, in(10), 3); \
sROUND2(D, A, B, C, F2, 13, in( 0), 3); \
sROUND2(C, D, A, B, F2, 7, in( 4), 3); \
sROUND2(B, C, D, A, F2, 5, in(13), 3); \
\
sROUND2(A, B, C, D, F1, 15, in( 8), 4); \
sROUND2(D, A, B, C, F1, 5, in( 6), 4); \
sROUND2(C, D, A, B, F1, 8, in( 4), 4); \
sROUND2(B, C, D, A, F1, 11, in( 1), 4); \
sROUND2(A, B, C, D, F1, 14, in( 3), 4); \
sROUND2(D, A, B, C, F1, 14, in(11), 4); \
sROUND2(C, D, A, B, F1, 6, in(15), 4); \
sROUND2(B, C, D, A, F1, 14, in( 0), 4); \
sROUND2(A, B, C, D, F1, 6, in( 5), 4); \
sROUND2(D, A, B, C, F1, 9, in(12), 4); \
sROUND2(C, D, A, B, F1, 12, in( 2), 4); \
sROUND2(B, C, D, A, F1, 9, in(13), 4); \
sROUND2(A, B, C, D, F1, 12, in( 9), 4); \
sROUND2(D, A, B, C, F1, 5, in( 7), 4); \
sROUND2(C, D, A, B, F1, 15, in(10), 4); \
sROUND2(B, C, D, A, F1, 8, in(14), 4); \
\
tmp = SPH_T32((h)[1] + C1 + D2); \
(h)[1] = SPH_T32((h)[2] + D1 + A2); \
(h)[2] = SPH_T32((h)[3] + A1 + B2); \
(h)[3] = SPH_T32((h)[0] + B1 + C2); \
(h)[0] = tmp; \
} while (0)
/*
 * One round of RIPEMD-128. The data must be aligned for 32-bit access.
 */
static void
ripemd128_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST
/* little-endian fast path: decode message words straight from data */
#define RIPEMD128_IN(x) sph_dec32le_aligned(data + (4 * (x)))
#else
/* generic path: decode the 16 words once into a local buffer */
sph_u32 X_var[16];
int i;
for (i = 0; i < 16; i ++)
X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD128_IN(x) X_var[x]
#endif
RIPEMD128_ROUND_BODY(RIPEMD128_IN, r);
#undef RIPEMD128_IN
}
/* see sph_ripemd.h */
void
sph_ripemd128_init(void *cc)
{
	sph_ripemd128_context *sc = cc;

	/* Load the IV (only sizeof sc->val bytes of it) and reset the
	 * message bit counter. */
	memcpy(sc->val, IV, sizeof sc->val);
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}
/* Instantiate the md_helper.c Merkle-Damgard wrapper for RIPEMD-128:
 * generates ripemd128()/ripemd128_close() around ripemd128_round. */
#define RFUN ripemd128_round
#define HASH ripemd128
#define LE32 1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32
/* see sph_ripemd.h */
void
sph_ripemd128_close(void *cc, void *dst)
{
/* Pad and emit the 4-word (16-byte) digest into dst, then reset the
   context so it can be reused for a new computation. */
ripemd128_close(cc, dst, 4);
sph_ripemd128_init(cc);
}
/* see sph_ripemd.h */
void
sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
{
/* Raw compression function: message words are already decoded, so the
   "in" accessor is a plain array read. val is updated in place. */
#define RIPEMD128_IN(x) msg[x]
RIPEMD128_ROUND_BODY(RIPEMD128_IN, val);
#undef RIPEMD128_IN
}
/* ===================================================================== */
/*
 * RIPEMD-160.
 */
/*
 * Round constants for RIPEMD-160. K1x feed the "left" line (rounds 1
 * to 5), K2x the parallel "right" line.
 */
#define K11 SPH_C32(0x00000000)
#define K12 SPH_C32(0x5A827999)
#define K13 SPH_C32(0x6ED9EBA1)
#define K14 SPH_C32(0x8F1BBCDC)
#define K15 SPH_C32(0xA953FD4E)
#define K21 SPH_C32(0x50A28BE6)
#define K22 SPH_C32(0x5C4DD124)
#define K23 SPH_C32(0x6D703EF3)
#define K24 SPH_C32(0x7A6D76E9)
#define K25 SPH_C32(0x00000000)
/*
 * One elementary step: a <- rotl(a + f(b,c,d) + msg + K, s) + e,
 * followed by the fixed rotation c <- rotl(c, 10).
 */
#define RR(a, b, c, d, e, f, s, r, k) do { \
a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
c = ROTL(c, 10); \
} while (0)
/* Step of the left line (suffix-1 state, K1x constants). */
#define ROUND1(a, b, c, d, e, f, s, r, k) \
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
/* Step of the right line (suffix-2 state, K2x constants). */
#define ROUND2(a, b, c, d, e, f, s, r, k) \
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
/*
 * This macro defines the body for a RIPEMD-160 compression function
 * implementation. The "in" parameter should evaluate, when applied to a
 * numerical input parameter from 0 to 15, to an expression which yields
 * the corresponding input block. The "h" parameter should evaluate to
 * an array or pointer expression designating the array of 5 words which
 * contains the input and output of the compression function.
 *
 * Two independent "lines" (suffix 1 = left, suffix 2 = right) each run
 * five 16-step rounds over a permuted message schedule, then both are
 * folded back into the chaining value with a one-word rotation.
 */
#define RIPEMD160_ROUND_BODY(in, h) do { \
sph_u32 A1, B1, C1, D1, E1; \
sph_u32 A2, B2, C2, D2, E2; \
sph_u32 tmp; \
/* Both lines start from the current chaining value. */ \
A1 = A2 = (h)[0]; \
B1 = B2 = (h)[1]; \
C1 = C2 = (h)[2]; \
D1 = D2 = (h)[3]; \
E1 = E2 = (h)[4]; \
/* Left line, round 1: F1, K11. */ \
ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \
ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \
ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \
ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \
ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \
ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \
ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \
ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \
ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \
ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \
ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \
ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \
ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \
ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \
ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \
ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \
/* Left line, round 2: F2, K12. */ \
ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \
ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \
ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \
ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \
ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \
ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \
ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \
ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \
ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \
ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \
ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \
ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \
ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \
ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \
ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \
ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \
/* Left line, round 3: F3, K13. */ \
ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \
ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \
ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \
ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \
ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \
ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \
ROUND1(C, D, E, A, B, F3, 13, in( 8), 3); \
ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \
ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \
ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \
ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \
ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \
ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \
ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \
ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \
ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \
/* Left line, round 4: F4, K14. */ \
ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \
ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \
ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \
ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \
ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \
ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \
ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \
ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \
ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \
ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \
ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \
ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \
ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \
ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \
ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \
ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \
/* Left line, round 5: F5, K15. */ \
ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \
ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \
ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \
ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \
ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \
ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \
ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \
ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \
ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \
ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \
ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \
ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \
ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \
ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \
ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \
ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \
/* Right line, round 1: F5, K21 (boolean functions in reverse order). */ \
ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \
ROUND2(E, A, B, C, D, F5, 9, in(14), 1); \
ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \
ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \
ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \
ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \
ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \
ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \
ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \
ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \
ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \
ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \
ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \
ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \
ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \
ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \
/* Right line, round 2: F4, K22. */ \
ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \
ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \
ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \
ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \
ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \
ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \
ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \
ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \
ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \
ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \
ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \
ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \
ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \
ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \
ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \
ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \
/* Right line, round 3: F3, K23. */ \
ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \
ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \
ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \
ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \
ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \
ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \
ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \
ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \
ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \
ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \
ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \
ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \
ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \
ROUND2(A, B, C, D, E, F3, 13, in( 0), 3); \
ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \
ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \
/* Right line, round 4: F2, K24. */ \
ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \
ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \
ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \
ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \
ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \
ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \
ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \
ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \
ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \
ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \
ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \
ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \
ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \
ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \
ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \
ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \
/* Right line, round 5: F1, K25. */ \
ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \
ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \
ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \
ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \
ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \
ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \
ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \
ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \
ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \
ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \
ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \
ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \
ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \
ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \
ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \
ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \
/* Fold both lines back into the chaining value, rotated by one word. */ \
tmp = SPH_T32((h)[1] + C1 + D2); \
(h)[1] = SPH_T32((h)[2] + D1 + E2); \
(h)[2] = SPH_T32((h)[3] + E1 + A2); \
(h)[3] = SPH_T32((h)[4] + A1 + B2); \
(h)[4] = SPH_T32((h)[0] + B1 + C2); \
(h)[0] = tmp; \
} while (0)
/*
 * One round of RIPEMD-160. The data must be aligned for 32-bit access.
 * "data" is one 64-byte message block; "r" is the 5-word chaining
 * value, updated in place.
 */
static void
ripemd160_round(const unsigned char *data, sph_u32 r[5])
{
#if SPH_LITTLE_FAST
/* Fast little-endian platform: decode each message word lazily,
   straight out of the input buffer, as the round body asks for it. */
#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x)))
#else
/* Otherwise decode the whole block once into a local array, so each
   of the 160 word references costs only an array read. */
sph_u32 X_var[16];
int i;
for (i = 0; i < 16; i ++)
X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD160_IN(x) X_var[x]
#endif
RIPEMD160_ROUND_BODY(RIPEMD160_IN, r);
#undef RIPEMD160_IN
}
/* see sph_ripemd.h */
void
sph_ripemd160_init(void *cc)
{
	sph_ripemd160_context *sc = cc;

	/* Load the standard initial chaining value (file-local IV table). */
	memcpy(sc->val, IV, sizeof sc->val);
	/* No message bytes processed yet. */
#if SPH_64
	sc->count = 0;
#else
	sc->count_high = sc->count_low = 0;
#endif
}
/*
 * Instantiate the shared Merkle-Damgard plumbing for RIPEMD-160:
 * md_helper.c expands into the streaming update and padding/close
 * helpers (e.g. ripemd160_close(), used below) built around the
 * ripemd160_round() compression function, with little-endian (LE32)
 * length encoding.
 */
#define RFUN ripemd160_round
#define HASH ripemd160
#define LE32 1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32
/* see sph_ripemd.h */
void
sph_ripemd160_close(void *cc, void *dst)
{
/* Pad and emit the 5-word (20-byte) digest into dst, then reset the
   context so it can be reused for a new computation. */
ripemd160_close(cc, dst, 5);
sph_ripemd160_init(cc);
}
/* see sph_ripemd.h */
void
sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5])
{
/* Raw compression function: message words are already decoded, so the
   "in" accessor is a plain array read. val is updated in place. */
#define RIPEMD160_IN(x) msg[x]
RIPEMD160_ROUND_BODY(RIPEMD160_IN, val);
#undef RIPEMD160_IN
}

273
sph/sph_ripemd.h

@ -0,0 +1,273 @@ @@ -0,0 +1,273 @@
/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */
/**
* RIPEMD, RIPEMD-128 and RIPEMD-160 interface.
*
* RIPEMD was first described in: Research and Development in Advanced
* Communication Technologies in Europe, "RIPE Integrity Primitives:
* Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE,
* June 1992.
*
* A new, strengthened version, dubbed RIPEMD-160, was published in: H.
* Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened
* version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039,
* Springer (1996), pp. 71--82.
*
* This article describes both RIPEMD-160, with a 160-bit output, and a
* reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128
* was meant as a "drop-in" replacement for any hash function with 128-bit
* output, especially the original RIPEMD.
*
* @warning Collisions, and an efficient method to build other collisions,
* have been published for the original RIPEMD, which is thus considered as
* cryptographically broken. It is also very rarely encountered, and there
* seems to exist no free description or implementation of RIPEMD (except
* the sphlib code, of course). As of January 2007, RIPEMD-128 and RIPEMD-160
* seem as secure as their output length allows.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_ripemd.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_RIPEMD_H__
#define SPH_RIPEMD_H__
#include <stddef.h>
#include "sph_types.h"
/**
* Output size (in bits) for RIPEMD.
*/
#define SPH_SIZE_ripemd 128
/**
* Output size (in bits) for RIPEMD-128.
*/
#define SPH_SIZE_ripemd128 128
/**
* Output size (in bits) for RIPEMD-160.
*/
#define SPH_SIZE_ripemd160 160
/**
* This structure is a context for RIPEMD computations: it contains the
* intermediate values and some data from the last entered block. Once
* a RIPEMD computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running RIPEMD computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
sph_u32 val[4]; /* current chaining value (4 x 32-bit words) */
#if SPH_64
sph_u64 count; /* total number of input bytes processed */
#else
sph_u32 count_high, count_low; /* 64-bit byte count, split in two words */
#endif
#endif
} sph_ripemd_context;
/**
* Initialize a RIPEMD context. This process performs no memory allocation.
*
* @param cc the RIPEMD context (pointer to
* a <code>sph_ripemd_context</code>)
*/
void sph_ripemd_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the RIPEMD context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_ripemd(void *cc, const void *data, size_t len);
/**
* Terminate the current RIPEMD computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accommodate the result (16 bytes). The context is automatically
* reinitialized.
*
* @param cc the RIPEMD context
* @param dst the destination buffer
*/
void sph_ripemd_close(void *cc, void *dst);
/**
* Apply the RIPEMD compression function on the provided data. The
* <code>msg</code> parameter contains the 16 32-bit input blocks,
* as numerical values (hence after the little-endian decoding). The
* <code>val</code> parameter contains the 4 32-bit input words for
* the compression function; the output is written in place in this
* array.
*
* @param msg the message block (16 values)
* @param val the function 128-bit input and output
*/
void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]);
/* ===================================================================== */
/**
* This structure is a context for RIPEMD-128 computations: it contains the
* intermediate values and some data from the last entered block. Once
* a RIPEMD-128 computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running RIPEMD-128 computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
sph_u32 val[4]; /* current chaining value (4 x 32-bit words) */
#if SPH_64
sph_u64 count; /* total number of input bytes processed */
#else
sph_u32 count_high, count_low; /* 64-bit byte count, split in two words */
#endif
#endif
} sph_ripemd128_context;
/**
* Initialize a RIPEMD-128 context. This process performs no memory allocation.
*
* @param cc the RIPEMD-128 context (pointer to
* a <code>sph_ripemd128_context</code>)
*/
void sph_ripemd128_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the RIPEMD-128 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_ripemd128(void *cc, const void *data, size_t len);
/**
* Terminate the current RIPEMD-128 computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accommodate the result (16 bytes). The context is automatically
* reinitialized.
*
* @param cc the RIPEMD-128 context
* @param dst the destination buffer
*/
void sph_ripemd128_close(void *cc, void *dst);
/**
* Apply the RIPEMD-128 compression function on the provided data. The
* <code>msg</code> parameter contains the 16 32-bit input blocks,
* as numerical values (hence after the little-endian decoding). The
* <code>val</code> parameter contains the 4 32-bit input words for
* the compression function; the output is written in place in this
* array.
*
* @param msg the message block (16 values)
* @param val the function 128-bit input and output
*/
void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
/* ===================================================================== */
/**
* This structure is a context for RIPEMD-160 computations: it contains the
* intermediate values and some data from the last entered block. Once
* a RIPEMD-160 computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running RIPEMD-160 computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[64]; /* first field, for alignment */
sph_u32 val[5]; /* current chaining value (5 x 32-bit words) */
#if SPH_64
sph_u64 count; /* total number of input bytes processed */
#else
sph_u32 count_high, count_low; /* 64-bit byte count, split in two words */
#endif
#endif
} sph_ripemd160_context;
/**
* Initialize a RIPEMD-160 context. This process performs no memory allocation.
*
* @param cc the RIPEMD-160 context (pointer to
* a <code>sph_ripemd160_context</code>)
*/
void sph_ripemd160_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the RIPEMD-160 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_ripemd160(void *cc, const void *data, size_t len);
/**
* Terminate the current RIPEMD-160 computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accommodate the result (20 bytes). The context is automatically
* reinitialized.
*
* @param cc the RIPEMD-160 context
* @param dst the destination buffer
*/
void sph_ripemd160_close(void *cc, void *dst);
/**
* Apply the RIPEMD-160 compression function on the provided data. The
* <code>msg</code> parameter contains the 16 32-bit input blocks,
* as numerical values (hence after the little-endian decoding). The
* <code>val</code> parameter contains the 5 32-bit input blocks for
* the compression function; the output is written in place in this
* array.
*
* @param msg the message block (16 values)
* @param val the function 160-bit input and output
*/
void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]);
#endif

691
sph/sph_sha2.c

@ -0,0 +1,691 @@ @@ -0,0 +1,691 @@
/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
/*
* SHA-224 / SHA-256 implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_sha2.h"
/* Default to the compact (looped) compression body when the library is
   built for small footprint, unless explicitly overridden. */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
#define SPH_SMALL_FOOTPRINT_SHA2 1
#endif
/* FIPS 180 Ch and Maj, in branch-free form with fewer operations. */
#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
#define ROTR SPH_ROTR32
/* FIPS 180 Sigma0/Sigma1 (big, on state) and sigma0/sigma1 (small,
   on the message schedule). */
#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3))
#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10))
/* SHA-224 initial hash value (FIPS 180). */
static const sph_u32 H224[8] = {
SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
};
/* SHA-256 initial hash value (FIPS 180). */
static const sph_u32 H256[8] = {
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
};
/*
* The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
* compression function implementation. The "in" parameter should
* evaluate, when applied to a numerical input parameter from 0 to 15,
* to an expression which yields the corresponding input block. The "r"
* parameter should evaluate to an array or pointer expression
* designating the array of 8 words which contains the input and output
* of the compression function.
*/
#if SPH_SMALL_FOOTPRINT_SHA2
/* The 64 SHA-224/256 round constants (FIPS 180), one per step; only
   needed as a table in the compact (looped) implementation. */
static const sph_u32 K[64] = {
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
};
/* Message expansion for the first 16 steps: just load the input word
   into the 16-word rolling schedule. */
#define SHA2_MEXP1(in, pc) do { \
W[pc] = in(pc); \
} while (0)
/* Message expansion for steps 16..63: W[t] = s1(W[t-2]) + W[t-7]
   + s0(W[t-15]) + W[t-16], computed in place in the 16-word ring. */
#define SHA2_MEXP2(in, pc) do { \
W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
+ W[((pc) - 7) & 0x0F] \
+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \
} while (0)
/* One SHA-256 step: expand the schedule (variant n), then apply the
   FIPS 180 state update with round constant K[pcount + pc]. */
#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \
sph_u32 t1, t2; \
SHA2_MEXP ## n(in, pc); \
t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
+ K[pcount + (pc)] + W[(pc) & 0x0F]); \
t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
d = SPH_T32(d + t1); \
h = SPH_T32(t1 + t2); \
} while (0)
/* Step with direct message load (steps 0..15). */
#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
/* Step with message expansion (steps 16..63). */
#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
#define SHA2_ROUND_BODY(in, r) do { \
sph_u32 A, B, C, D, E, F, G, H; \
sph_u32 W[16]; \
unsigned pcount; \
\
A = (r)[0]; \
B = (r)[1]; \
C = (r)[2]; \
D = (r)[3]; \
E = (r)[4]; \
F = (r)[5]; \
G = (r)[6]; \
H = (r)[7]; \
pcount = 0; \
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
for (pcount = 16; pcount < 64; pcount += 16) { \
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
} \
(r)[0] = SPH_T32((r)[0] + A); \
(r)[1] = SPH_T32((r)[1] + B); \
(r)[2] = SPH_T32((r)[2] + C); \
(r)[3] = SPH_T32((r)[3] + D); \
(r)[4] = SPH_T32((r)[4] + E); \
(r)[5] = SPH_T32((r)[5] + F); \
(r)[6] = SPH_T32((r)[6] + G); \
(r)[7] = SPH_T32((r)[7] + H); \
} while (0)
#else
#define SHA2_ROUND_BODY(in, r) do { \
sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
int i; \
\
A = (r)[0]; \
B = (r)[1]; \
C = (r)[2]; \
D = (r)[3]; \
E = (r)[4]; \
F = (r)[5]; \
G = (r)[6]; \
H = (r)[7]; \
W00 = in(0); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0x428A2F98) + W00); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W01 = in(1); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0x71374491) + W01); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W02 = in(2); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0xB5C0FBCF) + W02); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W03 = in(3); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0xE9B5DBA5) + W03); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W04 = in(4); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x3956C25B) + W04); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W05 = in(5); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0x59F111F1) + W05); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W06 = in(6); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x923F82A4) + W06); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W07 = in(7); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0xAB1C5ED5) + W07); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W08 = in(8); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0xD807AA98) + W08); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W09 = in(9); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0x12835B01) + W09); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W10 = in(10); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0x243185BE) + W10); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W11 = in(11); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0x550C7DC3) + W11); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W12 = in(12); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x72BE5D74) + W12); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W13 = in(13); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0x80DEB1FE) + W13); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W14 = in(14); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x9BDC06A7) + W14); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W15 = in(15); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0xC19BF174) + W15); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0xE49B69C1) + W00); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0xEFBE4786) + W01); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0x0FC19DC6) + W02); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0x240CA1CC) + W03); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x2DE92C6F) + W04); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0x4A7484AA) + W05); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x5CB0A9DC) + W06); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0x76F988DA) + W07); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0x983E5152) + W08); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0xA831C66D) + W09); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0xB00327C8) + W10); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0xBF597FC7) + W11); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0xC6E00BF3) + W12); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0xD5A79147) + W13); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x06CA6351) + W14); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0x14292967) + W15); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0x27B70A85) + W00); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0x2E1B2138) + W01); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0x4D2C6DFC) + W02); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0x53380D13) + W03); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x650A7354) + W04); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0x766A0ABB) + W05); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x81C2C92E) + W06); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0x92722C85) + W07); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0xA2BFE8A1) + W08); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0xA81A664B) + W09); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0xC24B8B70) + W10); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0xC76C51A3) + W11); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0xD192E819) + W12); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0xD6990624) + W13); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0xF40E3585) + W14); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0x106AA070) + W15); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0x19A4C116) + W00); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0x1E376C08) + W01); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0x2748774C) + W02); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0x34B0BCB5) + W03); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x391C0CB3) + W04); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0x4ED8AA4A) + W05); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0x5B9CCA4F) + W06); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0x682E6FF3) + W07); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+ SPH_C32(0x748F82EE) + W08); \
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
D = SPH_T32(D + T1); \
H = SPH_T32(T1 + T2); \
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+ SPH_C32(0x78A5636F) + W09); \
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
C = SPH_T32(C + T1); \
G = SPH_T32(T1 + T2); \
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+ SPH_C32(0x84C87814) + W10); \
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
B = SPH_T32(B + T1); \
F = SPH_T32(T1 + T2); \
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+ SPH_C32(0x8CC70208) + W11); \
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
A = SPH_T32(A + T1); \
E = SPH_T32(T1 + T2); \
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+ SPH_C32(0x90BEFFFA) + W12); \
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
H = SPH_T32(H + T1); \
D = SPH_T32(T1 + T2); \
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+ SPH_C32(0xA4506CEB) + W13); \
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
G = SPH_T32(G + T1); \
C = SPH_T32(T1 + T2); \
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+ SPH_C32(0xBEF9A3F7) + W14); \
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
F = SPH_T32(F + T1); \
B = SPH_T32(T1 + T2); \
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+ SPH_C32(0xC67178F2) + W15); \
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
E = SPH_T32(E + T1); \
A = SPH_T32(T1 + T2); \
(r)[0] = SPH_T32((r)[0] + A); \
(r)[1] = SPH_T32((r)[1] + B); \
(r)[2] = SPH_T32((r)[2] + C); \
(r)[3] = SPH_T32((r)[3] + D); \
(r)[4] = SPH_T32((r)[4] + E); \
(r)[5] = SPH_T32((r)[5] + F); \
(r)[6] = SPH_T32((r)[6] + G); \
(r)[7] = SPH_T32((r)[7] + H); \
} while (0)
#endif
/*
 * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
 */
static void
sha2_round(const unsigned char *data, sph_u32 r[8])
{
/* SHA2_IN(x) decodes the x-th 32-bit word of the block as big-endian. */
#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
	SHA2_ROUND_BODY(SHA2_IN, r);
#undef SHA2_IN
}
/* see sph_sha2.h */
void
sph_sha224_init(void *cc)
{
	sph_sha224_context *sc = cc;

	/* Load the SHA-224 initial state vector and reset the bit counter. */
	memcpy(sc->val, H224, sizeof H224);
#if SPH_64
	sc->count = 0;
#else
	sc->count_low = 0;
	sc->count_high = 0;
#endif
}
/* see sph_sha2.h */
void
sph_sha256_init(void *cc)
{
	sph_sha256_context *sc = cc;

	/* Load the SHA-256 initial state vector and reset the bit counter. */
	memcpy(sc->val, H256, sizeof H256);
#if SPH_64
	sc->count = 0;
#else
	sc->count_low = 0;
	sc->count_high = 0;
#endif
}
/*
 * Including md_helper.c generates the shared Merkle-Damgard streaming
 * and padding helpers used below (sha224_close(),
 * sha224_addbits_and_close(), ...), parameterized by these macros:
 * RFUN is the compression round function, HASH the generated-name
 * prefix, and BE32 selects the 32-bit big-endian variant (see
 * md_helper.c for the exact semantics).
 */
#define RFUN sha2_round
#define HASH sha224
#define BE32 1
#include "md_helper.c"
/* see sph_sha2.h */
void
sph_sha224_close(void *cc, void *dst)
{
	/* Output 7 32-bit words (28-byte SHA-224 digest), then reset. */
	sha224_close(cc, dst, 7);
	sph_sha224_init(cc);
}
/* see sph_sha2.h */
void
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/*
	 * Append the trailing n extra bits taken from ub, output the
	 * 7-word (28-byte) SHA-224 digest, then reset the context.
	 */
	sha224_addbits_and_close(cc, ub, n, dst, 7);
	sph_sha224_init(cc);
}
/* see sph_sha2.h */
void
sph_sha256_close(void *cc, void *dst)
{
	/*
	 * Note: the generated helper is named sha224_close (after the
	 * HASH macro) but is shared by SHA-224/256; the last argument
	 * (8 words = 32 bytes) selects the full SHA-256 digest length.
	 */
	sha224_close(cc, dst, 8);
	sph_sha256_init(cc);
}
/* see sph_sha2.h */
void
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/*
	 * Append the trailing n extra bits taken from ub, output the
	 * 8-word (32-byte) SHA-256 digest, then reset the context.
	 * The sha224_-prefixed helper is the shared generated code.
	 */
	sha224_addbits_and_close(cc, ub, n, dst, 8);
	sph_sha256_init(cc);
}
/* see sph_sha2.h */
void
sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
{
/* SHA2_IN(x) reads the x-th input word directly (already decoded). */
#define SHA2_IN(x) msg[x]
	SHA2_ROUND_BODY(SHA2_IN, val);
#undef SHA2_IN
}

191
sph/sph_tiger.h

@@ -0,0 +1,191 @@
/* $Id: sph_tiger.h 216 2010-06-08 09:46:57Z tp $ */
/**
* Tiger / Tiger-2 interface.
*
* Tiger has been published in: R. Anderson, E. Biham, "Tiger: A Fast
* New Hash Function", Fast Software Encryption - FSE'96, LNCS 1039,
* Springer (1996), pp. 89--97.
*
* Tiger2 has never been formally published, but it was described as
* identical to Tiger, except for the padding which is the same in
* Tiger2 as it is in MD4. Fortunately, an implementation of Tiger2
* was submitted to NESSIE, which produced test vectors; the sphlib
* implementation of Tiger2 is compatible with the NESSIE test vectors.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_tiger.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_TIGER_H__
#define SPH_TIGER_H__
#include <stddef.h>
#include "sph_types.h"
#if SPH_64
/**
* Output size (in bits) for Tiger.
*/
#define SPH_SIZE_tiger 192
/**
* Output size (in bits) for Tiger2.
*/
#define SPH_SIZE_tiger2 192
/**
* This structure is a context for Tiger computations: it contains the
* intermediate values and some data from the last entered block. Once
* a Tiger computation has been performed, the context can be reused for
* another computation.
*
* The contents of this structure are private. A running Tiger computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	sph_u64 val[3];           /* intermediate state: three 64-bit words */
	sph_u64 count;            /* running input counter; presumably the
	                             byte count used for length padding —
	                             TODO confirm against the update code */
#endif
} sph_tiger_context;
/**
* Initialize a Tiger context. This process performs no memory allocation.
*
* @param cc the Tiger context (pointer to
* a <code>sph_tiger_context</code>)
*/
void sph_tiger_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the Tiger context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_tiger(void *cc, const void *data, size_t len);
/**
* Terminate the current Tiger computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (24 bytes). The context is automatically
* reinitialized.
*
* @param cc the Tiger context
* @param dst the destination buffer
*/
void sph_tiger_close(void *cc, void *dst);
/**
* Apply the Tiger compression function on the provided data. The
* <code>msg</code> parameter contains the 8 64-bit input blocks,
* as numerical values (hence after the little-endian decoding). The
* <code>val</code> parameter contains the 3 64-bit input blocks for
* the compression function; the output is written in place in this
* array.
*
* @param msg the message block (8 values)
* @param val the function 192-bit input and output
*/
void sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3]);
/**
* This structure is a context for Tiger2 computations. It is identical
* to the Tiger context, and they may be freely exchanged, since the
* difference between Tiger and Tiger2 resides solely in the padding, which
* is computed only in the last computation step.
*/
typedef sph_tiger_context sph_tiger2_context;
#ifdef DOXYGEN_IGNORE
/**
* Initialize a Tiger2 context. This function is identical to
* <code>sph_tiger_init()</code>.
*
* @param cc the Tiger2 context (pointer to
* a <code>sph_tiger2_context</code>)
*/
void sph_tiger2_init(void *cc);
#endif
#ifndef DOXYGEN_IGNORE
#define sph_tiger2_init sph_tiger_init
#endif
#ifdef DOXYGEN_IGNORE
/**
* Process some data bytes. This function is identical to
* <code>sph_tiger()</code>.
*
* @param cc the Tiger2 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_tiger2(void *cc, const void *data, size_t len);
#endif
#ifndef DOXYGEN_IGNORE
#define sph_tiger2 sph_tiger
#endif
/**
* Terminate the current Tiger2 computation and output the result into the
* provided buffer. The destination buffer must be wide enough to
* accomodate the result (24 bytes). The context is automatically
* reinitialized. Note that this function is NOT identical to
* <code>sph_tiger2_close()</code>: this is the exact and unique point
* where Tiger and Tiger2 differ.
*
* @param cc the Tiger context
* @param dst the destination buffer
*/
void sph_tiger2_close(void *cc, void *dst);
#ifdef DOXYGEN_IGNORE
/**
* Apply the Tiger2 compression function, which is identical to the Tiger
* compression function.
*
* @param msg the message block (8 values)
* @param val the function 192-bit input and output
*/
void sph_tiger2_comp(const sph_u64 msg[8], sph_u64 val[3]);
#endif
#ifndef DOXYGEN_IGNORE
#define sph_tiger2_comp sph_tiger_comp
#endif
#endif
#endif

698
sph/tiger.c

@@ -0,0 +1,698 @@
/* $Id: tiger.c 216 2010-06-08 09:46:57Z tp $ */
/*
* Tiger / Tiger2 implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#include <stddef.h>
#include <string.h>
#include "sph_tiger.h"
#if SPH_64
/* Tiger S-box 1: 256 precomputed 64-bit entries, indexed by one input byte. */
static const sph_u64 T1[256] = {
	SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC),
	SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A),
	SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2),
	SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870),
	SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C),
	SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C),
	SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A),
	SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E),
	SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7),
	SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92),
	SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D),
	SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E),
	SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42),
	SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04),
	SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC),
	SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F),
	SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B),
	SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3),
	SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1),
	SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066),
	SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B),
	SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05),
	SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28),
	SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169),
	SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410),
	SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B),
	SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607),
	SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD),
	SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5),
	SPH_C64(0xC4B17AD28BE23A4C), SPH_C64(0xCAB2F0FC6A3E6A2E),
	SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446),
	SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE),
	SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7),
	SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853),
	SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201),
	SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14),
	SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119),
	SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2),
	SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329),
	SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67),
	SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D),
	SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062),
	SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9),
	SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C),
	SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B),
	SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F),
	SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC),
	SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556),
	SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4),
	SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881),
	SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A),
	SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779),
	SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC),
	SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4),
	SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF),
	SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96),
	SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D),
	SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808),
	SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE),
	SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4),
	SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1),
	SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017),
	SPH_C64(0x0BD7BA3EBB5DC726), SPH_C64(0xE54BCA55B86ADB39),
	SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD),
	SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194),
	SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2),
	SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436),
	SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8),
	SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645),
	SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49),
	SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F),
	SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5),
	SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC),
	SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA),
	SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A),
	SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530),
	SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631),
	SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143),
	SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8),
	SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD),
	SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1),
	SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A),
	SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06),
	SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB),
	SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641),
	SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40),
	SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D),
	SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D),
	SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8),
	SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D),
	SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8),
	SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886),
	SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03),
	SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65),
	SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6),
	SPH_C64(0x155BAED099047718), SPH_C64(0x68755E4C3D50BAA6),
	SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4),
	SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450),
	SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7),
	SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B),
	SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5),
	SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE),
	SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9),
	SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024),
	SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111),
	SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7),
	SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1),
	SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952),
	SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0),
	SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77),
	SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C),
	SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89),
	SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2),
	SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF),
	SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4),
	SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34),
	SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE),
	SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C),
	SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A),
	SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895),
	SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38),
	SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74),
	SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187),
	SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90),
	SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648),
	SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB),
	SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D),
	SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A),
};
/* Tiger S-box 2: 256 precomputed 64-bit entries, indexed by one input byte. */
static const sph_u64 T2[256] = {
	SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98),
	SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5),
	SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5),
	SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99),
	SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16),
	SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3),
	SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E),
	SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2),
	SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717),
	SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2),
	SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9),
	SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD),
	SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76),
	SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA),
	SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA),
	SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B),
	SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC),
	SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85),
	SPH_C64(0xD33525782A7974AA), SPH_C64(0xA7E25D09078AC09B),
	SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70),
	SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3),
	SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D),
	SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC),
	SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C),
	SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C),
	SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822),
	SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258),
	SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5),
	SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06),
	SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E),
	SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665),
	SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762),
	SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE),
	SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96),
	SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4),
	SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063),
	SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3),
	SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE),
	SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E),
	SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8),
	SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD),
	SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5),
	SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24),
	SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B),
	SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F),
	SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A),
	SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556),
	SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A),
	SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D),
	SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4),
	SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F),
	SPH_C64(0xB1BD6F1B4D502843), SPH_C64(0x1C761BA38FFF4012),
	SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A),
	SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621),
	SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B),
	SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97),
	SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8),
	SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB),
	SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30),
	SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0),
	SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972),
	SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C),
	SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8),
	SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7),
	SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635),
	SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C),
	SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3),
	SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61),
	SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132),
	SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0),
	SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31),
	SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF),
	SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3),
	SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB),
	SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC),
	SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475),
	SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967),
	SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE),
	SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7),
	SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1),
	SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF),
	SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40),
	SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D),
	SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9),
	SPH_C64(0xFA9E400DC4889995), SPH_C64(0xD1823ECB45721568),
	SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB),
	SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0),
	SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE),
	SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1),
	SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E),
	SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D),
	SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183),
	SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09),
	SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D),
	SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784),
	SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB),
	SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B),
	SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23),
	SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81),
	SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F),
	SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4),
	SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9),
	SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60),
	SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF),
	SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E),
	SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A),
	SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615),
	SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839),
	SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2),
	SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552),
	SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC),
	SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746),
	SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A),
	SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F),
	SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F),
	SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054),
	SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289),
	SPH_C64(0x3EFF6DDA9E3100DB), SPH_C64(0xB38DC39FDFCC8847),
	SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642),
	SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6),
	SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB),
	SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C),
	SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11),
	SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF),
	SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A),
	SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86),
	SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371),
	SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E),
};
/*
 * Tiger S-box table 3: 256 precomputed 64-bit entries, indexed by one
 * byte of the state word "c" in the ROUND() macro.
 */
static const sph_u64 T3[256] = {
SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281),
SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63),
SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66),
SPH_C64(0xA103E279970E93D4), SPH_C64(0xBECDEC77B0E45E71),
SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017),
SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579),
SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172),
SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41),
SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5),
SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47),
SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491),
SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8),
SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE),
SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902),
SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D),
SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE),
SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067),
SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F),
SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7),
SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB),
SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854),
SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851),
SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4),
SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2),
SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A),
SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74),
SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D),
SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA),
SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07),
SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840),
SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F),
SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB),
SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9),
SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30),
SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9),
SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88),
SPH_C64(0xE41433083F820801), SPH_C64(0xFD0DFE409A1AF9B5),
SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252),
SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09),
SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA),
SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0),
SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6),
SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF),
SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559),
SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773),
SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5),
SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778),
SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B),
SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E),
SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6),
SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC),
SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749),
SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C),
SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE),
SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3),
SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D),
SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15),
SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E),
SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC),
SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A),
SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199),
SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80),
SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77),
SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF),
SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503),
SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B),
SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C),
SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76),
SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F),
SPH_C64(0x691BC30EC5FAA427), SPH_C64(0xFF246311CC327143),
SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396),
SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536),
SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1),
SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D),
SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8),
SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D),
SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB),
SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1),
SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A),
SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32),
SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2),
SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28),
SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F),
SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044),
SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC),
SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C),
SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8),
SPH_C64(0xE0BE477656A2389F), SPH_C64(0x4D289B5E67AB2DA0),
SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365),
SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8),
SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14),
SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00),
SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208),
SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4),
SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1),
SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF),
SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5),
SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6),
SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7),
SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13),
SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9),
SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA),
SPH_C64(0x59E86297D87F5733), SPH_C64(0x298EF221898DB0E7),
SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45),
SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42),
SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A),
SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4),
SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534),
SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29),
SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C),
SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7),
SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1),
SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C),
SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2),
SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA),
SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50),
SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE),
SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E),
SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7),
SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25),
SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6),
SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68),
SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B),
SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95),
SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05),
SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9),
SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C),
SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E),
};
/*
 * Tiger S-box table 4: 256 precomputed 64-bit entries, indexed by one
 * byte of the state word "c" in the ROUND() macro.
 */
static const sph_u64 T4[256] = {
SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5),
SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7),
SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45),
SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF),
SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F),
SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0),
SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069),
SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7),
SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2),
SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB),
SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94),
SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893),
SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C),
SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D),
SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281),
SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1),
SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F),
SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C),
SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847),
SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04),
SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877),
SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4),
SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06),
SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262),
SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE),
SPH_C64(0xA679B9D9D72BBA20), SPH_C64(0x53841C0D912D43A5),
SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF),
SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742),
SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495),
SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E),
SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570),
SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1),
SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C),
SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B),
SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567),
SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D),
SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D),
SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF),
SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0),
SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6),
SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE),
SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208),
SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309),
SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27),
SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F),
SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD),
SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7),
SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C),
SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12),
SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216),
SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8),
SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17),
SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA),
SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64),
SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC),
SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013),
SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7),
SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476),
SPH_C64(0xFEB2FBDA3D03C12D), SPH_C64(0xAE0BCED2EE43889A),
SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D),
SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C),
SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638),
SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD),
SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9),
SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C),
SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3),
SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78),
SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453),
SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074),
SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD),
SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397),
SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951),
SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE),
SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C),
SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9),
SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80),
SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310),
SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B),
SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C),
SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1),
SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC),
SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC),
SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B),
SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE),
SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D),
SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D),
SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633),
SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68),
SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4),
SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782),
SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6),
SPH_C64(0x6F83AEB0317AC588), SPH_C64(0x01E6CA4A86381F21),
SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB),
SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE),
SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA),
SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58),
SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2),
SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC),
SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F),
SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB),
SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63),
SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457),
SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF),
SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99),
SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A),
SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A),
SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01),
SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829),
SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2),
SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0),
SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8),
SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966),
SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B),
SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503),
SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A),
SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0),
SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A),
SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9),
SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0),
SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B),
SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90),
SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4),
SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA),
SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7),
SPH_C64(0xBF6C70E5F776CBB1), SPH_C64(0x411218F2EF552BED),
SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044),
SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065),
SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F),
};
/*
 * One pass of the Tiger state update: eight rounds, one per message
 * word X0..X7, cycling the roles of the three state words a, b and c.
 * "mul" is the pass-specific multiplier macro (MUL5, MUL7 or MUL9).
 */
#define PASS(a, b, c, mul) do { \
ROUND(a, b, c, X0, mul); \
ROUND(b, c, a, X1, mul); \
ROUND(c, a, b, X2, mul); \
ROUND(a, b, c, X3, mul); \
ROUND(b, c, a, X4, mul); \
ROUND(c, a, b, X5, mul); \
ROUND(a, b, c, X6, mul); \
ROUND(b, c, a, X7, mul); \
} while (0)
/*
 * One Tiger round: XOR the message word x into c, subtract from a the
 * XOR of S-box lookups on the even bytes of c (bytes 0, 2, 4, 6), add
 * to b the lookups on the odd bytes (1, 3, 5, 7), then multiply b by
 * the pass constant. SPH_T64 truncates the result to 64 bits.
 */
#define ROUND(a, b, c, x, mul) do { \
c ^= x; \
a = SPH_T64(a - (T1[c & 0xFF] ^ T2[(c >> 16) & 0xFF] \
^ T3[(c >> 32) & 0xFF] ^ T4[(c >> 48) & 0xFF])); \
b = SPH_T64(b + (T4[(c >> 8) & 0xFF] ^ T3[(c >> 24) & 0xFF] \
^ T2[(c >> 40) & 0xFF] ^ T1[(c >> 56) & 0xFF])); \
b = mul(b); \
} while (0)
#define MUL5(x) SPH_T64((x) * SPH_C64(5))
#define MUL7(x) SPH_T64((x) * SPH_C64(7))
#define MUL9(x) SPH_T64((x) * SPH_C64(9))
/*
 * Tiger key schedule: derive the next set of eight message words
 * X0..X7 from the current ones, run between the three passes of the
 * compression function.
 */
#define KSCHED do { \
X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); \
X1 ^= X0; \
X2 = SPH_T64(X2 + X1); \
X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \
X4 ^= X3; \
X5 = SPH_T64(X5 + X4); \
X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \
X7 ^= X6; \
X0 = SPH_T64(X0 + X7); \
X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \
X2 ^= X1; \
X3 = SPH_T64(X3 + X2); \
X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \
X5 ^= X4; \
X6 = SPH_T64(X6 + X5); \
X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \
} while (0)
/*
 * Tiger compression function: load the eight 64-bit message words via
 * "in(i)", run three passes (multipliers 5, 7 and 9) with the key
 * schedule between them, then feed the result forward into the
 * 192-bit chaining value r[0..2].
 *
 * Fix: the previous version declared "int i;" which was never used
 * anywhere in the macro body, triggering an unused-variable warning at
 * every expansion; the declaration is removed.
 */
#define TIGER_ROUND_BODY(in, r) do { \
sph_u64 A, B, C; \
sph_u64 X0, X1, X2, X3, X4, X5, X6, X7; \
 \
A = (r)[0]; \
B = (r)[1]; \
C = (r)[2]; \
 \
X0 = (in(0)); \
X1 = (in(1)); \
X2 = (in(2)); \
X3 = (in(3)); \
X4 = (in(4)); \
X5 = (in(5)); \
X6 = (in(6)); \
X7 = (in(7)); \
PASS(A, B, C, MUL5); \
KSCHED; \
PASS(C, A, B, MUL7); \
KSCHED; \
PASS(B, C, A, MUL9); \
 \
(r)[0] ^= A; \
(r)[1] = SPH_T64(B - (r)[1]); \
(r)[2] = SPH_T64(C + (r)[2]); \
} while (0)
/*
 * One round of Tiger. The data must be aligned for 64-bit access.
 */
static void
tiger_round(const unsigned char *data, sph_u64 r[3])
{
/* Decode the 64-byte block as eight little-endian 64-bit words. */
#define TIGER_IN(i) sph_dec64le_aligned(data + 8 * (i))
TIGER_ROUND_BODY(TIGER_IN, r);
#undef TIGER_IN
}
/* see sph_tiger.h */
void sph_tiger_init(void *cc)
{
	sph_tiger_context *ctx = cc;

	/* Initial 192-bit chaining value of Tiger. */
	ctx->val[0] = SPH_C64(0x0123456789ABCDEF);
	ctx->val[1] = SPH_C64(0xFEDCBA9876543210);
	ctx->val[2] = SPH_C64(0xF096A5B4C3B2E187);
	ctx->count = 0;
}
/*
 * Instantiate the generic Merkle-Damgard plumbing from md_helper.c
 * for Tiger:
 *  RFUN - compression function applied to each block
 *  HASH - name prefix for the generated functions (tiger_*)
 *  LE64 - message length encoded as little-endian 64-bit
 *  BLEN - block length in bytes
 *  PW01 - padding-byte variant used by Tiger (undefined below for
 *         Tiger2, which otherwise shares everything) - see md_helper.c
 *  PLW1 - length field stored as a single 64-bit word
 */
#define RFUN tiger_round
#define HASH tiger
#define LE64 1
#define BLEN 64U
#define PW01 1
#define PLW1 1
#include "md_helper.c"
/* see sph_tiger.h */
void sph_tiger_close(void *cc, void *dst)
{
	/* Pad, emit the three 64-bit output words, then reset the
	   context so it can be reused for a new computation. */
	tiger_close(cc, dst, 3);
	sph_tiger_init(cc);
}
/* see sph_tiger.h */
void
sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3])
{
/* Words are already decoded: feed the compression function directly. */
#define TIGER_IN(i) msg[i]
TIGER_ROUND_BODY(TIGER_IN, val);
#undef TIGER_IN
}
/*
 * Tiger2 shares the compression function and block handling with
 * Tiger; only the padding byte differs (PW01 is undefined here), so
 * re-include md_helper.c to generate just the close routine
 * (CLOSE_ONLY) under the tiger2_* prefix.
 */
#undef HASH
#define HASH tiger2
#undef PW01
#define CLOSE_ONLY 1
#include "md_helper.c"
/* see sph_tiger.h */
void sph_tiger2_close(void *cc, void *dst)
{
	/* Pad (Tiger2 variant), emit the three 64-bit output words,
	   then reset the context for reuse. */
	tiger2_close(cc, dst, 3);
	sph_tiger2_init(cc);
}
#endif

4
x15/cuda_x15_whirlpool.cu

@ -16,8 +16,8 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t @@ -16,8 +16,8 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t pTarget[8];
uint32_t *d_wnounce[8];
uint32_t *d_WNonce[8];
static uint32_t *d_wnounce[8];
static uint32_t *d_WNonce[8];
#define USE_ALL_TABLES 1

Loading…
Cancel
Save