Browse Source

Merge pull request #416 from gzm55/master

Compile CPU mining for win32 and win64
nfactor-troky
Con Kolivas 12 years ago
parent
commit
1a8bfad0a0
  1. 2
      .gitignore
  2. 10
      Makefile.am
  3. 35
      autogen-win32.sh
  4. 36
      autogen-win64.sh
  5. 17
      cgminer.c
  6. 12
      compat.h
  7. 41
      configure.ac
  8. 10
      driver-bitforce.c
  9. 8
      driver-cpu.c
  10. 2
      driver-cpu.h
  11. 2
      driver-icarus.c
  12. 2
      driver-ztex.c
  13. 5
      elist.h
  14. 5
      fpgautils.c
  15. 2
      lib/signal.in.h
  16. 3
      logging.c
  17. 3
      miner.h
  18. 2
      x86_32/Makefile.am
  19. 8
      x86_32/sha256_xmm.asm
  20. 2
      x86_64/Makefile.am
  21. 43
      x86_64/sha256_sse4_amd64.asm
  22. 25
      x86_64/sha256_xmm_amd64.asm

2
.gitignore vendored

@ -41,3 +41,5 @@ lib/string.h @@ -41,3 +41,5 @@ lib/string.h
lib/warn-on-use.h
mkinstalldirs
*.swp

10
Makefile.am

@ -19,7 +19,7 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) @@ -19,7 +19,7 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
bin_PROGRAMS = cgminer
bin_SCRIPTS = *.cl
bin_SCRIPTS = $(top_srcdir)/*.cl
cgminer_LDFLAGS = $(PTHREAD_FLAGS)
cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
@ -27,11 +27,7 @@ cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \ @@ -27,11 +27,7 @@ cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
@UDEV_LIBS@ @LIBUSB_LIBS@ \
@MATH_LIBS@ lib/libgnu.a ccan/libccan.a
if HAVE_WINDOWS
cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@
else
cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@ @LIBCURL_CFLAGS@
endif
# common sources
cgminer_SOURCES := cgminer.c
@ -101,11 +97,11 @@ endif @@ -101,11 +97,11 @@ endif
if HAS_MODMINER
cgminer_SOURCES += driver-modminer.c
bitstreamsdir = $(bindir)/bitstreams
dist_bitstreams_DATA = bitstreams/*
dist_bitstreams_DATA = $(top_srcdir)/bitstreams/*
endif
if HAS_ZTEX
cgminer_SOURCES += driver-ztex.c libztex.c libztex.h
bitstreamsdir = $(bindir)/bitstreams
dist_bitstreams_DATA = bitstreams/*
dist_bitstreams_DATA = $(top_srcdir)/bitstreams/*
endif

35
autogen-win32.sh

@ -0,0 +1,35 @@ @@ -0,0 +1,35 @@
#!/bin/bash
#
# autogen-win32.sh - regenerate the autotools files and configure a win32
# build with CPU mining, scrypt and the bitforce/icarus/modminer/ztex
# drivers enabled.
#
# Run it from the directory that should become the build tree; the source
# tree is located from this script's own path, so out-of-tree builds work.
#
# Environment:
#   NOCONFIGURE  - if non-empty, stop after autoreconf (skip configure).
#   CGMINER_SDK  - optional prefix holding include/, lib/ and
#                  lib/pkgconfig for the prebuilt dependencies.
#
# Any command-line arguments are forwarded verbatim to configure.

# Resolve the directory containing this script (the source tree). Every
# expansion is quoted so a checkout path containing spaces still works.
bs_dir="$(dirname "$(readlink -f "$0")")"
build_dir="$PWD"

# Drop stale autotools output so autoreconf regenerates everything.
rm -rf "${bs_dir}"/autom4te.cache
rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh

echo 'Running autoreconf -ifv...'
autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1

if test -z "$NOCONFIGURE" ; then
    echo 'Configuring...'

    # Out-of-tree build: make headers in the source tree reachable.
    if [[ "$bs_dir" != "$build_dir" ]]; then
        export CPPFLAGS+=" -I $bs_dir"
    fi

    # Point the compiler, linker and pkg-config at the dependency SDK.
    if [[ -n "$CGMINER_SDK" ]]; then
        export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS"
        export LDFLAGS="-L $CGMINER_SDK/lib $LDFLAGS"
        export PKG_CONFIG_PATH="$CGMINER_SDK/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
        export ADL_SDK="$CGMINER_SDK/include/ADL_SDK"
    fi

    # "$@" (quoted) forwards each user argument as a single word, so
    # options whose values contain spaces reach configure intact.
    CFLAGS="-O3 -msse2" \
    "$bs_dir"/configure \
        --prefix="$build_dir"/opt \
        --enable-cpumining \
        --enable-scrypt \
        --enable-bitforce \
        --enable-icarus \
        --enable-modminer \
        --enable-ztex \
        "$@"
fi

36
autogen-win64.sh

@ -0,0 +1,36 @@ @@ -0,0 +1,36 @@
#!/bin/bash
#
# autogen-win64.sh - regenerate the autotools files and configure a win64
# (x86_64-w64-mingw32) build with CPU mining, scrypt and the
# bitforce/icarus/modminer/ztex drivers enabled.
#
# Run it from the directory that should become the build tree; the source
# tree is located from this script's own path, so out-of-tree builds work.
#
# Environment:
#   NOCONFIGURE  - if non-empty, stop after autoreconf (skip configure).
#   CGMINER_SDK  - optional prefix holding include/, lib64/ and
#                  lib64/pkgconfig for the prebuilt dependencies.
#
# Any command-line arguments are forwarded verbatim to configure.

# Resolve the directory containing this script (the source tree). Every
# expansion is quoted so a checkout path containing spaces still works.
bs_dir="$(dirname "$(readlink -f "$0")")"
build_dir="$PWD"

# Drop stale autotools output so autoreconf regenerates everything.
rm -rf "${bs_dir}"/autom4te.cache
rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh

echo 'Running autoreconf -ifv...'
autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1

if test -z "$NOCONFIGURE" ; then
    echo 'Configuring...'

    # Out-of-tree build: make headers in the source tree reachable.
    if [[ "$bs_dir" != "$build_dir" ]]; then
        export CPPFLAGS+=" -I $bs_dir"
    fi

    # Point the compiler, linker and pkg-config at the 64-bit dependency SDK.
    if [[ -n "$CGMINER_SDK" ]]; then
        export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS"
        export LDFLAGS="-L $CGMINER_SDK/lib64 $LDFLAGS"
        export PKG_CONFIG_PATH="$CGMINER_SDK/lib64/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
        export ADL_SDK="$CGMINER_SDK/include/ADL_SDK"
    fi

    # --target is passed explicitly for the 64-bit MinGW triplet.
    # "$@" (quoted) forwards each user argument as a single word, so
    # options whose values contain spaces reach configure intact.
    CFLAGS="-O3 -msse4" \
    "$bs_dir"/configure \
        --target=x86_64-w64-mingw32 \
        --prefix="$build_dir"/opt \
        --enable-cpumining \
        --enable-scrypt \
        --enable-bitforce \
        --enable-icarus \
        --enable-modminer \
        --enable-ztex \
        "$@"
fi

17
cgminer.c

@ -318,7 +318,8 @@ static bool should_run(void) @@ -318,7 +318,8 @@ static bool should_run(void)
return true;
gettimeofday(&tv, NULL);
tm = localtime(&tv.tv_sec);
const time_t tmp_time = tv.tv_sec;
tm = localtime(&tmp_time);
if (schedstart.enable) {
if (!schedstop.enable) {
if (time_before(tm, &schedstart.tm))
@ -350,7 +351,8 @@ void get_datestamp(char *f, struct timeval *tv) @@ -350,7 +351,8 @@ void get_datestamp(char *f, struct timeval *tv)
{
struct tm *tm;
tm = localtime(&tv->tv_sec);
const time_t tmp_time = tv->tv_sec;
tm = localtime(&tmp_time);
sprintf(f, "[%d-%02d-%02d %02d:%02d:%02d]",
tm->tm_year + 1900,
tm->tm_mon + 1,
@ -364,7 +366,8 @@ void get_timestamp(char *f, struct timeval *tv) @@ -364,7 +366,8 @@ void get_timestamp(char *f, struct timeval *tv)
{
struct tm *tm;
tm = localtime(&tv->tv_sec);
const time_t tmp_time = tv->tv_sec;
tm = localtime(&tmp_time);
sprintf(f, "[%02d:%02d:%02d]",
tm->tm_hour,
tm->tm_min,
@ -2584,9 +2587,11 @@ static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit) @@ -2584,9 +2587,11 @@ static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit)
double submit_time = tdiff(&tv_submit_reply, &tv_submit);
int diffplaces = 3;
tm = localtime(&(work->tv_getwork.tv_sec));
time_t tmp_time = work->tv_getwork.tv_sec;
tm = localtime(&tmp_time);
memcpy(&tm_getwork, tm, sizeof(struct tm));
tm = localtime(&(tv_submit_reply.tv_sec));
tmp_time = tv_submit_reply.tv_sec;
tm = localtime(&tmp_time);
memcpy(&tm_submit_reply, tm, sizeof(struct tm));
if (work->clone) {
@ -2957,7 +2962,7 @@ void app_restart(void) @@ -2957,7 +2962,7 @@ void app_restart(void)
}
#endif
execv(initial_args[0], initial_args);
execv(initial_args[0], (EXECV_2ND_ARG_TYPE)initial_args);
applog(LOG_WARNING, "Failed to restart application");
}

12
compat.h

@ -2,15 +2,17 @@ @@ -2,15 +2,17 @@
#define __COMPAT_H__
#ifdef WIN32
#include "config.h"
#include <errno.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>
#include <windows.h>
#include "miner.h" // for timersub
#include <windows.h>
#ifndef HAVE_LIBWINPTHREAD
static inline int nanosleep(const struct timespec *req, struct timespec *rem)
{
struct timeval tstart;
@ -42,6 +44,7 @@ static inline int nanosleep(const struct timespec *req, struct timespec *rem) @@ -42,6 +44,7 @@ static inline int nanosleep(const struct timespec *req, struct timespec *rem)
}
return 0;
}
#endif
static inline int sleep(unsigned int secs)
{
@ -71,7 +74,12 @@ typedef unsigned int uint; @@ -71,7 +74,12 @@ typedef unsigned int uint;
typedef long suseconds_t;
#endif
#ifdef HAVE_LIBWINPTHREAD
#define PTH(thr) ((thr)->pth)
#else
#define PTH(thr) ((thr)->pth.p)
#endif
#else
#define PTH(thr) ((thr)->pth)
#endif /* WIN32 */

41
configure.ac

@ -81,7 +81,6 @@ esac @@ -81,7 +81,6 @@ esac
case $target in
*-*-mingw*)
have_x86_64=false
have_win32=true
PTHREAD_FLAGS=""
DLOPEN_FLAGS=""
@ -166,9 +165,18 @@ else @@ -166,9 +165,18 @@ else
OPENCL_LIBS=""
fi
AC_CHECK_LIB(pthread, pthread_create, ,
AC_MSG_ERROR([Could not find pthread library - please install libpthread]))
PTHREAD_LIBS=-lpthread
has_winpthread=false
if test "x$have_win32" = xtrue; then
has_winpthread=true
AC_CHECK_LIB(winpthread, nanosleep, , has_winpthread=false)
PTHREAD_LIBS=-lwinpthread
fi
if test "x$has_winpthread" != xtrue; then
AC_CHECK_LIB(pthread, pthread_create, ,
AC_MSG_ERROR([Could not find pthread library - please install libpthread]))
PTHREAD_LIBS=-lpthread
fi
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
@ -181,7 +189,7 @@ scrypt="no" @@ -181,7 +189,7 @@ scrypt="no"
if test "$found_opencl" = 1; then
if test "x$adl" != xno; then
AC_CHECK_FILE([ADL_SDK/adl_sdk.h], have_adl=true, have_adl=false,)
AC_CHECK_FILE([${ADL_SDK:-ADL_SDK}/adl_sdk.h], have_adl=true, have_adl=false,)
if test x$have_adl = xtrue
then
AC_DEFINE([HAVE_ADL], [1], [Defined if ADL headers were found])
@ -305,7 +313,7 @@ has_yasm=false @@ -305,7 +313,7 @@ has_yasm=false
AC_PATH_PROG([YASM],[yasm],[false])
if test "x$YASM" != "xfalse" ; then
AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
yasmver=`yasm --version | head -1 | cut -d\ -f2`
yasmver=`"$YASM" --version | head -1 | cut -d\ -f2`
yamajor=`echo $yasmver | cut -d. -f1`
yaminor=`echo $yasmver | cut -d. -f2`
yamini=`echo $yasmver | cut -d. -f3`
@ -332,6 +340,18 @@ if test "x$YASM" != "xfalse" ; then @@ -332,6 +340,18 @@ if test "x$YASM" != "xfalse" ; then
fi
if test "x$has_yasm" = "xfalse" ; then
AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.])
else
if test "x$have_x86_64" = xtrue; then
if test "x$have_win32" = xtrue; then
YASM_FMT="win64"
else
YASM_FMT="elf64"
fi
elif test "x$have_win32" = xtrue; then
YASM_FMT="coff"
else
YASM_FMT="elf32"
fi
fi
AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue])
@ -382,6 +402,14 @@ else @@ -382,6 +402,14 @@ else
fi
AC_SUBST(LIBCURL_LIBS)
#check execv signature
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <process.h>
int execv(const char*, const char*const*);
])],
AC_DEFINE([EXECV_2ND_ARG_TYPE], [const char* const*], [int execv(const char*, const char*const*);]),
AC_DEFINE([EXECV_2ND_ARG_TYPE], [char* const*], [int execv(const char*, char*const*);]))
dnl CCAN wants to know a lot of vars.
# All the configuration checks. Regrettably, the __attribute__ checks will
# give false positives on old GCCs, since they just cause warnings. But that's
@ -438,6 +466,7 @@ AC_SUBST(PDCURSES_LIBS) @@ -438,6 +466,7 @@ AC_SUBST(PDCURSES_LIBS)
AC_SUBST(WS2_LIBS)
AC_SUBST(MATH_LIBS)
AC_SUBST(UDEV_LIBS)
AC_SUBST(YASM_FMT)
AC_CONFIG_FILES([
Makefile

10
driver-bitforce.c

@ -9,6 +9,8 @@ @@ -9,6 +9,8 @@
* any later version. See COPYING for more details.
*/
#include "config.h"
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
@ -17,16 +19,14 @@ @@ -17,16 +19,14 @@
#include <sys/time.h>
#include <unistd.h>
#include "config.h"
#include "compat.h"
#include "miner.h"
#include "usbutils.h"
#ifdef WIN32
#include <windows.h>
#endif /* WIN32 */
#include "compat.h"
#include "miner.h"
#include "usbutils.h"
#define BITFORCE_IDENTIFY "ZGX"
#define BITFORCE_IDENTIFY_LEN (sizeof(BITFORCE_IDENTIFY)-1)
#define BITFORCE_FLASH "ZMX"

8
driver-cpu.c

@ -202,7 +202,9 @@ static const sha256_func sha256_funcs[] = { @@ -202,7 +202,9 @@ static const sha256_func sha256_funcs[] = {
#ifdef WANT_CPUMINE
#if defined(WANT_X8664_SSE2) && defined(__SSE2__)
#if defined(WANT_X8664_SSE4) && defined(__SSE4_1__)
enum sha256_algos opt_algo = ALGO_SSE4_64;
#elif defined(WANT_X8664_SSE2) && defined(__SSE2__)
enum sha256_algos opt_algo = ALGO_SSE2_64;
#elif defined(WANT_X8632_SSE2) && defined(__SSE2__)
enum sha256_algos opt_algo = ALGO_SSE2_32;
@ -720,8 +722,8 @@ static void cpu_detect() @@ -720,8 +722,8 @@ static void cpu_detect()
// Reckon number of cores in the box
#if defined(WIN32)
{
DWORD system_am;
DWORD process_am;
DWORD_PTR system_am;
DWORD_PTR process_am;
BOOL ok = GetProcessAffinityMask(
GetCurrentProcess(),
&system_am,

2
driver-cpu.h

@ -30,7 +30,7 @@ @@ -30,7 +30,7 @@
#define WANT_X8664_SSE2 1
#endif
#if defined(__x86_64__) && defined(HAS_YASM)
#if defined(__x86_64__) && defined(HAS_YASM) && defined(__SSE4_1__)
#define WANT_X8664_SSE4 1
#endif

2
driver-icarus.c

@ -30,6 +30,7 @@ @@ -30,6 +30,7 @@
*/
#include "config.h"
#include "miner.h"
#include <limits.h>
#include <pthread.h>
@ -51,7 +52,6 @@ @@ -51,7 +52,6 @@
#endif
#include "elist.h"
#include "miner.h"
#include "fpgautils.h"
// The serial I/O speed - Linux uses a define 'B115200' in bits/termios.h

2
driver-ztex.c

@ -23,9 +23,9 @@ @@ -23,9 +23,9 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see http://www.gnu.org/licenses/.
**/
#include "miner.h"
#include <unistd.h>
#include <sha2.h>
#include "miner.h"
#include "libztex.h"
#define GOLDEN_BACKLOG 5

5
elist.h

@ -180,8 +180,13 @@ static inline void list_splice_init(struct list_head *list, @@ -180,8 +180,13 @@ static inline void list_splice_init(struct list_head *list,
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*/
#ifndef _WIN64
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
#else
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long long)(&((type *)0)->member)))
#endif
/**
* list_for_each - iterate over a list

5
fpgautils.c

@ -14,6 +14,8 @@ @@ -14,6 +14,8 @@
#include <dirent.h>
#include <string.h>
#include "miner.h"
#ifndef WIN32
#include <errno.h>
#include <termios.h>
@ -34,7 +36,6 @@ @@ -34,7 +36,6 @@
#include "elist.h"
#include "logging.h"
#include "miner.h"
#include "fpgautils.h"
#ifdef HAVE_LIBUDEV
@ -356,7 +357,7 @@ int serial_open(const char *devpath, unsigned long baud, signed short timeout, b @@ -356,7 +357,7 @@ int serial_open(const char *devpath, unsigned long baud, signed short timeout, b
PurgeComm(hSerial, PURGE_TXCLEAR);
}
return _open_osfhandle((LONG)hSerial, 0);
return _open_osfhandle((intptr_t)hSerial, 0);
#else
int fdDev = open(devpath, O_RDWR | O_CLOEXEC | O_NOCTTY);

2
lib/signal.in.h

@ -20,6 +20,8 @@ @@ -20,6 +20,8 @@
#endif
@PRAGMA_COLUMNS@
#include "config.h"
#if defined __need_sig_atomic_t || defined __need_sigset_t
/* Special invocation convention inside glibc header files. */

3
logging.c

@ -85,7 +85,8 @@ static void log_generic(int prio, const char *fmt, va_list ap) @@ -85,7 +85,8 @@ static void log_generic(int prio, const char *fmt, va_list ap)
gettimeofday(&tv, NULL);
tm = localtime(&tv.tv_sec);
const time_t tmp_time = tv.tv_sec;
tm = localtime(&tmp_time);
len = 40 + strlen(fmt) + 22;
f = alloca(len);

3
miner.h

@ -118,7 +118,8 @@ static inline int fsync (int fd) @@ -118,7 +118,8 @@ static inline int fsync (int fd)
#include "usbutils.h"
#endif
#if !defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#if (!defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
#define bswap_16 __builtin_bswap16
#define bswap_32 __builtin_bswap32
#define bswap_64 __builtin_bswap64

2
x86_32/Makefile.am

@ -5,4 +5,4 @@ SUFFIXES = .asm @@ -5,4 +5,4 @@ SUFFIXES = .asm
libx8632_a_SOURCES = sha256_xmm.asm
.asm.o:
$(YASM) -f elf32 $<
$(YASM) -f $(YASM_FMT) $<

8
x86_32/sha256_xmm.asm

@ -19,11 +19,11 @@ BITS 32 @@ -19,11 +19,11 @@ BITS 32
%define LAB_LOOP_UNROLL 64
extern sha256_consts_m128i
extern _sha256_consts_m128i
global CalcSha256_x86
global $@CalcSha256_x86@12
; CalcSha256 hash(ecx), data(edx), init([esp+4])
CalcSha256_x86:
@CalcSha256_x86@12:
push esi
push edi
mov init, [esp+12]
@ -134,7 +134,7 @@ LAB_LOOP: @@ -134,7 +134,7 @@ LAB_LOOP:
%macro lab_loop_blk 1
movdqa xmm6, [data+%1]
paddd xmm6, sha256_consts_m128i[%1]
paddd xmm6, _sha256_consts_m128i[%1]
paddd xmm6, [hash+2*16] ; +h

2
x86_64/Makefile.am

@ -5,4 +5,4 @@ SUFFIXES = .asm @@ -5,4 +5,4 @@ SUFFIXES = .asm
libx8664_a_SOURCES = sha256_xmm_amd64.asm sha256_sse4_amd64.asm
.asm.o:
$(YASM) -f elf64 $<
$(YASM) -f $(YASM_FMT) -o $@ $<

43
x86_64/sha256_sse4_amd64.asm

@ -13,9 +13,17 @@ @@ -13,9 +13,17 @@
ALIGN 32
BITS 64
%ifidn __OUTPUT_FORMAT__,win64
%define hash rcx
%define data rdx
%define init r8
%define temp r9
%else
%define hash rdi
%define data rsi
%define init rdx
%define temp rcx
%endif
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
%define LAB_CALC_PARA 2
@ -27,18 +35,28 @@ extern g_4sha256_k @@ -27,18 +35,28 @@ extern g_4sha256_k
global CalcSha256_x64_sse4
; CalcSha256 hash(rdi), data(rsi), init(rdx)
; CalcSha256 hash(rcx), data(rdx), init(r8)
CalcSha256_x64_sse4:
push rbx
%ifidn __OUTPUT_FORMAT__,win64
sub rsp, 16 * 6
movdqa [rsp + 16*0], xmm6
movdqa [rsp + 16*1], xmm7
movdqa [rsp + 16*2], xmm8
movdqa [rsp + 16*3], xmm9
movdqa [rsp + 16*4], xmm10
movdqa [rsp + 16*5], xmm11
%endif
LAB_NEXT_NONCE:
mov rcx, 64*4 ; 256 - rcx is # of SHA-2 rounds
mov temp, 64*4 ; 256 - temp is # of SHA-2 rounds
mov rax, 16*4 ; 64 - rax is where we expand to
LAB_SHA:
push rcx
lea rcx, qword [data+rcx*4] ; + 1024
push temp
lea temp, qword [data+temp*4] ; + 1024
lea r11, qword [data+rax*4] ; + 256
LAB_CALC:
@ -122,10 +140,10 @@ LAB_CALC: @@ -122,10 +140,10 @@ LAB_CALC:
%endrep
add r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16
cmp r11, rcx
cmp r11, temp
jb LAB_CALC
pop rcx
pop temp
mov rax, 0
; Load the init values of the message into the hash.
@ -219,12 +237,12 @@ LAB_LOOP: @@ -219,12 +237,12 @@ LAB_LOOP:
%assign i i+1
%endrep
cmp rax, rcx
cmp rax, temp
jb LAB_LOOP
; Finished the 64 rounds, calculate hash and save
movntdqa xmm1, [rdx]
movntdqa xmm1, [init]
pshufd xmm2, xmm1, 0x55
paddd xmm5, xmm2
pshufd xmm6, xmm1, 0xAA
@ -234,7 +252,7 @@ LAB_LOOP: @@ -234,7 +252,7 @@ LAB_LOOP:
pshufd xmm1, xmm1, 0
paddd xmm7, xmm1
movntdqa xmm1, [rdx+4*4]
movntdqa xmm1, [init+4*4]
pshufd xmm2, xmm1, 0x55
paddd xmm8, xmm2
pshufd xmm6, xmm1, 0xAA
@ -254,6 +272,15 @@ LAB_LOOP: @@ -254,6 +272,15 @@ LAB_LOOP:
movdqa [hash+7*16], xmm10
LAB_RET:
%ifidn __OUTPUT_FORMAT__,win64
movdqa xmm6, [rsp + 16*0]
movdqa xmm7, [rsp + 16*1]
movdqa xmm8, [rsp + 16*2]
movdqa xmm9, [rsp + 16*3]
movdqa xmm10, [rsp + 16*4]
movdqa xmm11, [rsp + 16*5]
add rsp, 16 * 6
%endif
pop rbx
ret

25
x86_64/sha256_xmm_amd64.asm

@ -22,10 +22,17 @@ @@ -22,10 +22,17 @@
ALIGN 32
BITS 64
%ifidn __OUTPUT_FORMAT__,win64
%define hash rcx
%define hash1 rdx
%define data r8
%define init r9
%else
%define hash rdi
%define hash1 rsi
%define data rdx
%define init rcx
%endif
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
%define SHA_CALC_W_PARA 2
@ -227,6 +234,15 @@ sha256_sse2_64_new: @@ -227,6 +234,15 @@ sha256_sse2_64_new:
%endif
push rbx
%ifidn __OUTPUT_FORMAT__,win64
sub rsp, 16 * 6
movdqa [rsp + 16*0], xmm6
movdqa [rsp + 16*1], xmm7
movdqa [rsp + 16*2], xmm8
movdqa [rsp + 16*3], xmm9
movdqa [rsp + 16*4], xmm10
movdqa [rsp + 16*5], xmm13
%endif
%macro SHA_256 0
mov rbx, 64*4 ; rbx is # of SHA-2 rounds
@ -318,6 +334,15 @@ sha256_sse2_64_new: @@ -318,6 +334,15 @@ sha256_sse2_64_new:
movdqa [hash+7*16], rH
LAB_RET:
%ifidn __OUTPUT_FORMAT__,win64
movdqa xmm6, [rsp + 16*0]
movdqa xmm7, [rsp + 16*1]
movdqa xmm8, [rsp + 16*2]
movdqa xmm9, [rsp + 16*3]
movdqa xmm10, [rsp + 16*4]
movdqa xmm13, [rsp + 16*5]
add rsp, 16 * 6
%endif
pop rbx
ret

Loading…
Cancel
Save