diff --git a/.gitignore b/.gitignore index 3e345162..7ab4ca0c 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,5 @@ lib/string.h lib/warn-on-use.h mkinstalldirs + +*.swp diff --git a/Makefile.am b/Makefile.am index b8905920..140bbc1f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,7 +19,7 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) bin_PROGRAMS = cgminer -bin_SCRIPTS = *.cl +bin_SCRIPTS = $(top_srcdir)/*.cl cgminer_LDFLAGS = $(PTHREAD_FLAGS) cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \ @@ -27,11 +27,7 @@ cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \ @UDEV_LIBS@ @LIBUSB_LIBS@ \ @MATH_LIBS@ lib/libgnu.a ccan/libccan.a -if HAVE_WINDOWS -cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@ -else cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@ @LIBCURL_CFLAGS@ -endif # common sources cgminer_SOURCES := cgminer.c @@ -101,11 +97,11 @@ endif if HAS_MODMINER cgminer_SOURCES += driver-modminer.c bitstreamsdir = $(bindir)/bitstreams -dist_bitstreams_DATA = bitstreams/* +dist_bitstreams_DATA = $(top_srcdir)/bitstreams/* endif if HAS_ZTEX cgminer_SOURCES += driver-ztex.c libztex.c libztex.h bitstreamsdir = $(bindir)/bitstreams -dist_bitstreams_DATA = bitstreams/* +dist_bitstreams_DATA = $(top_srcdir)/bitstreams/* endif diff --git a/autogen-win32.sh b/autogen-win32.sh new file mode 100644 index 00000000..c0258af9 --- /dev/null +++ b/autogen-win32.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +bs_dir="$(dirname $(readlink -f $0))" +build_dir="$PWD" +rm -rf "${bs_dir}"/autom4te.cache +rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh + +echo 'Running autoreconf -ifv...' +autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1 + +if test -z "$NOCONFIGURE" ; then + echo 'Configuring...' + + if [[ "$bs_dir" != "`pwd`" ]]; then + export CPPFLAGS+=" -I $bs_dir" + fi + + if [[ ! -z "$CGMINER_SDK" ]]; then + export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS" + export LDFLAGS="-L $CGMINER_SDK/lib $LDFLAGS" + export PKG_CONFIG_PATH="$CGMINER_SDK/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" + export ADL_SDK="$CGMINER_SDK/include/ADL_SDK" + fi + + CFLAGS="-O3 -msse2" \ + "$bs_dir"/configure \ + --prefix="$build_dir"/opt \ + --enable-cpumining \ + --enable-scrypt \ + --enable-bitforce \ + --enable-icarus \ + --enable-modminer \ + --enable-ztex \ + $@ +fi diff --git a/autogen-win64.sh b/autogen-win64.sh new file mode 100644 index 00000000..c9878485 --- /dev/null +++ b/autogen-win64.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +bs_dir="$(dirname $(readlink -f $0))" +build_dir="$PWD" +rm -rf "${bs_dir}"/autom4te.cache +rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh + +echo 'Running autoreconf -ifv...' +autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1 + +if test -z "$NOCONFIGURE" ; then + echo 'Configuring...' + + if [[ "$bs_dir" != "`pwd`" ]]; then + export CPPFLAGS+=" -I $bs_dir" + fi + + if [[ ! -z "$CGMINER_SDK" ]]; then + export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS" + export LDFLAGS="-L $CGMINER_SDK/lib64 $LDFLAGS" + export PKG_CONFIG_PATH="$CGMINER_SDK/lib64/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" + export ADL_SDK="$CGMINER_SDK/include/ADL_SDK" + fi + + CFLAGS="-O3 -msse4" \ + "$bs_dir"/configure \ + --target=x86_64-w64-mingw32 \ + --prefix="$build_dir"/opt \ + --enable-cpumining \ + --enable-scrypt \ + --enable-bitforce \ + --enable-icarus \ + --enable-modminer \ + --enable-ztex \ + $@ +fi diff --git a/cgminer.c b/cgminer.c index 3bf285f1..d9481c06 100644 --- a/cgminer.c +++ b/cgminer.c @@ -318,7 +318,8 @@ static bool should_run(void) return true; gettimeofday(&tv, NULL); - tm = localtime(&tv.tv_sec); + const time_t tmp_time = tv.tv_sec; + tm = localtime(&tmp_time); if (schedstart.enable) { if (!schedstop.enable) { if (time_before(tm, &schedstart.tm)) @@ -350,7 +351,8 @@ void get_datestamp(char *f, struct timeval *tv) { struct tm *tm; - tm = localtime(&tv->tv_sec); + const time_t tmp_time = tv->tv_sec; + tm = localtime(&tmp_time); sprintf(f, "[%d-%02d-%02d %02d:%02d:%02d]", tm->tm_year + 1900, tm->tm_mon + 1, @@ -364,7 +366,8 @@ void get_timestamp(char *f, struct timeval *tv) { struct tm *tm; - tm = localtime(&tv->tv_sec); + const time_t tmp_time = tv->tv_sec; + tm = localtime(&tmp_time); sprintf(f, "[%02d:%02d:%02d]", tm->tm_hour, tm->tm_min, @@ -2584,9 +2587,11 @@ static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit) double submit_time = tdiff(&tv_submit_reply, &tv_submit); int diffplaces = 3; - tm = localtime(&(work->tv_getwork.tv_sec)); + time_t tmp_time = work->tv_getwork.tv_sec; + tm = localtime(&tmp_time); memcpy(&tm_getwork, tm, sizeof(struct tm)); - tm = localtime(&(tv_submit_reply.tv_sec)); + tmp_time = tv_submit_reply.tv_sec; + tm = localtime(&tmp_time); memcpy(&tm_submit_reply, tm, sizeof(struct tm)); if (work->clone) { @@ -2957,7 +2962,7 @@ void app_restart(void) } #endif - execv(initial_args[0], initial_args); + execv(initial_args[0], (EXECV_2ND_ARG_TYPE)initial_args); applog(LOG_WARNING, "Failed to restart application"); } diff --git a/compat.h b/compat.h index f7cb4da5..0059a17d 100644 --- a/compat.h +++ b/compat.h @@ -2,15 +2,17 @@ #define __COMPAT_H__ #ifdef WIN32 +#include "config.h" #include #include #include #include -#include - #include "miner.h" // for timersub +#include + +#ifndef HAVE_LIBWINPTHREAD static inline int nanosleep(const struct timespec *req, struct timespec *rem) { struct timeval tstart; @@ -42,6 +44,7 @@ static inline int nanosleep(const struct timespec *req, struct timespec *rem) } return 0; } +#endif static inline int sleep(unsigned int secs) { @@ -71,7 +74,12 @@ typedef unsigned int uint; typedef long suseconds_t; #endif +#ifdef HAVE_LIBWINPTHREAD +#define PTH(thr) ((thr)->pth) +#else #define PTH(thr) ((thr)->pth.p) +#endif + #else #define PTH(thr) ((thr)->pth) #endif /* WIN32 */ diff --git a/configure.ac b/configure.ac index 78515c3a..495e54a4 100644 --- a/configure.ac +++ b/configure.ac @@ -81,7 +81,6 @@ esac case $target in *-*-mingw*) - have_x86_64=false have_win32=true PTHREAD_FLAGS="" DLOPEN_FLAGS="" @@ -166,9 +165,18 @@ else OPENCL_LIBS="" fi -AC_CHECK_LIB(pthread, pthread_create, , - AC_MSG_ERROR([Could not find pthread library - please install libpthread])) -PTHREAD_LIBS=-lpthread +has_winpthread=false +if test "x$have_win32" = xtrue; then + has_winpthread=true + AC_CHECK_LIB(winpthread, nanosleep, , has_winpthread=false) + PTHREAD_LIBS=-lwinpthread +fi + +if test "x$has_winpthread" != xtrue; then + AC_CHECK_LIB(pthread, pthread_create, , + AC_MSG_ERROR([Could not find pthread library - please install libpthread])) + PTHREAD_LIBS=-lpthread +fi AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true) @@ -181,7 +189,7 @@ scrypt="no" if test "$found_opencl" = 1; then if test "x$adl" != xno; then - AC_CHECK_FILE([ADL_SDK/adl_sdk.h], have_adl=true, have_adl=false,) + AC_CHECK_FILE([${ADL_SDK:-ADL_SDK}/adl_sdk.h], have_adl=true, have_adl=false,) if test x$have_adl = xtrue then AC_DEFINE([HAVE_ADL], [1], [Defined if ADL headers were found]) @@ -305,7 +313,7 @@ has_yasm=false AC_PATH_PROG([YASM],[yasm],[false]) if test "x$YASM" != "xfalse" ; then AC_MSG_CHECKING([if yasm version is greater than 1.0.1]) - yasmver=`yasm --version | head -1 | cut -d\ -f2` + yasmver=`"$YASM" --version | head -1 | cut -d\ -f2` yamajor=`echo $yasmver | cut -d. -f1` yaminor=`echo $yasmver | cut -d. -f2` yamini=`echo $yasmver | cut -d. -f3` @@ -332,6 +340,18 @@ if test "x$YASM" != "xfalse" ; then fi if test "x$has_yasm" = "xfalse" ; then AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.]) +else + if test "x$have_x86_64" = xtrue; then + if test "x$have_win32" = xtrue; then + YASM_FMT="win64" + else + YASM_FMT="elf64" + fi + elif test "x$have_win32" = xtrue; then + YASM_FMT="coff" + else + YASM_FMT="elf32" + fi fi AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue]) @@ -382,6 +402,14 @@ else fi AC_SUBST(LIBCURL_LIBS) +#check execv signature +AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include + int execv(const char*, const char*const*); + ])], + AC_DEFINE([EXECV_2ND_ARG_TYPE], [const char* const*], [int execv(const char*, const char*const*);]), + AC_DEFINE([EXECV_2ND_ARG_TYPE], [char* const*], [int execv(const char*, char*const*);])) + dnl CCAN wants to know a lot of vars. # All the configuration checks. Regrettably, the __attribute__ checks will # give false positives on old GCCs, since they just cause warnings. But that's @@ -438,6 +466,7 @@ AC_SUBST(PDCURSES_LIBS) AC_SUBST(WS2_LIBS) AC_SUBST(MATH_LIBS) AC_SUBST(UDEV_LIBS) +AC_SUBST(YASM_FMT) AC_CONFIG_FILES([ Makefile diff --git a/driver-bitforce.c b/driver-bitforce.c index 228fa5e7..b9a95ac7 100644 --- a/driver-bitforce.c +++ b/driver-bitforce.c @@ -9,6 +9,8 @@ * any later version. See COPYING for more details. */ +#include "config.h" + #include #include #include @@ -17,16 +19,14 @@ #include #include -#include "config.h" +#include "compat.h" +#include "miner.h" +#include "usbutils.h" #ifdef WIN32 #include #endif /* WIN32 */ -#include "compat.h" -#include "miner.h" -#include "usbutils.h" - #define BITFORCE_IDENTIFY "ZGX" #define BITFORCE_IDENTIFY_LEN (sizeof(BITFORCE_IDENTIFY)-1) #define BITFORCE_FLASH "ZMX" diff --git a/driver-cpu.c b/driver-cpu.c index a8b7a56a..6e563c5b 100644 --- a/driver-cpu.c +++ b/driver-cpu.c @@ -202,7 +202,9 @@ static const sha256_func sha256_funcs[] = { #ifdef WANT_CPUMINE -#if defined(WANT_X8664_SSE2) && defined(__SSE2__) +#if defined(WANT_X8664_SSE4) && defined(__SSE4_1__) +enum sha256_algos opt_algo = ALGO_SSE4_64; +#elif defined(WANT_X8664_SSE2) && defined(__SSE2__) enum sha256_algos opt_algo = ALGO_SSE2_64; #elif defined(WANT_X8632_SSE2) && defined(__SSE2__) enum sha256_algos opt_algo = ALGO_SSE2_32; @@ -720,8 +722,8 @@ static void cpu_detect() // Reckon number of cores in the box #if defined(WIN32) { - DWORD system_am; - DWORD process_am; + DWORD_PTR system_am; + DWORD_PTR process_am; BOOL ok = GetProcessAffinityMask( GetCurrentProcess(), &system_am, diff --git a/driver-cpu.h b/driver-cpu.h index 361ae5d8..dd4bcb86 100644 --- a/driver-cpu.h +++ b/driver-cpu.h @@ -30,7 +30,7 @@ #define WANT_X8664_SSE2 1 #endif -#if defined(__x86_64__) && defined(HAS_YASM) +#if defined(__x86_64__) && defined(HAS_YASM) && defined(__SSE4_1__) #define WANT_X8664_SSE4 1 #endif diff --git a/driver-icarus.c b/driver-icarus.c index 9a99e866..88b203c7 100644 --- a/driver-icarus.c +++ b/driver-icarus.c @@ -30,6 +30,7 @@ */ #include "config.h" +#include "miner.h" #include #include @@ -51,7 +52,6 @@ #endif #include "elist.h" -#include "miner.h" #include "fpgautils.h" // The serial I/O speed - Linux uses a define 'B115200' in bits/termios.h diff --git a/driver-ztex.c b/driver-ztex.c index f046c404..210060cf 100644 --- a/driver-ztex.c +++ b/driver-ztex.c @@ -23,9 +23,9 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see http://www.gnu.org/licenses/. **/ +#include "miner.h" #include #include -#include "miner.h" #include "libztex.h" #define GOLDEN_BACKLOG 5 diff --git a/elist.h b/elist.h index b2e8263d..afd59371 100644 --- a/elist.h +++ b/elist.h @@ -180,8 +180,13 @@ static inline void list_splice_init(struct list_head *list, * @type: the type of the struct this is embedded in. * @member: the name of the list_struct within the struct. */ +#ifndef _WIN64 #define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#else +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long long)(&((type *)0)->member))) +#endif /** * list_for_each - iterate over a list diff --git a/fpgautils.c b/fpgautils.c index 307bec49..489c89cf 100644 --- a/fpgautils.c +++ b/fpgautils.c @@ -14,6 +14,8 @@ #include #include +#include "miner.h" + #ifndef WIN32 #include #include @@ -34,7 +36,6 @@ #include "elist.h" #include "logging.h" -#include "miner.h" #include "fpgautils.h" #ifdef HAVE_LIBUDEV @@ -356,7 +357,7 @@ int serial_open(const char *devpath, unsigned long baud, signed short timeout, b PurgeComm(hSerial, PURGE_TXCLEAR); } - return _open_osfhandle((LONG)hSerial, 0); + return _open_osfhandle((intptr_t)hSerial, 0); #else int fdDev = open(devpath, O_RDWR | O_CLOEXEC | O_NOCTTY); diff --git a/lib/signal.in.h b/lib/signal.in.h index bcf79352..96692154 100644 --- a/lib/signal.in.h +++ b/lib/signal.in.h @@ -20,6 +20,8 @@ #endif @PRAGMA_COLUMNS@ +#include "config.h" + #if defined __need_sig_atomic_t || defined __need_sigset_t /* Special invocation convention inside glibc header files. */ diff --git a/logging.c b/logging.c index afc70089..addec90d 100644 --- a/logging.c +++ b/logging.c @@ -85,7 +85,8 @@ static void log_generic(int prio, const char *fmt, va_list ap) gettimeofday(&tv, NULL); - tm = localtime(&tv.tv_sec); + const time_t tmp_time = tv.tv_sec; + tm = localtime(&tmp_time); len = 40 + strlen(fmt) + 22; f = alloca(len); diff --git a/miner.h b/miner.h index 04c8a469..68f17c7b 100644 --- a/miner.h +++ b/miner.h @@ -118,7 +118,8 @@ static inline int fsync (int fd) #include "usbutils.h" #endif -#if !defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#if (!defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) #define bswap_16 __builtin_bswap16 #define bswap_32 __builtin_bswap32 #define bswap_64 __builtin_bswap64 diff --git a/x86_32/Makefile.am b/x86_32/Makefile.am index effaba85..8916a305 100644 --- a/x86_32/Makefile.am +++ b/x86_32/Makefile.am @@ -5,4 +5,4 @@ SUFFIXES = .asm libx8632_a_SOURCES = sha256_xmm.asm .asm.o: - $(YASM) -f elf32 $< + $(YASM) -f $(YASM_FMT) $< diff --git a/x86_32/sha256_xmm.asm b/x86_32/sha256_xmm.asm index c2cd35bb..3e9c9283 100644 --- a/x86_32/sha256_xmm.asm +++ b/x86_32/sha256_xmm.asm @@ -19,11 +19,11 @@ BITS 32 %define LAB_LOOP_UNROLL 64 -extern sha256_consts_m128i +extern _sha256_consts_m128i -global CalcSha256_x86 +global $@CalcSha256_x86@12 ; CalcSha256 hash(ecx), data(edx), init([esp+4]) -CalcSha256_x86: +@CalcSha256_x86@12: push esi push edi mov init, [esp+12] @@ -134,7 +134,7 @@ LAB_LOOP: %macro lab_loop_blk 1 movdqa xmm6, [data+%1] - paddd xmm6, sha256_consts_m128i[%1] + paddd xmm6, _sha256_consts_m128i[%1] paddd xmm6, [hash+2*16] ; +h diff --git a/x86_64/Makefile.am b/x86_64/Makefile.am index a4c16fc2..85d997c8 100644 --- a/x86_64/Makefile.am +++ b/x86_64/Makefile.am @@ -5,4 +5,4 @@ SUFFIXES = .asm libx8664_a_SOURCES = sha256_xmm_amd64.asm sha256_sse4_amd64.asm .asm.o: - $(YASM) -f elf64 $< + $(YASM) -f $(YASM_FMT) -o $@ $< diff --git a/x86_64/sha256_sse4_amd64.asm b/x86_64/sha256_sse4_amd64.asm index 8bedc20d..f1f5d75d 100644 --- a/x86_64/sha256_sse4_amd64.asm +++ b/x86_64/sha256_sse4_amd64.asm @@ -13,9 +13,17 @@ ALIGN 32 BITS 64 +%ifidn __OUTPUT_FORMAT__,win64 +%define hash rcx +%define data rdx +%define init r8 +%define temp r9 +%else %define hash rdi %define data rsi %define init rdx +%define temp rcx +%endif ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16)) %define LAB_CALC_PARA 2 @@ -27,18 +35,28 @@ extern g_4sha256_k global CalcSha256_x64_sse4 ; CalcSha256 hash(rdi), data(rsi), init(rdx) +; CalcSha256 hash(rcx), data(rdx), init(r8) CalcSha256_x64_sse4: push rbx +%ifidn __OUTPUT_FORMAT__,win64 + sub rsp, 16 * 6 + movdqa [rsp + 16*0], xmm6 + movdqa [rsp + 16*1], xmm7 + movdqa [rsp + 16*2], xmm8 + movdqa [rsp + 16*3], xmm9 + movdqa [rsp + 16*4], xmm10 + movdqa [rsp + 16*5], xmm11 +%endif LAB_NEXT_NONCE: - mov rcx, 64*4 ; 256 - rcx is # of SHA-2 rounds + mov temp, 64*4 ; 256 - temp is # of SHA-2 rounds mov rax, 16*4 ; 64 - rax is where we expand to LAB_SHA: - push rcx - lea rcx, qword [data+rcx*4] ; + 1024 + push temp + lea temp, qword [data+temp*4] ; + 1024 lea r11, qword [data+rax*4] ; + 256 LAB_CALC: @@ -122,10 +140,10 @@ LAB_CALC: %endrep add r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16 - cmp r11, rcx + cmp r11, temp jb LAB_CALC - pop rcx + pop temp mov rax, 0 ; Load the init values of the message into the hash. @@ -219,12 +237,12 @@ LAB_LOOP: %assign i i+1 %endrep - cmp rax, rcx + cmp rax, temp jb LAB_LOOP ; Finished the 64 rounds, calculate hash and save - movntdqa xmm1, [rdx] + movntdqa xmm1, [init] pshufd xmm2, xmm1, 0x55 paddd xmm5, xmm2 pshufd xmm6, xmm1, 0xAA @@ -234,7 +252,7 @@ LAB_LOOP: pshufd xmm1, xmm1, 0 paddd xmm7, xmm1 - movntdqa xmm1, [rdx+4*4] + movntdqa xmm1, [init+4*4] pshufd xmm2, xmm1, 0x55 paddd xmm8, xmm2 pshufd xmm6, xmm1, 0xAA @@ -254,6 +272,15 @@ LAB_LOOP: movdqa [hash+7*16], xmm10 LAB_RET: +%ifidn __OUTPUT_FORMAT__,win64 + movdqa xmm6, [rsp + 16*0] + movdqa xmm7, [rsp + 16*1] + movdqa xmm8, [rsp + 16*2] + movdqa xmm9, [rsp + 16*3] + movdqa xmm10, [rsp + 16*4] + movdqa xmm11, [rsp + 16*5] + add rsp, 16 * 6 +%endif pop rbx ret diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm index e8c97781..6b2ee169 100644 --- a/x86_64/sha256_xmm_amd64.asm +++ b/x86_64/sha256_xmm_amd64.asm @@ -22,10 +22,17 @@ ALIGN 32 BITS 64 +%ifidn __OUTPUT_FORMAT__,win64 +%define hash rcx +%define hash1 rdx +%define data r8 +%define init r9 +%else %define hash rdi %define hash1 rsi %define data rdx %define init rcx +%endif ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16)) %define SHA_CALC_W_PARA 2 @@ -227,6 +234,15 @@ sha256_sse2_64_new: %endif push rbx +%ifidn __OUTPUT_FORMAT__,win64 + sub rsp, 16 * 6 + movdqa [rsp + 16*0], xmm6 + movdqa [rsp + 16*1], xmm7 + movdqa [rsp + 16*2], xmm8 + movdqa [rsp + 16*3], xmm9 + movdqa [rsp + 16*4], xmm10 + movdqa [rsp + 16*5], xmm13 +%endif %macro SHA_256 0 mov rbx, 64*4 ; rbx is # of SHA-2 rounds @@ -318,6 +334,15 @@ sha256_sse2_64_new: movdqa [hash+7*16], rH LAB_RET: +%ifidn __OUTPUT_FORMAT__,win64 + movdqa xmm6, [rsp + 16*0] + movdqa xmm7, [rsp + 16*1] + movdqa xmm8, [rsp + 16*2] + movdqa xmm9, [rsp + 16*3] + movdqa xmm10, [rsp + 16*4] + movdqa xmm13, [rsp + 16*5] + add rsp, 16 * 6 +%endif pop rbx ret