mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-22 04:24:19 +00:00
Compile CPU mining for win32 and win64
This commit is contained in:
parent
dfea623960
commit
c9ae715019
2
.gitignore
vendored
2
.gitignore
vendored
@ -41,3 +41,5 @@ lib/string.h
|
||||
lib/warn-on-use.h
|
||||
|
||||
mkinstalldirs
|
||||
|
||||
*.swp
|
||||
|
10
Makefile.am
10
Makefile.am
@ -19,7 +19,7 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
|
||||
|
||||
bin_PROGRAMS = cgminer
|
||||
|
||||
bin_SCRIPTS = *.cl
|
||||
bin_SCRIPTS = $(top_srcdir)/*.cl
|
||||
|
||||
cgminer_LDFLAGS = $(PTHREAD_FLAGS)
|
||||
cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
|
||||
@ -27,11 +27,7 @@ cgminer_LDADD = $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
|
||||
@UDEV_LIBS@ @LIBUSB_LIBS@ \
|
||||
@MATH_LIBS@ lib/libgnu.a ccan/libccan.a
|
||||
|
||||
if HAVE_WINDOWS
|
||||
cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@
|
||||
else
|
||||
cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBUSB_CFLAGS@ @LIBCURL_CFLAGS@
|
||||
endif
|
||||
|
||||
# common sources
|
||||
cgminer_SOURCES := cgminer.c
|
||||
@ -101,11 +97,11 @@ endif
|
||||
if HAS_MODMINER
|
||||
cgminer_SOURCES += driver-modminer.c
|
||||
bitstreamsdir = $(bindir)/bitstreams
|
||||
dist_bitstreams_DATA = bitstreams/*
|
||||
dist_bitstreams_DATA = $(top_srcdir)/bitstreams/*
|
||||
endif
|
||||
|
||||
if HAS_ZTEX
|
||||
cgminer_SOURCES += driver-ztex.c libztex.c libztex.h
|
||||
bitstreamsdir = $(bindir)/bitstreams
|
||||
dist_bitstreams_DATA = bitstreams/*
|
||||
dist_bitstreams_DATA = $(top_srcdir)/bitstreams/*
|
||||
endif
|
||||
|
35
autogen-win32.sh
Normal file
35
autogen-win32.sh
Normal file
@ -0,0 +1,35 @@
|
||||
#!/bin/bash
#
# Bootstrap the autotools build system and (unless NOCONFIGURE is set)
# configure a win32 build with CPU mining, scrypt and the FPGA drivers enabled.
#
# Usage: autogen-win32.sh [extra configure arguments...]
#
# Environment:
#   NOCONFIGURE  - when non-empty, only autoreconf is run; configure is skipped.
#   CGMINER_SDK  - optional prefix holding include/, lib/, lib/pkgconfig and
#                  include/ADL_SDK; it is prepended to the matching search paths.
#
# The script may be run from a separate build directory; the install prefix is
# "<current directory>/opt".

# Quote every expansion so a source tree whose path contains whitespace works.
bs_dir="$(dirname "$(readlink -f "$0")")"
build_dir="$PWD"

# Remove stale autotools output so autoreconf regenerates everything.
rm -rf "${bs_dir}/autom4te.cache"
rm -f "${bs_dir}/aclocal.m4" "${bs_dir}/ltmain.sh"

echo 'Running autoreconf -ifv...'
autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1

if test -z "$NOCONFIGURE" ; then
	echo 'Configuring...'

	# Out-of-tree build: make the headers in the source directory visible.
	if [[ "$bs_dir" != "$(pwd)" ]]; then
		export CPPFLAGS+=" -I $bs_dir"
	fi

	if [[ -n "$CGMINER_SDK" ]]; then
		export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS"
		export LDFLAGS="-L $CGMINER_SDK/lib $LDFLAGS"
		export PKG_CONFIG_PATH="$CGMINER_SDK/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
		export ADL_SDK="$CGMINER_SDK/include/ADL_SDK"
	fi

	# "$@" (quoted) forwards the caller's arguments unchanged, without
	# re-splitting or glob-expanding them.
	CFLAGS="-O3 -msse2" \
	"$bs_dir"/configure \
		--prefix="$build_dir"/opt \
		--enable-cpumining \
		--enable-scrypt \
		--enable-bitforce \
		--enable-icarus \
		--enable-modminer \
		--enable-ztex \
		"$@"
fi
|
36
autogen-win64.sh
Normal file
36
autogen-win64.sh
Normal file
@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#
# Bootstrap the autotools build system and (unless NOCONFIGURE is set)
# configure a win64 (x86_64-w64-mingw32) build with CPU mining, scrypt and the
# FPGA drivers enabled.
#
# Usage: autogen-win64.sh [extra configure arguments...]
#
# Environment:
#   NOCONFIGURE  - when non-empty, only autoreconf is run; configure is skipped.
#   CGMINER_SDK  - optional prefix holding include/, lib64/, lib64/pkgconfig and
#                  include/ADL_SDK; it is prepended to the matching search paths.
#
# The script may be run from a separate build directory; the install prefix is
# "<current directory>/opt".

# Quote every expansion so a source tree whose path contains whitespace works.
bs_dir="$(dirname "$(readlink -f "$0")")"
build_dir="$PWD"

# Remove stale autotools output so autoreconf regenerates everything.
rm -rf "${bs_dir}/autom4te.cache"
rm -f "${bs_dir}/aclocal.m4" "${bs_dir}/ltmain.sh"

echo 'Running autoreconf -ifv...'
autoreconf -ifv -I "/usr/local/share/aclocal/" "$bs_dir" || exit 1

if test -z "$NOCONFIGURE" ; then
	echo 'Configuring...'

	# Out-of-tree build: make the headers in the source directory visible.
	if [[ "$bs_dir" != "$(pwd)" ]]; then
		export CPPFLAGS+=" -I $bs_dir"
	fi

	if [[ -n "$CGMINER_SDK" ]]; then
		export CPPFLAGS="-I $CGMINER_SDK/include $CPPFLAGS"
		export LDFLAGS="-L $CGMINER_SDK/lib64 $LDFLAGS"
		export PKG_CONFIG_PATH="$CGMINER_SDK/lib64/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
		export ADL_SDK="$CGMINER_SDK/include/ADL_SDK"
	fi

	# "$@" (quoted) forwards the caller's arguments unchanged, without
	# re-splitting or glob-expanding them.
	CFLAGS="-O3 -msse4" \
	"$bs_dir"/configure \
		--target=x86_64-w64-mingw32 \
		--prefix="$build_dir"/opt \
		--enable-cpumining \
		--enable-scrypt \
		--enable-bitforce \
		--enable-icarus \
		--enable-modminer \
		--enable-ztex \
		"$@"
fi
|
17
cgminer.c
17
cgminer.c
@ -318,7 +318,8 @@ static bool should_run(void)
|
||||
return true;
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
tm = localtime(&tv.tv_sec);
|
||||
const time_t tmp_time = tv.tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
if (schedstart.enable) {
|
||||
if (!schedstop.enable) {
|
||||
if (time_before(tm, &schedstart.tm))
|
||||
@ -350,7 +351,8 @@ void get_datestamp(char *f, struct timeval *tv)
|
||||
{
|
||||
struct tm *tm;
|
||||
|
||||
tm = localtime(&tv->tv_sec);
|
||||
const time_t tmp_time = tv->tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
sprintf(f, "[%d-%02d-%02d %02d:%02d:%02d]",
|
||||
tm->tm_year + 1900,
|
||||
tm->tm_mon + 1,
|
||||
@ -364,7 +366,8 @@ void get_timestamp(char *f, struct timeval *tv)
|
||||
{
|
||||
struct tm *tm;
|
||||
|
||||
tm = localtime(&tv->tv_sec);
|
||||
const time_t tmp_time = tv->tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
sprintf(f, "[%02d:%02d:%02d]",
|
||||
tm->tm_hour,
|
||||
tm->tm_min,
|
||||
@ -2584,9 +2587,11 @@ static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit)
|
||||
double submit_time = tdiff(&tv_submit_reply, &tv_submit);
|
||||
int diffplaces = 3;
|
||||
|
||||
tm = localtime(&(work->tv_getwork.tv_sec));
|
||||
time_t tmp_time = work->tv_getwork.tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
memcpy(&tm_getwork, tm, sizeof(struct tm));
|
||||
tm = localtime(&(tv_submit_reply.tv_sec));
|
||||
tmp_time = tv_submit_reply.tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
memcpy(&tm_submit_reply, tm, sizeof(struct tm));
|
||||
|
||||
if (work->clone) {
|
||||
@ -2957,7 +2962,7 @@ void app_restart(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
execv(initial_args[0], initial_args);
|
||||
execv(initial_args[0], (EXECV_2ND_ARG_TYPE)initial_args);
|
||||
applog(LOG_WARNING, "Failed to restart application");
|
||||
}
|
||||
|
||||
|
12
compat.h
12
compat.h
@ -2,15 +2,17 @@
|
||||
#define __COMPAT_H__
|
||||
|
||||
#ifdef WIN32
|
||||
#include "config.h"
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#include "miner.h" // for timersub
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#ifndef HAVE_LIBWINPTHREAD
|
||||
static inline int nanosleep(const struct timespec *req, struct timespec *rem)
|
||||
{
|
||||
struct timeval tstart;
|
||||
@ -42,6 +44,7 @@ static inline int nanosleep(const struct timespec *req, struct timespec *rem)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int sleep(unsigned int secs)
|
||||
{
|
||||
@ -71,7 +74,12 @@ typedef unsigned int uint;
|
||||
typedef long suseconds_t;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBWINPTHREAD
|
||||
#define PTH(thr) ((thr)->pth)
|
||||
#else
|
||||
#define PTH(thr) ((thr)->pth.p)
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define PTH(thr) ((thr)->pth)
|
||||
#endif /* WIN32 */
|
||||
|
41
configure.ac
41
configure.ac
@ -81,7 +81,6 @@ esac
|
||||
|
||||
case $target in
|
||||
*-*-mingw*)
|
||||
have_x86_64=false
|
||||
have_win32=true
|
||||
PTHREAD_FLAGS=""
|
||||
DLOPEN_FLAGS=""
|
||||
@ -166,9 +165,18 @@ else
|
||||
OPENCL_LIBS=""
|
||||
fi
|
||||
|
||||
AC_CHECK_LIB(pthread, pthread_create, ,
|
||||
AC_MSG_ERROR([Could not find pthread library - please install libpthread]))
|
||||
PTHREAD_LIBS=-lpthread
|
||||
has_winpthread=false
|
||||
if test "x$have_win32" = xtrue; then
|
||||
has_winpthread=true
|
||||
AC_CHECK_LIB(winpthread, nanosleep, , has_winpthread=false)
|
||||
PTHREAD_LIBS=-lwinpthread
|
||||
fi
|
||||
|
||||
if test "x$has_winpthread" != xtrue; then
|
||||
AC_CHECK_LIB(pthread, pthread_create, ,
|
||||
AC_MSG_ERROR([Could not find pthread library - please install libpthread]))
|
||||
PTHREAD_LIBS=-lpthread
|
||||
fi
|
||||
|
||||
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
|
||||
|
||||
@ -181,7 +189,7 @@ scrypt="no"
|
||||
|
||||
if test "$found_opencl" = 1; then
|
||||
if test "x$adl" != xno; then
|
||||
AC_CHECK_FILE([ADL_SDK/adl_sdk.h], have_adl=true, have_adl=false,)
|
||||
AC_CHECK_FILE([${ADL_SDK:-ADL_SDK}/adl_sdk.h], have_adl=true, have_adl=false,)
|
||||
if test x$have_adl = xtrue
|
||||
then
|
||||
AC_DEFINE([HAVE_ADL], [1], [Defined if ADL headers were found])
|
||||
@ -305,7 +313,7 @@ has_yasm=false
|
||||
AC_PATH_PROG([YASM],[yasm],[false])
|
||||
if test "x$YASM" != "xfalse" ; then
|
||||
AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
|
||||
yasmver=`yasm --version | head -1 | cut -d\ -f2`
|
||||
yasmver=`"$YASM" --version | head -1 | cut -d\ -f2`
|
||||
yamajor=`echo $yasmver | cut -d. -f1`
|
||||
yaminor=`echo $yasmver | cut -d. -f2`
|
||||
yamini=`echo $yasmver | cut -d. -f3`
|
||||
@ -332,6 +340,18 @@ if test "x$YASM" != "xfalse" ; then
|
||||
fi
|
||||
if test "x$has_yasm" = "xfalse" ; then
|
||||
AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.])
|
||||
else
|
||||
if test "x$have_x86_64" = xtrue; then
|
||||
if test "x$have_win32" = xtrue; then
|
||||
YASM_FMT="win64"
|
||||
else
|
||||
YASM_FMT="elf64"
|
||||
fi
|
||||
elif test "x$have_win32" = xtrue; then
|
||||
YASM_FMT="coff"
|
||||
else
|
||||
YASM_FMT="elf32"
|
||||
fi
|
||||
fi
|
||||
|
||||
AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue])
|
||||
@ -382,6 +402,14 @@ else
|
||||
fi
|
||||
AC_SUBST(LIBCURL_LIBS)
|
||||
|
||||
#check execv signature
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
|
||||
#include <process.h>
|
||||
int execv(const char*, const char*const*);
|
||||
])],
|
||||
AC_DEFINE([EXECV_2ND_ARG_TYPE], [const char* const*], [int execv(const char*, const char*const*);]),
|
||||
AC_DEFINE([EXECV_2ND_ARG_TYPE], [char* const*], [int execv(const char*, char*const*);]))
|
||||
|
||||
dnl CCAN wants to know a lot of vars.
|
||||
# All the configuration checks. Regrettably, the __attribute__ checks will
|
||||
# give false positives on old GCCs, since they just cause warnings. But that's
|
||||
@ -438,6 +466,7 @@ AC_SUBST(PDCURSES_LIBS)
|
||||
AC_SUBST(WS2_LIBS)
|
||||
AC_SUBST(MATH_LIBS)
|
||||
AC_SUBST(UDEV_LIBS)
|
||||
AC_SUBST(YASM_FMT)
|
||||
|
||||
AC_CONFIG_FILES([
|
||||
Makefile
|
||||
|
@ -9,6 +9,8 @@
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
@ -17,16 +19,14 @@
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "compat.h"
|
||||
#include "miner.h"
|
||||
#include "usbutils.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#endif /* WIN32 */
|
||||
|
||||
#include "compat.h"
|
||||
#include "miner.h"
|
||||
#include "usbutils.h"
|
||||
|
||||
#define BITFORCE_IDENTIFY "ZGX"
|
||||
#define BITFORCE_IDENTIFY_LEN (sizeof(BITFORCE_IDENTIFY)-1)
|
||||
#define BITFORCE_FLASH "ZMX"
|
||||
|
@ -202,7 +202,9 @@ static const sha256_func sha256_funcs[] = {
|
||||
|
||||
|
||||
#ifdef WANT_CPUMINE
|
||||
#if defined(WANT_X8664_SSE2) && defined(__SSE2__)
|
||||
#if defined(WANT_X8664_SSE4) && defined(__SSE4_1__)
|
||||
enum sha256_algos opt_algo = ALGO_SSE4_64;
|
||||
#elif defined(WANT_X8664_SSE2) && defined(__SSE2__)
|
||||
enum sha256_algos opt_algo = ALGO_SSE2_64;
|
||||
#elif defined(WANT_X8632_SSE2) && defined(__SSE2__)
|
||||
enum sha256_algos opt_algo = ALGO_SSE2_32;
|
||||
@ -720,8 +722,8 @@ static void cpu_detect()
|
||||
// Reckon number of cores in the box
|
||||
#if defined(WIN32)
|
||||
{
|
||||
DWORD system_am;
|
||||
DWORD process_am;
|
||||
DWORD_PTR system_am;
|
||||
DWORD_PTR process_am;
|
||||
BOOL ok = GetProcessAffinityMask(
|
||||
GetCurrentProcess(),
|
||||
&system_am,
|
||||
|
@ -30,7 +30,7 @@
|
||||
#define WANT_X8664_SSE2 1
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && defined(HAS_YASM)
|
||||
#if defined(__x86_64__) && defined(HAS_YASM) && defined(__SSE4_1__)
|
||||
#define WANT_X8664_SSE4 1
|
||||
#endif
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "miner.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
@ -51,7 +52,6 @@
|
||||
#endif
|
||||
|
||||
#include "elist.h"
|
||||
#include "miner.h"
|
||||
#include "fpgautils.h"
|
||||
|
||||
// The serial I/O speed - Linux uses a define 'B115200' in bits/termios.h
|
||||
|
@ -23,9 +23,9 @@
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, see http://www.gnu.org/licenses/.
|
||||
**/
|
||||
#include "miner.h"
|
||||
#include <unistd.h>
|
||||
#include <sha2.h>
|
||||
#include "miner.h"
|
||||
#include "libztex.h"
|
||||
|
||||
#define GOLDEN_BACKLOG 5
|
||||
|
5
elist.h
5
elist.h
@ -180,8 +180,13 @@ static inline void list_splice_init(struct list_head *list,
|
||||
* @type: the type of the struct this is embedded in.
|
||||
* @member: the name of the list_struct within the struct.
|
||||
*/
|
||||
#ifndef _WIN64
|
||||
#define list_entry(ptr, type, member) \
|
||||
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
|
||||
#else
|
||||
#define list_entry(ptr, type, member) \
|
||||
((type *)((char *)(ptr)-(unsigned long long)(&((type *)0)->member)))
|
||||
#endif
|
||||
|
||||
/**
|
||||
* list_for_each - iterate over a list
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
#ifndef WIN32
|
||||
#include <errno.h>
|
||||
#include <termios.h>
|
||||
@ -34,7 +36,6 @@
|
||||
|
||||
#include "elist.h"
|
||||
#include "logging.h"
|
||||
#include "miner.h"
|
||||
#include "fpgautils.h"
|
||||
|
||||
#ifdef HAVE_LIBUDEV
|
||||
@ -356,7 +357,7 @@ int serial_open(const char *devpath, unsigned long baud, signed short timeout, b
|
||||
PurgeComm(hSerial, PURGE_TXCLEAR);
|
||||
}
|
||||
|
||||
return _open_osfhandle((LONG)hSerial, 0);
|
||||
return _open_osfhandle((intptr_t)hSerial, 0);
|
||||
#else
|
||||
int fdDev = open(devpath, O_RDWR | O_CLOEXEC | O_NOCTTY);
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
#endif
|
||||
@PRAGMA_COLUMNS@
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if defined __need_sig_atomic_t || defined __need_sigset_t
|
||||
/* Special invocation convention inside glibc header files. */
|
||||
|
||||
|
@ -85,7 +85,8 @@ static void log_generic(int prio, const char *fmt, va_list ap)
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
|
||||
tm = localtime(&tv.tv_sec);
|
||||
const time_t tmp_time = tv.tv_sec;
|
||||
tm = localtime(&tmp_time);
|
||||
|
||||
len = 40 + strlen(fmt) + 22;
|
||||
f = alloca(len);
|
||||
|
3
miner.h
3
miner.h
@ -118,7 +118,8 @@ static inline int fsync (int fd)
|
||||
#include "usbutils.h"
|
||||
#endif
|
||||
|
||||
#if !defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
||||
#if (!defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|
||||
|| (defined(WIN32) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
|
||||
#define bswap_16 __builtin_bswap16
|
||||
#define bswap_32 __builtin_bswap32
|
||||
#define bswap_64 __builtin_bswap64
|
||||
|
@ -5,4 +5,4 @@ SUFFIXES = .asm
|
||||
libx8632_a_SOURCES = sha256_xmm.asm
|
||||
|
||||
.asm.o:
|
||||
$(YASM) -f elf32 $<
|
||||
$(YASM) -f $(YASM_FMT) $<
|
||||
|
@ -19,11 +19,11 @@ BITS 32
|
||||
|
||||
%define LAB_LOOP_UNROLL 64
|
||||
|
||||
extern sha256_consts_m128i
|
||||
extern _sha256_consts_m128i
|
||||
|
||||
global CalcSha256_x86
|
||||
global $@CalcSha256_x86@12
|
||||
; CalcSha256 hash(ecx), data(edx), init([esp+4])
|
||||
CalcSha256_x86:
|
||||
@CalcSha256_x86@12:
|
||||
push esi
|
||||
push edi
|
||||
mov init, [esp+12]
|
||||
@ -134,7 +134,7 @@ LAB_LOOP:
|
||||
|
||||
%macro lab_loop_blk 1
|
||||
movdqa xmm6, [data+%1]
|
||||
paddd xmm6, sha256_consts_m128i[%1]
|
||||
paddd xmm6, _sha256_consts_m128i[%1]
|
||||
|
||||
paddd xmm6, [hash+2*16] ; +h
|
||||
|
||||
|
@ -5,4 +5,4 @@ SUFFIXES = .asm
|
||||
libx8664_a_SOURCES = sha256_xmm_amd64.asm sha256_sse4_amd64.asm
|
||||
|
||||
.asm.o:
|
||||
$(YASM) -f elf64 $<
|
||||
$(YASM) -f $(YASM_FMT) -o $@ $<
|
||||
|
@ -13,9 +13,17 @@
|
||||
ALIGN 32
|
||||
BITS 64
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
%define hash rcx
|
||||
%define data rdx
|
||||
%define init r8
|
||||
%define temp r9
|
||||
%else
|
||||
%define hash rdi
|
||||
%define data rsi
|
||||
%define init rdx
|
||||
%define temp rcx
|
||||
%endif
|
||||
|
||||
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
|
||||
%define LAB_CALC_PARA 2
|
||||
@ -27,18 +35,28 @@ extern g_4sha256_k
|
||||
|
||||
global CalcSha256_x64_sse4
|
||||
; CalcSha256 hash(rdi), data(rsi), init(rdx)
|
||||
; CalcSha256 hash, data, init -- win64 output: rcx, rdx, r8; other formats: rdi, rsi, rdx
|
||||
CalcSha256_x64_sse4:
|
||||
|
||||
push rbx
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
sub rsp, 16 * 6
|
||||
movdqa [rsp + 16*0], xmm6
|
||||
movdqa [rsp + 16*1], xmm7
|
||||
movdqa [rsp + 16*2], xmm8
|
||||
movdqa [rsp + 16*3], xmm9
|
||||
movdqa [rsp + 16*4], xmm10
|
||||
movdqa [rsp + 16*5], xmm11
|
||||
%endif
|
||||
|
||||
LAB_NEXT_NONCE:
|
||||
|
||||
mov rcx, 64*4 ; 256 - rcx is # of SHA-2 rounds
|
||||
mov temp, 64*4 ; 256 - temp is # of SHA-2 rounds
|
||||
mov rax, 16*4 ; 64 - rax is where we expand to
|
||||
|
||||
LAB_SHA:
|
||||
push rcx
|
||||
lea rcx, qword [data+rcx*4] ; + 1024
|
||||
push temp
|
||||
lea temp, qword [data+temp*4] ; + 1024
|
||||
lea r11, qword [data+rax*4] ; + 256
|
||||
|
||||
LAB_CALC:
|
||||
@ -122,10 +140,10 @@ LAB_CALC:
|
||||
%endrep
|
||||
|
||||
add r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16
|
||||
cmp r11, rcx
|
||||
cmp r11, temp
|
||||
jb LAB_CALC
|
||||
|
||||
pop rcx
|
||||
pop temp
|
||||
mov rax, 0
|
||||
|
||||
; Load the init values of the message into the hash.
|
||||
@ -219,12 +237,12 @@ LAB_LOOP:
|
||||
%assign i i+1
|
||||
%endrep
|
||||
|
||||
cmp rax, rcx
|
||||
cmp rax, temp
|
||||
jb LAB_LOOP
|
||||
|
||||
; Finished the 64 rounds, calculate hash and save
|
||||
|
||||
movntdqa xmm1, [rdx]
|
||||
movntdqa xmm1, [init]
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
paddd xmm5, xmm2
|
||||
pshufd xmm6, xmm1, 0xAA
|
||||
@ -234,7 +252,7 @@ LAB_LOOP:
|
||||
pshufd xmm1, xmm1, 0
|
||||
paddd xmm7, xmm1
|
||||
|
||||
movntdqa xmm1, [rdx+4*4]
|
||||
movntdqa xmm1, [init+4*4]
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
paddd xmm8, xmm2
|
||||
pshufd xmm6, xmm1, 0xAA
|
||||
@ -254,6 +272,15 @@ LAB_LOOP:
|
||||
movdqa [hash+7*16], xmm10
|
||||
|
||||
LAB_RET:
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
movdqa xmm6, [rsp + 16*0]
|
||||
movdqa xmm7, [rsp + 16*1]
|
||||
movdqa xmm8, [rsp + 16*2]
|
||||
movdqa xmm9, [rsp + 16*3]
|
||||
movdqa xmm10, [rsp + 16*4]
|
||||
movdqa xmm11, [rsp + 16*5]
|
||||
add rsp, 16 * 6
|
||||
%endif
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
|
@ -22,10 +22,17 @@
|
||||
ALIGN 32
|
||||
BITS 64
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
%define hash rcx
|
||||
%define hash1 rdx
|
||||
%define data r8
|
||||
%define init r9
|
||||
%else
|
||||
%define hash rdi
|
||||
%define hash1 rsi
|
||||
%define data rdx
|
||||
%define init rcx
|
||||
%endif
|
||||
|
||||
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
|
||||
%define SHA_CALC_W_PARA 2
|
||||
@ -227,6 +234,15 @@ sha256_sse2_64_new:
|
||||
%endif
|
||||
|
||||
push rbx
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
sub rsp, 16 * 6
|
||||
movdqa [rsp + 16*0], xmm6
|
||||
movdqa [rsp + 16*1], xmm7
|
||||
movdqa [rsp + 16*2], xmm8
|
||||
movdqa [rsp + 16*3], xmm9
|
||||
movdqa [rsp + 16*4], xmm10
|
||||
movdqa [rsp + 16*5], xmm13
|
||||
%endif
|
||||
|
||||
%macro SHA_256 0
|
||||
mov rbx, 64*4 ; rbx is # of SHA-2 rounds
|
||||
@ -318,6 +334,15 @@ sha256_sse2_64_new:
|
||||
movdqa [hash+7*16], rH
|
||||
|
||||
LAB_RET:
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
movdqa xmm6, [rsp + 16*0]
|
||||
movdqa xmm7, [rsp + 16*1]
|
||||
movdqa xmm8, [rsp + 16*2]
|
||||
movdqa xmm9, [rsp + 16*3]
|
||||
movdqa xmm10, [rsp + 16*4]
|
||||
movdqa xmm13, [rsp + 16*5]
|
||||
add rsp, 16 * 6
|
||||
%endif
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user