From cd8eeccc2d02e1d546109ab997b977fd4d50edcf Mon Sep 17 00:00:00 2001 From: Warren Togami Date: Sat, 14 Sep 2013 05:19:30 -1000 Subject: [PATCH] Litecoin: Runtime detection of SSE2 32bit x86 for Scrypt * x86_64 and Intel MacOS X always uses scrypt-sse2, non-x86 uses scrypt-generic. * x86 (not Mac) detects cpuid features and chooses sse2 or generic during runtime. How to Build with SSE2 Support ============================== * make litecoind with USE_SSE2=1 * qmake with USE_SSE2=1 --- bitcoin-qt.pro | 5 +++ contrib/gitian-descriptors/gitian-win32.yml | 4 +- contrib/gitian-descriptors/gitian.yml | 4 +- src/init.cpp | 33 ++++++++++++++ src/main.h | 3 ++ src/makefile.linux-mingw | 9 ++++ src/makefile.mingw | 8 ++++ src/makefile.osx | 13 ++++++ src/makefile.unix | 4 +- src/scrypt-sse2.cpp | 6 +-- src/scrypt.cpp | 49 ++++++++++++++++++--- src/scrypt.h | 11 +++-- 12 files changed, 128 insertions(+), 21 deletions(-) diff --git a/bitcoin-qt.pro b/bitcoin-qt.pro index 49b6fe9fb..81d7367c2 100644 --- a/bitcoin-qt.pro +++ b/bitcoin-qt.pro @@ -316,6 +316,11 @@ DEFINES += BITCOIN_QT_TEST macx: CONFIG -= app_bundle } +contains(USE_SSE2, 1) { +SOURCES += src/scrypt-sse2.cpp +DEFINES += USE_SSE2 +} + # Todo: Remove this line when switching to Qt5, as that option was removed CODECFORTR = UTF-8 diff --git a/contrib/gitian-descriptors/gitian-win32.yml b/contrib/gitian-descriptors/gitian-win32.yml index 3ffd1d4cd..f1694e010 100644 --- a/contrib/gitian-descriptors/gitian-win32.yml +++ b/contrib/gitian-descriptors/gitian-win32.yml @@ -51,7 +51,7 @@ script: | export LD_PRELOAD=/usr/lib/faketime/libfaketime.so.1 export FAKETIME=$REFERENCE_DATETIME export TZ=UTC - $HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_50_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_50_0 
BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1c OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1c/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 USE_QRCODE=1 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=litecoin USE_BUILD_INFO=1 + $HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_50_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_50_0 BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1c OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1c/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 USE_QRCODE=1 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=litecoin USE_BUILD_INFO=1 USE_SSE2=1 make $MAKEOPTS i586-mingw32msvc-strip release/litecoin-qt.exe cp release/litecoin-qt.exe $OUTDIR/ @@ -60,7 +60,7 @@ script: | export LD_PRELOAD=/usr/lib/faketime/libfaketime.so.1 export FAKETIME=$REFERENCE_DATETIME export TZ=UTC - make -f makefile.linux-mingw $MAKEOPTS DEPSDIR=$HOME/build litecoind.exe USE_UPNP=0 DEBUGFLAGS="-frandom-seed=litecoin" + make -f makefile.linux-mingw $MAKEOPTS DEPSDIR=$HOME/build litecoind.exe USE_UPNP=0 DEBUGFLAGS="-frandom-seed=litecoin" USE_SSE2=1 i586-mingw32msvc-strip litecoind.exe mkdir $OUTDIR/daemon cp litecoind.exe $OUTDIR/daemon diff --git a/contrib/gitian-descriptors/gitian.yml b/contrib/gitian-descriptors/gitian.yml index d42c9f8b5..dc56f0d5b 100644 --- a/contrib/gitian-descriptors/gitian.yml +++ b/contrib/gitian-descriptors/gitian.yml @@ -46,10 
+46,10 @@ script: | cp $OUTDIR/src/doc/README.md $OUTDIR cp $OUTDIR/src/COPYING $OUTDIR cd src - make -f makefile.unix STATIC=1 OPENSSL_INCLUDE_PATH="$INSTDIR/include" OPENSSL_LIB_PATH="$INSTDIR/lib" $MAKEOPTS litecoind USE_UPNP=0 DEBUGFLAGS= + make -f makefile.unix STATIC=1 OPENSSL_INCLUDE_PATH="$INSTDIR/include" OPENSSL_LIB_PATH="$INSTDIR/lib" $MAKEOPTS litecoind USE_UPNP=0 DEBUGFLAGS= USE_SSE2=1 mkdir -p $OUTDIR/bin/$GBUILD_BITS install -s litecoind $OUTDIR/bin/$GBUILD_BITS cd .. - qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 USE_QRCODE=1 + qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 USE_QRCODE=1 USE_SSE2=1 make $MAKEOPTS install litecoin-qt $OUTDIR/bin/$GBUILD_BITS diff --git a/src/init.cpp b/src/init.cpp index e9a24e469..61c3846d4 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -22,6 +22,18 @@ #include #endif +#if defined(USE_SSE2) +#if !defined(MAC_OSX) && (defined(_M_IX86) || defined(__i386__) || defined(__i386)) +#ifdef _MSC_VER +// MSVC 64bit is unable to use inline asm +#include <intrin.h> +#else +// GCC Linux or i686-w64-mingw32 +#include <cpuid.h> +#endif +#endif +#endif + using namespace std; using namespace boost; @@ -494,6 +506,23 @@ bool AppInit2(boost::thread_group& threadGroup) sigaction(SIGHUP, &sa_hup, NULL); #endif +#if defined(USE_SSE2) + unsigned int cpuid_edx=0; +#if !defined(MAC_OSX) && (defined(_M_IX86) || defined(__i386__) || defined(__i386)) + // 32bit x86 Linux or Windows, detect cpuid features +#if defined(_MSC_VER) + // MSVC: __cpuid stores EAX, EBX, ECX, EDX in x86cpuid[0..3]; EDX is index 3 + int x86cpuid[4]; + __cpuid(x86cpuid, 1); + cpuid_edx = (unsigned int)x86cpuid[3]; +#else + // Linux or i686-w64-mingw32 (gcc-4.6.3) + unsigned int eax, ebx, ecx; + __get_cpuid(1, &eax, &ebx, &ecx, &cpuid_edx); +#endif +#endif +#endif + // ********************************************************* Step 2: parameter interactions fTestNet = GetBoolArg("-testnet"); @@ -808,6 +837,10 @@ bool AppInit2(boost::thread_group& threadGroup) // 
********************************************************* Step 7: load block chain +#if defined(USE_SSE2) + scrypt_detect_sse2(cpuid_edx); +#endif + fReindex = GetBoolArg("-reindex"); // Upgrading to 0.8; hard-link the old blknnnn.dat files into /blocks/ diff --git a/src/main.h b/src/main.h index eee985892..417343a82 100644 --- a/src/main.h +++ b/src/main.h @@ -2234,6 +2234,9 @@ struct CBlockTemplate std::vector vTxSigOps; }; +#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) +extern unsigned int cpuid_edx; +#endif diff --git a/src/makefile.linux-mingw b/src/makefile.linux-mingw index 403c929b1..037abbc07 100644 --- a/src/makefile.linux-mingw +++ b/src/makefile.linux-mingw @@ -94,6 +94,12 @@ OBJS= \ obj/leveldb.o \ obj/txdb.o +ifdef USE_SSE2 +DEFS += -DUSE_SSE2 +OBJS_SSE2= obj/scrypt-sse2.o +OBJS += $(OBJS_SSE2) +endif + all: litecoind.exe DEFS += -I"$(CURDIR)/leveldb/include" @@ -106,6 +112,9 @@ obj/build.h: FORCE version.cpp: obj/build.h DEFS += -DHAVE_BUILD_INFO +obj/%-sse2.o: %-sse2.cpp + $(CXX) -c $(xCXXFLAGS) -msse2 -o $@ $< + obj/%.o: %.cpp $(HEADERS) $(CXX) -c $(xCXXFLAGS) -o $@ $< diff --git a/src/makefile.mingw b/src/makefile.mingw index 52f4b6b80..9c5e867f9 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -102,6 +102,11 @@ OBJS= \ obj/leveldb.o \ obj/txdb.o +ifdef USE_SSE2 +DEFS += -DUSE_SSE2 +OBJS_SSE2= obj/scrypt-sse2.o +OBJS += $(OBJS_SSE2) +endif all: litecoind.exe @@ -117,6 +122,9 @@ DEFS += $(addprefix -I,$(CURDIR)/leveldb/helpers) leveldb/libleveldb.a: cd leveldb && $(MAKE) CC=$(CC) CXX=$(CXX) OPT="$(CFLAGS)" TARGET_OS=NATIVE_WINDOWS libleveldb.a libmemenv.a && cd .. 
+obj/%-sse2.o: %-sse2.cpp + $(CXX) -c $(CFLAGS) -msse2 -o $@ $< + obj/%.o: %.cpp $(HEADERS) $(CXX) -c $(CFLAGS) -o $@ $< diff --git a/src/makefile.osx b/src/makefile.osx index 2bee224cd..7f9ff4f0d 100644 --- a/src/makefile.osx +++ b/src/makefile.osx @@ -105,6 +105,12 @@ OBJS= \ obj/leveldb.o \ obj/txdb.o +ifdef USE_SSE2 +DEFS += -DUSE_SSE2 +OBJS_SSE2= obj/scrypt-sse2.o +OBJS += $(OBJS_SSE2) +endif + ifndef USE_UPNP override USE_UPNP = - endif @@ -144,6 +150,13 @@ obj/build.h: FORCE version.cpp: obj/build.h DEFS += -DHAVE_BUILD_INFO +obj/%-sse2.o: %-sse2.cpp + $(CXX) -c $(CFLAGS) -msse2 -MMD -MF $(@:%.o=%.d) -o $@ $< + @cp $(@:%.o=%.d) $(@:%.o=%.P); \ + sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' \ + -e '/^$$/ d' -e 's/$$/ :/' < $(@:%.o=%.d) >> $(@:%.o=%.P); \ + rm -f $(@:%.o=%.d) + obj/%.o: %.cpp $(CXX) -c $(CFLAGS) -MMD -MF $(@:%.o=%.d) -o $@ $< @cp $(@:%.o=%.d) $(@:%.o=%.P); \ diff --git a/src/makefile.unix b/src/makefile.unix index 7cb8173e9..a9685fe6e 100644 --- a/src/makefile.unix +++ b/src/makefile.unix @@ -144,10 +144,10 @@ OBJS= \ obj/leveldb.o \ obj/txdb.o -OBJS_SSE2= obj/scrypt-sse2.o -ifdef SSE2 +ifdef USE_SSE2 DEFS += -DUSE_SSE2 +OBJS_SSE2= obj/scrypt-sse2.o OBJS += $(OBJS_SSE2) endif diff --git a/src/scrypt-sse2.cpp b/src/scrypt-sse2.cpp index 30cd6dadd..711dfa92c 100644 --- a/src/scrypt-sse2.cpp +++ b/src/scrypt-sse2.cpp @@ -26,7 +26,7 @@ * This file was originally written by Colin Percival as part of the Tarsnap * online backup system. 
*/ -#ifdef __SSE2__ + #include "scrypt.h" #include #include @@ -91,10 +91,9 @@ static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4]) B[2] = _mm_add_epi32(B[2], X2); B[3] = _mm_add_epi32(B[3], X3); } -#endif + void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad) { -#ifdef __SSE2__ uint8_t B[128]; union { __m128i i128[8]; @@ -134,5 +133,4 @@ void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchp } PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32); -#endif } diff --git a/src/scrypt.cpp b/src/scrypt.cpp index 6746b8ee0..772701db4 100644 --- a/src/scrypt.cpp +++ b/src/scrypt.cpp @@ -28,6 +28,7 @@ */ #include "scrypt.h" +#include "util.h" #include #include #include @@ -242,7 +243,7 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) B[15] += x15; } -void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad) +void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad) { uint8_t B[128]; uint32_t X[32]; @@ -275,14 +276,48 @@ void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad) PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32); } +#if defined(USE_SSE2) +#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__)) +/* Always SSE2 */ +void scrypt_detect_sse2(unsigned int cpuid_edx) +{ + printf("scrypt: using scrypt-sse2 as built.\n"); +} +#else +/* Detect SSE2 */ +void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad); + +void scrypt_detect_sse2(unsigned int cpuid_edx) +{ + if (cpuid_edx & 1<<26) + { + scrypt_1024_1_1_256_sp = &scrypt_1024_1_1_256_sp_sse2; + printf("scrypt: using scrypt-sse2 as detected.\n"); + } + else + { + scrypt_1024_1_1_256_sp = &scrypt_1024_1_1_256_sp_generic; + printf("scrypt: using scrypt-generic, SSE2 unavailable.\n"); + } +} +#endif +#endif + void 
scrypt_1024_1_1_256(const char *input, char *output) { char scratchpad[SCRYPT_SCRATCHPAD_SIZE]; -#ifdef USE_SSE2 - // todo: runtime detection at startup and use function pointer - if(1) - scrypt_1024_1_1_256_sp_sse2(input, output, scratchpad); - else +#if defined(USE_SSE2) + // Detection would work, but in cases where we KNOW it always has SSE2, + // it is faster to use directly than to use a function pointer or conditional. +#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__)) + // Always SSE2: x86_64 or Intel MacOS X + scrypt_1024_1_1_256_sp_sse2(input, output, scratchpad); +#else + // Detect SSE2: 32bit x86 Linux or Windows + scrypt_1024_1_1_256_sp(input, output, scratchpad); +#endif +#else + // Generic scrypt + scrypt_1024_1_1_256_sp_generic(input, output, scratchpad); #endif - scrypt_1024_1_1_256_sp(input, output, scratchpad); } diff --git a/src/scrypt.h b/src/scrypt.h index 1e6c5b48c..9df1bc33a 100644 --- a/src/scrypt.h +++ b/src/scrypt.h @@ -4,9 +4,14 @@ #include static const int SCRYPT_SCRATCHPAD_SIZE = 131072 + 63; -void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad); -void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad); void scrypt_1024_1_1_256(const char *input, char *output); +void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad); + +#if defined(USE_SSE2) +extern void scrypt_detect_sse2(unsigned int cpuid_edx); +void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad); +extern void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad); +#endif void PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, @@ -27,6 +32,4 @@ static inline void le32enc(void *pp, uint32_t x) p[2] = (x >> 16) & 0xff; p[3] = (x >> 24) & 0xff; } - - #endif