From 8555125a1a1af68c9a89b7d4f9447d19ed1736ca Mon Sep 17 00:00:00 2001 From: s_nakamoto Date: Thu, 9 Sep 2010 01:00:40 +0000 Subject: [PATCH] try to auto-detect whether to use 128-bit 4-way SSE2 git-svn-id: https://bitcoin.svn.sourceforge.net/svnroot/bitcoin/trunk@150 1a98c847-1fd6-4fd8-948a-caf3550aa51b --- cryptopp/secblock.h | 4 +-- main.cpp | 66 ++++++++++++++++++++++++++++++++++++++++++++- net.cpp | 4 ++- serialize.h | 2 +- 4 files changed, 71 insertions(+), 5 deletions(-) diff --git a/cryptopp/secblock.h b/cryptopp/secblock.h index 5e3882916..2025757db 100644 --- a/cryptopp/secblock.h +++ b/cryptopp/secblock.h @@ -184,8 +184,8 @@ public: void deallocate(void *p, size_type n) { - //// Bitcoin: can't figure out why this is tripping on a few compiles. - //assert(false); + //// Bitcoin: don't know why this trips, probably a false alarm, depends on the compiler used. + //assert(false); } size_type max_size() const {return 0;} diff --git a/main.cpp b/main.cpp index 835b7c7a4..e3c44145d 100644 --- a/main.cpp +++ b/main.cpp @@ -2767,6 +2767,68 @@ inline void SHA256Transform(void* pstate, void* pinput, const void* pinit) static const int NPAR = 32; extern void Double_BlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2); +#ifdef __GNUC__ +void CallCPUID(int in, int& aret, int& cret) +{ + int a, c; + asm ( + "mov %2, %%eax; " // in into eax + "cpuid;" + "mov %%eax, %0;" // eax into ret + "mov %%ecx, %1;" // eax into ret + :"=r"(a),"=r"(c) /* output */ + :"r"(in) /* input */ + :"%eax","%ecx" /* clobbered register */ + ); + aret = a; + cret = c; +} + +bool Detect128BitSSE2() +{ + int a, c, nBrand; + CallCPUID(0, a, nBrand); + bool fIntel = (nBrand == 0x6c65746e); // ntel + bool fAMD = (nBrand == 0x444d4163); // cAMD + + struct + { + unsigned int nStepping : 4; + unsigned int nModel : 4; + unsigned int nFamily : 4; + unsigned int nProcessorType : 2; + unsigned int nUnused : 2; + unsigned int nExtendedModel : 4; + unsigned int nExtendedFamily : 8; + } + cpu; + CallCPUID(1, a, c); + memcpy(&cpu, &a, sizeof(cpu)); + int nFamily = cpu.nExtendedFamily + cpu.nFamily; + int nModel = cpu.nExtendedModel*16 + cpu.nModel; + + // We need Intel Nehalem or AMD K10 or better for 128bit SSE2 + // Nehalem = i3/i5/i7 and some Xeon + // K10 = Opterons with 4 or more cores, Phenom, Phenom II, Athlon II + // Intel Core i5 family 6, model 26 or 30 + // Intel Core i7 family 6, model 26 or 30 + // Intel Core i3 family 6, model 37 + // AMD Phenom family 16, model 10 + bool fUseSSE2 = ((fIntel && nFamily * 10000 + nModel >= 60026) || + (fAMD && nFamily * 10000 + nModel >= 160010)); + + static bool fPrinted; + if (!fPrinted) + { + fPrinted = true; + printf("CPUID %08x family %d, model %d, stepping %d, fUseSSE2=%d\n", nBrand, nFamily, nModel, cpu.nStepping, fUseSSE2); + } + return fUseSSE2; +} +#else +bool Detect128BitSSE2() { return false; } +#endif + @@ -2774,6 +2836,9 @@ void BitcoinMiner() { printf("BitcoinMiner started\n"); SetThreadPriority(THREAD_PRIORITY_LOWEST); + bool f4WaySSE2 = Detect128BitSSE2(); + if (mapArgs.count("-4way")) + f4WaySSE2 = (mapArgs["-4way"] != "0"); CKey key; key.MakeNewKey(); @@ -2913,7 +2978,6 @@ void BitcoinMiner() // // Search // - bool f4WaySSE2 = mapArgs.count("-4way"); int64 nStart = GetTime(); uint256 hashTarget = CBigNum().SetCompact(pblock->nBits).getuint256(); uint256 hashbuf[2]; diff --git a/net.cpp b/net.cpp index 1a64a82c1..82b3ffb15 100644 --- a/net.cpp +++ b/net.cpp @@ -1160,7 +1160,9 @@ void ThreadMessageHandler2(void* parg) pnode->Release(); } - // Wait and allow messages to bunch up + // Wait and allow messages to bunch up. + // Reduce vnThreadsRunning so StopNode has permission to exit while + // we're sleeping, but we must always check fShutdown after doing this. vnThreadsRunning[2]--; Sleep(100); if (fRequestShutdown) diff --git a/serialize.h b/serialize.h index 5ded6bf09..2b88d3536 100644 --- a/serialize.h +++ b/serialize.h @@ -23,7 +23,7 @@ class CAutoFile; static const unsigned int MAX_SIZE = 0x02000000; static const int VERSION = 312; -static const char* pszSubVer = ".1"; +static const char* pszSubVer = ".2";