Browse Source

CPU: remove AVX code, switch to __builtin for AES detection (#1959)

* [cpu] remove avx detect and code blocks, try to switch to __builtin
* [cpu] use __builtin_* only on x86 systems
* [cpu] perform check in separate function
* [cpu] set AES definition on MSVC
* update x86 and aes support checks at compile time
* [cmake] update comment about AES on MSVC
cpu
R4SAS 1 year ago committed by GitHub
parent
commit
7b6aa41ca8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      .editorconfig
  2. 11
      build/CMakeLists.txt
  3. 2
      contrib/i2pd.conf
  4. 3
      daemon/Daemon.cpp
  5. 54
      libi2pd/CPU.cpp
  6. 5
      libi2pd/CPU.h
  7. 4
      libi2pd/Config.cpp
  8. 332
      libi2pd/Crypto.cpp
  9. 2
      libi2pd/Crypto.h
  10. 29
      libi2pd/Identity.cpp
  11. 3
      libi2pd/api.cpp

3
.editorconfig

@ -34,3 +34,6 @@ trim_trailing_whitespace = false @@ -34,3 +34,6 @@ trim_trailing_whitespace = false
[*.yml]
indent_style = space
indent_size = 2
[*.patch]
trim_trailing_whitespace = false

11
build/CMakeLists.txt

@ -197,14 +197,11 @@ endif() @@ -197,14 +197,11 @@ endif()
# Note: AES-NI and AVX is available on x86-based CPU's.
# Here also ARM64 implementation, but currently we don't support it.
# MSVC is not supported.
if(MSVC)
message(STATUS "AES-NI is not supported on MSVC, option was disabled")
set(WITH_AESNI OFF)
endif()
# MSVC is not supported due to different ASM processing, so we hope OpenSSL has its own checks to run optimized code.
if(WITH_AESNI AND (ARCHITECTURE MATCHES "x86_64" OR ARCHITECTURE MATCHES "i386"))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
endif()
add_definitions(-D__AES__)
endif()

2
contrib/i2pd.conf

@ -280,8 +280,6 @@ verify = true @@ -280,8 +280,6 @@ verify = true
[cpuext]
## Use CPU AES-NI instructions set when work with cryptography when available (default: true)
# aesni = true
## Use CPU AVX instructions set when work with cryptography when available (default: true)
# avx = true
## Force usage of CPU instructions set, even if they not found (default: false)
## DO NOT TOUCH that option if you really don't know what are you doing!
# force = false

3
daemon/Daemon.cpp

@ -150,12 +150,11 @@ namespace util @@ -150,12 +150,11 @@ namespace util
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
bool ssu; i2p::config::GetOption("ssu", ssu);
if (!ssu && i2p::config::IsDefault ("precomputation.elgamal"))
precomputation = false; // we don't elgamal table if no ssu, unless it's specified explicitly
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
i2p::transport::InitAddressFromIface (); // get address4/6 from interfaces

54
libi2pd/CPU.cpp

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2020, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -7,52 +7,52 @@ @@ -7,52 +7,52 @@
*/
#include "CPU.h"
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>
#endif
#include "Log.h"
#if defined(_MSC_VER)
#include <intrin.h>
#ifndef bit_AES
#define bit_AES (1 << 25)
#define bit_AES (1 << 25)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif
namespace i2p
{
namespace cpu
{
bool aesni = false;
bool avx = false;
void Detect(bool AesSwitch, bool AvxSwitch, bool force)
inline bool cpu_support_aes()
{
#if defined(__x86_64__) || defined(__i386__)
int info[4];
__cpuid(0, info[0], info[1], info[2], info[3]);
if (info[0] >= 0x00000001) {
__cpuid(0x00000001, info[0], info[1], info[2], info[3]);
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP
if (AesSwitch && force) { // only if forced
#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__))
#if defined(_MSC_VER)
int cpu_info[4];
__cpuid(cpu_info, 1);
return ((cpu_info[2] & bit_AES) != 0);
#elif defined(__clang__)
#if __clang_major__ >= 6
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("aes");
#elif defined(__GNUC__)
__builtin_cpu_init();
return __builtin_cpu_supports("aes");
#else
if ((info[2] & bit_AES && AesSwitch) || (AesSwitch && force)) {
return false;
#endif
aesni = true;
}
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP
if (AvxSwitch && force) { // only if forced
#else
if ((info[2] & bit_AVX && AvxSwitch) || (AvxSwitch && force)) {
return false;
#endif
avx = true;
}
}
void Detect(bool AesSwitch, bool force)
{
if ((cpu_support_aes() && AesSwitch) || (AesSwitch && force)) {
aesni = true;
}
#endif // defined(__x86_64__) || defined(__i386__)
LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled"));
LogPrint(eLogInfo, "AVX ", (avx ? "enabled" : "disabled"));
}
}
}

5
libi2pd/CPU.h

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2020, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -14,9 +14,8 @@ namespace i2p @@ -14,9 +14,8 @@ namespace i2p
namespace cpu
{
extern bool aesni;
extern bool avx;
void Detect(bool AesSwitch, bool AvxSwitch, bool force);
void Detect(bool AesSwitch, bool force);
}
}

4
libi2pd/Config.cpp

@ -193,7 +193,7 @@ namespace config { @@ -193,7 +193,7 @@ namespace config {
options_description precomputation("Precomputation options");
precomputation.add_options()
("precomputation.elgamal",
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
value<bool>()->default_value(false),
#else
value<bool>()->default_value(true),
@ -308,7 +308,7 @@ namespace config { @@ -308,7 +308,7 @@ namespace config {
options_description cpuext("CPU encryption extensions options");
cpuext.add_options()
("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used")
("cpuext.avx", bool_switch()->default_value(true), "Use auto detection for AVX CPU extensions. If false, AVX will be not used")
("cpuext.avx", bool_switch()->default_value(false), "Deprecated option")
("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines")
;

332
libi2pd/Crypto.cpp

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2022, The PurpleI2P Project
* Copyright (c) 2013-2023, The PurpleI2P Project
*
* This file is part of Purple i2pd project and licensed under BSD3
*
@ -28,6 +28,12 @@ @@ -28,6 +28,12 @@
#include "I2PEndian.h"
#include "Log.h"
#if defined(__AES__) && !defined(_MSC_VER) && ((defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)))
#define SUPPORTS_AES 1
#else
#define SUPPORTS_AES 0
#endif
namespace i2p
{
namespace crypto
@ -361,7 +367,7 @@ namespace crypto @@ -361,7 +367,7 @@ namespace crypto
BIGNUM * b1 = BN_CTX_get (ctx);
BIGNUM * b = BN_CTX_get (ctx);
// select random k
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
BN_rand (k, ELGAMAL_FULL_EXPONENT_NUM_BITS, -1, 1); // full exponent for x64
#else
BN_rand (k, ELGAMAL_SHORT_EXPONENT_NUM_BITS, -1, 1); // short exponent of 226 bits
@ -428,7 +434,7 @@ namespace crypto @@ -428,7 +434,7 @@ namespace crypto
void GenerateElGamalKeyPair (uint8_t * priv, uint8_t * pub)
{
#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER)
#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)) || defined(_MSC_VER)
RAND_bytes (priv, 256);
#else
// lower 226 bits (28 bytes and 2 bits) only. short exponent
@ -555,7 +561,7 @@ namespace crypto @@ -555,7 +561,7 @@ namespace crypto
}
// AES
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define KeyExpansion256(round0,round1) \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \
@ -580,7 +586,7 @@ namespace crypto @@ -580,7 +586,7 @@ namespace crypto
"movaps %%xmm3, "#round1"(%[sched]) \n"
#endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
void ECBCryptoAESNI::ExpandKey (const AESKey& key)
{
__asm__
@ -621,7 +627,7 @@ namespace crypto @@ -621,7 +627,7 @@ namespace crypto
#endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define EncryptAES256(sched) \
"pxor (%["#sched"]), %%xmm0 \n" \
"aesenc 16(%["#sched"]), %%xmm0 \n" \
@ -642,16 +648,18 @@ namespace crypto @@ -642,16 +648,18 @@ namespace crypto
void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
(
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
:
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
);
}
else
#endif
@ -660,7 +668,7 @@ namespace crypto @@ -660,7 +668,7 @@ namespace crypto
}
}
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define DecryptAES256(sched) \
"pxor 224(%["#sched"]), %%xmm0 \n" \
"aesdec 208(%["#sched"]), %%xmm0 \n" \
@ -681,16 +689,18 @@ namespace crypto @@ -681,16 +689,18 @@ namespace crypto
void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
(
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
:
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
);
}
else
#endif
@ -699,7 +709,7 @@ namespace crypto @@ -699,7 +709,7 @@ namespace crypto
}
}
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
#define CallAESIMC(offset) \
"movaps "#offset"(%[shed]), %%xmm0 \n" \
"aesimc %%xmm0, %%xmm0 \n" \
@ -708,7 +718,7 @@ namespace crypto @@ -708,7 +718,7 @@ namespace crypto
void ECBEncryption::SetKey (const AESKey& key)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
ExpandKey (key);
@ -722,28 +732,30 @@ namespace crypto @@ -722,28 +732,30 @@ namespace crypto
void ECBDecryption::SetKey (const AESKey& key)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
ExpandKey (key); // expand encryption key first
// then invert it using aesimc
__asm__
(
CallAESIMC(16)
CallAESIMC(32)
CallAESIMC(48)
CallAESIMC(64)
CallAESIMC(80)
CallAESIMC(96)
CallAESIMC(112)
CallAESIMC(128)
CallAESIMC(144)
CallAESIMC(160)
CallAESIMC(176)
CallAESIMC(192)
CallAESIMC(208)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
);
(
CallAESIMC(16)
CallAESIMC(32)
CallAESIMC(48)
CallAESIMC(64)
CallAESIMC(80)
CallAESIMC(96)
CallAESIMC(112)
CallAESIMC(128)
CallAESIMC(144)
CallAESIMC(160)
CallAESIMC(176)
CallAESIMC(192)
CallAESIMC(208)
:
: [shed]"r"(GetKeySchedule ())
: "%xmm0", "memory"
);
}
else
#endif
@ -754,28 +766,28 @@ namespace crypto @@ -754,28 +766,28 @@ namespace crypto
void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "cc", "memory"
);
(
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "cc", "memory"
);
}
else
#endif
@ -799,22 +811,22 @@ namespace crypto @@ -799,22 +811,22 @@ namespace crypto
void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory"
);
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory"
);
}
else
#endif
@ -823,29 +835,29 @@ namespace crypto @@ -823,29 +835,29 @@ namespace crypto
void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
);
(
"movups (%[iv]), %%xmm1 \n"
"1: \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"dec %[num] \n"
"jnz 1b \n"
"movups %%xmm1, (%[iv]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
);
}
else
#endif
@ -869,22 +881,22 @@ namespace crypto @@ -869,22 +881,22 @@ namespace crypto
void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"movups %%xmm0, (%[iv]) \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory"
);
(
"movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n"
"movups %%xmm0, (%[iv]) \n"
DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
:
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory"
);
}
else
#endif
@ -893,34 +905,34 @@ namespace crypto @@ -893,34 +905,34 @@ namespace crypto
void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
// encrypt IV
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n"
// double IV encryption
EncryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n"
// encrypt data, IV is xmm1
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched_l)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"dec %[num] \n"
"jnz 1b \n"
:
: [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "cc", "memory"
);
(
// encrypt IV
"movups (%[in]), %%xmm0 \n"
EncryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n"
// double IV encryption
EncryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n"
// encrypt data, IV is xmm1
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched_l)
"movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n"
"dec %[num] \n"
"jnz 1b \n"
:
: [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "cc", "memory"
);
}
else
#endif
@ -934,35 +946,35 @@ namespace crypto @@ -934,35 +946,35 @@ namespace crypto
void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__))
#if SUPPORTS_AES
if(i2p::cpu::aesni)
{
__asm__
(
// decrypt IV
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n"
// double IV encryption
DecryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n"
// decrypt data, IV is xmm1
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched_l)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"dec %[num] \n"
"jnz 1b \n"
:
: [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
);
(
// decrypt IV
"movups (%[in]), %%xmm0 \n"
DecryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n"
// double IV encryption
DecryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n"
// decrypt data, IV is xmm1
"1: \n"
"add $16, %[in] \n"
"add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched_l)
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n"
"dec %[num] \n"
"jnz 1b \n"
:
: [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
);
}
else
#endif
@ -1285,9 +1297,9 @@ namespace crypto @@ -1285,9 +1297,9 @@ namespace crypto
}
}*/
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force)
void InitCrypto (bool precomputation, bool aesni, bool force)
{
i2p::cpu::Detect (aesni, avx, force);
i2p::cpu::Detect (aesni, force);
#if LEGACY_OPENSSL
SSL_library_init ();
#endif
@ -1297,7 +1309,7 @@ namespace crypto @@ -1297,7 +1309,7 @@ namespace crypto
CRYPTO_set_locking_callback (OpensslLockingCallback);*/
if (precomputation)
{
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
g_ElggTable = new BIGNUM * [ELGAMAL_FULL_EXPONENT_NUM_BYTES][255];
PrecalculateElggTable (g_ElggTable, ELGAMAL_FULL_EXPONENT_NUM_BYTES);
#else
@ -1312,7 +1324,7 @@ namespace crypto @@ -1312,7 +1324,7 @@ namespace crypto
if (g_ElggTable)
{
DestroyElggTable (g_ElggTable,
#if defined(__x86_64__)
#if (defined(_M_AMD64) || defined(__x86_64__))
ELGAMAL_FULL_EXPONENT_NUM_BYTES
#else
ELGAMAL_SHORT_EXPONENT_NUM_BYTES

2
libi2pd/Crypto.h

@ -307,7 +307,7 @@ namespace crypto @@ -307,7 +307,7 @@ namespace crypto
void InitNoiseIKState (NoiseSymmetricState& state, const uint8_t * pub); // Noise_IK (ratchets)
// init and terminate
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force);
void InitCrypto (bool precomputation, bool aesni, bool force);
void TerminateCrypto ();
}
}

29
libi2pd/Identity.cpp

@ -803,29 +803,12 @@ namespace data @@ -803,29 +803,12 @@ namespace data
XORMetric operator^(const IdentHash& key1, const IdentHash& key2)
{
XORMetric m;
#if (defined(__x86_64__) || defined(__i386__)) && defined(__AVX__) // not all X86 targets supports AVX (like old Pentium, see #1600)
if(i2p::cpu::avx)
{
__asm__
(
"vmovups %1, %%ymm0 \n"
"vmovups %2, %%ymm1 \n"
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
"vmovups %%ymm1, %0 \n"
: "=m"(*m.metric)
: "m"(*key1), "m"(*key2)
: "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler
);
}
else
#endif
{
const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL ();
m.metric_ll[0] = hash1[0] ^ hash2[0];
m.metric_ll[1] = hash1[1] ^ hash2[1];
m.metric_ll[2] = hash1[2] ^ hash2[2];
m.metric_ll[3] = hash1[3] ^ hash2[3];
}
const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL ();
m.metric_ll[0] = hash1[0] ^ hash2[0];
m.metric_ll[1] = hash1[1] ^ hash2[1];
m.metric_ll[2] = hash1[2] ^ hash2[2];
m.metric_ll[3] = hash1[3] ^ hash2[3];
return m;
}

3
libi2pd/api.cpp

@ -38,9 +38,8 @@ namespace api @@ -38,9 +38,8 @@ namespace api
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
int netID; i2p::config::GetOption("netid", netID);
i2p::context.SetNetID (netID);

Loading…
Cancel
Save