Browse Source

initial code for runtime detection of aesni/avx

pull/1115/head
Jeff Becker 7 years ago
parent
commit
91e45d9a4a
No known key found for this signature in database
GPG Key ID: F357B3B42F6F9B05
  1. 587
      libi2pd/Crypto.cpp
  2. 121
      libi2pd/Crypto.h

587
libi2pd/Crypto.cpp

@ -479,10 +479,9 @@ namespace crypto
const uint64_t IPAD = 0x3636363636363636; const uint64_t IPAD = 0x3636363636363636;
const uint64_t OPAD = 0x5C5C5C5C5C5C5C5C; const uint64_t OPAD = 0x5C5C5C5C5C5C5C5C;
#if defined(__AVX__)
static const uint64_t ipads[] = { IPAD, IPAD, IPAD, IPAD }; static const uint64_t ipads[] = { IPAD, IPAD, IPAD, IPAD };
static const uint64_t opads[] = { OPAD, OPAD, OPAD, OPAD }; static const uint64_t opads[] = { OPAD, OPAD, OPAD, OPAD };
#endif
void HMACMD5Digest (uint8_t * msg, size_t len, const MACKey& key, uint8_t * digest) void HMACMD5Digest (uint8_t * msg, size_t len, const MACKey& key, uint8_t * digest)
// key is 32 bytes // key is 32 bytes
@ -491,47 +490,73 @@ namespace crypto
{ {
uint64_t buf[256]; uint64_t buf[256];
uint64_t hash[12]; // 96 bytes uint64_t hash[12]; // 96 bytes
#if defined(__AVX__) // for AVX if(i2p::cpu::avx)
__asm__ {
( #ifdef AVX
"vmovups %[key], %%ymm0 \n" __asm__
"vmovups %[ipad], %%ymm1 \n" (
"vmovups %%ymm1, 32(%[buf]) \n" "vmovups %[key], %%ymm0 \n"
"vxorps %%ymm0, %%ymm1, %%ymm1 \n" "vmovups %[ipad], %%ymm1 \n"
"vmovups %%ymm1, (%[buf]) \n" "vmovups %%ymm1, 32(%[buf]) \n"
"vmovups %[opad], %%ymm1 \n" "vxorps %%ymm0, %%ymm1, %%ymm1 \n"
"vmovups %%ymm1, 32(%[hash]) \n" "vmovups %%ymm1, (%[buf]) \n"
"vxorps %%ymm0, %%ymm1, %%ymm1 \n" "vmovups %[opad], %%ymm1 \n"
"vmovups %%ymm1, (%[hash]) \n" "vmovups %%ymm1, 32(%[hash]) \n"
"vzeroall \n" // end of AVX "vxorps %%ymm0, %%ymm1, %%ymm1 \n"
"movups %%xmm0, 80(%[hash]) \n" // zero last 16 bytes "vmovups %%ymm1, (%[hash]) \n"
: "vzeroall \n" // end of AVX
: [key]"m"(*(const uint8_t *)key), [ipad]"m"(*ipads), [opad]"m"(*opads), "movups %%xmm0, 80(%[hash]) \n" // zero last 16 bytes
[buf]"r"(buf), [hash]"r"(hash) :
: "memory", "%xmm0" // TODO: change to %ymm0 later : [key]"m"(*(const uint8_t *)key), [ipad]"m"(*ipads), [opad]"m"(*opads),
); [buf]"r"(buf), [hash]"r"(hash)
: "memory", "%xmm0" // TODO: change to %ymm0 later
);
#else #else
// ikeypad // ikeypad
buf[0] = key.GetLL ()[0] ^ IPAD; buf[0] = key.GetLL ()[0] ^ IPAD;
buf[1] = key.GetLL ()[1] ^ IPAD; buf[1] = key.GetLL ()[1] ^ IPAD;
buf[2] = key.GetLL ()[2] ^ IPAD; buf[2] = key.GetLL ()[2] ^ IPAD;
buf[3] = key.GetLL ()[3] ^ IPAD; buf[3] = key.GetLL ()[3] ^ IPAD;
buf[4] = IPAD; buf[4] = IPAD;
buf[5] = IPAD; buf[5] = IPAD;
buf[6] = IPAD; buf[6] = IPAD;
buf[7] = IPAD; buf[7] = IPAD;
// okeypad // okeypad
hash[0] = key.GetLL ()[0] ^ OPAD; hash[0] = key.GetLL ()[0] ^ OPAD;
hash[1] = key.GetLL ()[1] ^ OPAD; hash[1] = key.GetLL ()[1] ^ OPAD;
hash[2] = key.GetLL ()[2] ^ OPAD; hash[2] = key.GetLL ()[2] ^ OPAD;
hash[3] = key.GetLL ()[3] ^ OPAD; hash[3] = key.GetLL ()[3] ^ OPAD;
hash[4] = OPAD; hash[4] = OPAD;
hash[5] = OPAD; hash[5] = OPAD;
hash[6] = OPAD; hash[6] = OPAD;
hash[7] = OPAD; hash[7] = OPAD;
// fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P) // fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P)
memset (hash + 10, 0, 16); memset (hash + 10, 0, 16);
#endif #endif
}
else
{
// ikeypad
buf[0] = key.GetLL ()[0] ^ IPAD;
buf[1] = key.GetLL ()[1] ^ IPAD;
buf[2] = key.GetLL ()[2] ^ IPAD;
buf[3] = key.GetLL ()[3] ^ IPAD;
buf[4] = IPAD;
buf[5] = IPAD;
buf[6] = IPAD;
buf[7] = IPAD;
// okeypad
hash[0] = key.GetLL ()[0] ^ OPAD;
hash[1] = key.GetLL ()[1] ^ OPAD;
hash[2] = key.GetLL ()[2] ^ OPAD;
hash[3] = key.GetLL ()[3] ^ OPAD;
hash[4] = OPAD;
hash[5] = OPAD;
hash[6] = OPAD;
hash[7] = OPAD;
// fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P)
memset (hash + 10, 0, 16);
}
// concatenate with msg // concatenate with msg
memcpy (buf + 8, msg, len); memcpy (buf + 8, msg, len);
@ -543,8 +568,7 @@ namespace crypto
} }
// AES // AES
#ifdef AESNI #ifdef AESNI
#define KeyExpansion256(round0,round1) \ #define KeyExpansion256(round0,round1) \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \ "pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \ "movaps %%xmm1, %%xmm4 \n" \
@ -567,7 +591,9 @@ namespace crypto
"pxor %%xmm4, %%xmm3 \n" \ "pxor %%xmm4, %%xmm3 \n" \
"pxor %%xmm2, %%xmm3 \n" \ "pxor %%xmm2, %%xmm3 \n" \
"movaps %%xmm3, "#round1"(%[sched]) \n" "movaps %%xmm3, "#round1"(%[sched]) \n"
#endif
#ifdef AESNI
void ECBCryptoAESNI::ExpandKey (const AESKey& key) void ECBCryptoAESNI::ExpandKey (const AESKey& key)
{ {
__asm__ __asm__
@ -605,7 +631,10 @@ namespace crypto
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "memory" // clogged : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "memory" // clogged
); );
} }
#endif
#if AESNI
#define EncryptAES256(sched) \ #define EncryptAES256(sched) \
"pxor (%["#sched"]), %%xmm0 \n" \ "pxor (%["#sched"]), %%xmm0 \n" \
"aesenc 16(%["#sched"]), %%xmm0 \n" \ "aesenc 16(%["#sched"]), %%xmm0 \n" \
@ -622,18 +651,31 @@ namespace crypto
"aesenc 192(%["#sched"]), %%xmm0 \n" \ "aesenc 192(%["#sched"]), %%xmm0 \n" \
"aesenc 208(%["#sched"]), %%xmm0 \n" \ "aesenc 208(%["#sched"]), %%xmm0 \n" \
"aesenclast 224(%["#sched"]), %%xmm0 \n" "aesenclast 224(%["#sched"]), %%xmm0 \n"
#endif
void ECBEncryptionAESNI::Encrypt (const ChipherBlock * in, ChipherBlock * out) void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out)
{ {
__asm__ if(i2p::cpu::aesni)
( {
"movups (%[in]), %%xmm0 \n" #ifdef AESNI
EncryptAES256(sched) __asm__
"movups %%xmm0, (%[out]) \n" (
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" "movups (%[in]), %%xmm0 \n"
); EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
#else
AES_encrypt (in->buf, out->buf, &m_Key);
#endif
}
else
{
AES_encrypt (in->buf, out->buf, &m_Key);
}
} }
#ifdef AESNI
#define DecryptAES256(sched) \ #define DecryptAES256(sched) \
"pxor 224(%["#sched"]), %%xmm0 \n" \ "pxor 224(%["#sched"]), %%xmm0 \n" \
"aesdec 208(%["#sched"]), %%xmm0 \n" \ "aesdec 208(%["#sched"]), %%xmm0 \n" \
@ -650,79 +692,148 @@ namespace crypto
"aesdec 32(%["#sched"]), %%xmm0 \n" \ "aesdec 32(%["#sched"]), %%xmm0 \n" \
"aesdec 16(%["#sched"]), %%xmm0 \n" \ "aesdec 16(%["#sched"]), %%xmm0 \n" \
"aesdeclast (%["#sched"]), %%xmm0 \n" "aesdeclast (%["#sched"]), %%xmm0 \n"
#endif
void ECBDecryptionAESNI::Decrypt (const ChipherBlock * in, ChipherBlock * out) void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out)
{ {
__asm__ if(i2p::cpu::aesni)
( {
"movups (%[in]), %%xmm0 \n" #ifdef AESNI
DecryptAES256(sched) __asm__
"movups %%xmm0, (%[out]) \n" (
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" "movups (%[in]), %%xmm0 \n"
); DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory"
);
#else
AES_decrypt (in->buf, out->buf, &m_Key);
#endif
}
else
{
AES_decrypt (in->buf, out->buf, &m_Key);
}
} }
#ifdef AESNI
#define CallAESIMC(offset) \ #define CallAESIMC(offset) \
"movaps "#offset"(%[shed]), %%xmm0 \n" \ "movaps "#offset"(%[shed]), %%xmm0 \n" \
"aesimc %%xmm0, %%xmm0 \n" \ "aesimc %%xmm0, %%xmm0 \n" \
"movaps %%xmm0, "#offset"(%[shed]) \n" "movaps %%xmm0, "#offset"(%[shed]) \n"
#endif
void ECBDecryptionAESNI::SetKey (const AESKey& key) void ECBEncryption::SetKey (const AESKey& key)
{ {
ExpandKey (key); // expand encryption key first if(i2p::cpu::aesni)
// then invert it using aesimc {
__asm__ #ifdef AESNI
( ExpandKey (key); // expand encryption key first
CallAESIMC(16) // then invert it using aesimc
CallAESIMC(32) __asm__
CallAESIMC(48) (
CallAESIMC(64) CallAESIMC(16)
CallAESIMC(80) CallAESIMC(32)
CallAESIMC(96) CallAESIMC(48)
CallAESIMC(112) CallAESIMC(64)
CallAESIMC(128) CallAESIMC(80)
CallAESIMC(144) CallAESIMC(96)
CallAESIMC(160) CallAESIMC(112)
CallAESIMC(176) CallAESIMC(128)
CallAESIMC(192) CallAESIMC(144)
CallAESIMC(208) CallAESIMC(160)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory" CallAESIMC(176)
); CallAESIMC(192)
CallAESIMC(208)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
);
#else
AES_set_encrypt_key (key, 256, &m_Key);
#endif
}
else
{
AES_set_encrypt_key (key, 256, &m_Key);
}
} }
void ECBDecryption::SetKey (const AESKey& key)
{
if(i2p::cpu::aesni)
{
#ifdef AESNI
ExpandKey (key); // expand encryption key first
// then invert it using aesimc
__asm__
(
CallAESIMC(16)
CallAESIMC(32)
CallAESIMC(48)
CallAESIMC(64)
CallAESIMC(80)
CallAESIMC(96)
CallAESIMC(112)
CallAESIMC(128)
CallAESIMC(144)
CallAESIMC(160)
CallAESIMC(176)
CallAESIMC(192)
CallAESIMC(208)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory"
);
#else
AES_set_decrypt_key (key, 256, &m_Key);
#endif #endif
}
else
{
AES_set_decrypt_key (key, 256, &m_Key);
}
}
void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
"movups (%[iv]), %%xmm1 \n" "movups (%[iv]), %%xmm1 \n"
"1: \n" "1: \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched) EncryptAES256(sched)
"movaps %%xmm0, %%xmm1 \n" "movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
"add $16, %[in] \n" "add $16, %[in] \n"
"add $16, %[out] \n" "add $16, %[out] \n"
"dec %[num] \n" "dec %[num] \n"
"jnz 1b \n" "jnz 1b \n"
"movups %%xmm1, (%[iv]) \n" "movups %%xmm1, (%[iv]) \n"
: :
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "cc", "memory" : "%xmm0", "%xmm1", "cc", "memory"
); );
#else #else
for (int i = 0; i < numBlocks; i++) for (int i = 0; i < numBlocks; i++)
{
*m_LastBlock.GetChipherBlock () ^= in[i];
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ());
out[i] = *m_LastBlock.GetChipherBlock ();
}
#endif
}
else
{ {
*m_LastBlock.GetChipherBlock () ^= in[i]; for (int i = 0; i < numBlocks; i++)
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ()); {
out[i] = *m_LastBlock.GetChipherBlock (); *m_LastBlock.GetChipherBlock () ^= in[i];
m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ());
out[i] = *m_LastBlock.GetChipherBlock ();
}
} }
#endif
} }
void CBCEncryption::Encrypt (const uint8_t * in, std::size_t len, uint8_t * out) void CBCEncryption::Encrypt (const uint8_t * in, std::size_t len, uint8_t * out)
@ -735,57 +846,75 @@ namespace crypto
void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out) void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
"movups (%[iv]), %%xmm1 \n" "movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched) EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
"movups %%xmm0, (%[iv]) \n" "movups %%xmm0, (%[iv]) \n"
: :
: [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out) [in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory" : "%xmm0", "%xmm1", "memory"
); );
#else #else
Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
#endif #endif
}
else
Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
} }
void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
"movups (%[iv]), %%xmm1 \n" "movups (%[iv]), %%xmm1 \n"
"1: \n" "1: \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n" "movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched) DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n" "movaps %%xmm2, %%xmm1 \n"
"add $16, %[in] \n" "add $16, %[in] \n"
"add $16, %[out] \n" "add $16, %[out] \n"
"dec %[num] \n" "dec %[num] \n"
"jnz 1b \n" "jnz 1b \n"
"movups %%xmm1, (%[iv]) \n" "movups %%xmm1, (%[iv]) \n"
: :
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks)
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory" : "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
); );
#else #else
for (int i = 0; i < numBlocks; i++) for (int i = 0; i < numBlocks; i++)
{
ChipherBlock tmp = in[i];
m_ECBDecryption.Decrypt (in + i, out + i);
out[i] ^= *m_IV.GetChipherBlock ();
*m_IV.GetChipherBlock () = tmp;
}
#endif
}
else
{ {
ChipherBlock tmp = in[i]; for (int i = 0; i < numBlocks; i++)
m_ECBDecryption.Decrypt (in + i, out + i); {
out[i] ^= *m_IV.GetChipherBlock (); ChipherBlock tmp = in[i];
*m_IV.GetChipherBlock () = tmp; m_ECBDecryption.Decrypt (in + i, out + i);
out[i] ^= *m_IV.GetChipherBlock ();
*m_IV.GetChipherBlock () = tmp;
}
} }
#endif
} }
void CBCDecryption::Decrypt (const uint8_t * in, std::size_t len, uint8_t * out) void CBCDecryption::Decrypt (const uint8_t * in, std::size_t len, uint8_t * out)
@ -797,96 +926,121 @@ namespace crypto
void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out) void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
"movups (%[iv]), %%xmm1 \n" "movups (%[iv]), %%xmm1 \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"movups %%xmm0, (%[iv]) \n" "movups %%xmm0, (%[iv]) \n"
DecryptAES256(sched) DecryptAES256(sched)
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
: :
: [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()),
[in]"r"(in), [out]"r"(out) [in]"r"(in), [out]"r"(out)
: "%xmm0", "%xmm1", "memory" : "%xmm0", "%xmm1", "memory"
); );
#else #else
Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
#endif #endif
}
else
Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out);
} }
void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out) void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
// encrypt IV // encrypt IV
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
EncryptAES256(sched_iv) EncryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n" "movaps %%xmm0, %%xmm1 \n"
// double IV encryption // double IV encryption
EncryptAES256(sched_iv) EncryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
// encrypt data, IV is xmm1 // encrypt data, IV is xmm1
"1: \n" "1: \n"
"add $16, %[in] \n" "add $16, %[in] \n"
"add $16, %[out] \n" "add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
EncryptAES256(sched_l) EncryptAES256(sched_l)
"movaps %%xmm0, %%xmm1 \n" "movaps %%xmm0, %%xmm1 \n"
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
"dec %[num] \n" "dec %[num] \n"
"jnz 1b \n" "jnz 1b \n"
: :
: [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.GetKeySchedule ()), : [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "cc", "memory" : "%xmm0", "%xmm1", "cc", "memory"
); );
#else #else
m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
m_LayerEncryption.SetIV (out); m_LayerEncryption.SetIV (out);
m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
#endif #endif
}
else
{
m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
m_LayerEncryption.SetIV (out);
m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
}
} }
void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out) void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{ {
if(i2p::cpu::aesni)
{
#ifdef AESNI #ifdef AESNI
__asm__ __asm__
( (
// decrypt IV // decrypt IV
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
DecryptAES256(sched_iv) DecryptAES256(sched_iv)
"movaps %%xmm0, %%xmm1 \n" "movaps %%xmm0, %%xmm1 \n"
// double IV encryption // double IV encryption
DecryptAES256(sched_iv) DecryptAES256(sched_iv)
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
// decrypt data, IV is xmm1 // decrypt data, IV is xmm1
"1: \n" "1: \n"
"add $16, %[in] \n" "add $16, %[in] \n"
"add $16, %[out] \n" "add $16, %[out] \n"
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
"movaps %%xmm0, %%xmm2 \n" "movaps %%xmm0, %%xmm2 \n"
DecryptAES256(sched_l) DecryptAES256(sched_l)
"pxor %%xmm1, %%xmm0 \n" "pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
"movaps %%xmm2, %%xmm1 \n" "movaps %%xmm2, %%xmm1 \n"
"dec %[num] \n" "dec %[num] \n"
"jnz 1b \n" "jnz 1b \n"
: :
: [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.GetKeySchedule ()), : [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()),
[in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes
: "%xmm0", "%xmm1", "%xmm2", "cc", "memory" : "%xmm0", "%xmm1", "%xmm2", "cc", "memory"
); );
#else #else
m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
m_LayerDecryption.SetIV (out); m_LayerDecryption.SetIV (out);
m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
#endif #endif
}
else
{
m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv
m_LayerDecryption.SetIV (out);
m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data
m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv
}
} }
/* std::vector <std::unique_ptr<std::mutex> > m_OpenSSLMutexes; /* std::vector <std::unique_ptr<std::mutex> > m_OpenSSLMutexes;
@ -904,6 +1058,7 @@ namespace crypto
void InitCrypto (bool precomputation) void InitCrypto (bool precomputation)
{ {
i2p::cpu::Detect ();
SSL_library_init (); SSL_library_init ();
/* auto numLocks = CRYPTO_num_locks(); /* auto numLocks = CRYPTO_num_locks();
for (int i = 0; i < numLocks; i++) for (int i = 0; i < numLocks; i++)

121
libi2pd/Crypto.h

@ -16,6 +16,7 @@
#include "Base.h" #include "Base.h"
#include "Tag.h" #include "Tag.h"
#include "CPU.h"
namespace i2p namespace i2p
{ {
@ -68,33 +69,30 @@ namespace crypto
void operator^=(const ChipherBlock& other) // XOR void operator^=(const ChipherBlock& other) // XOR
{ {
#if defined(__AVX__) // AVX if (i2p::cpu::avx)
__asm__ {
( #ifdef AVX
"vmovups (%[buf]), %%xmm0 \n" __asm__
"vmovups (%[other]), %%xmm1 \n" (
"vxorps %%xmm0, %%xmm1, %%xmm0 \n" "vmovups (%[buf]), %%xmm0 \n"
"vmovups %%xmm0, (%[buf]) \n" "vmovups (%[other]), %%xmm1 \n"
: "vxorps %%xmm0, %%xmm1, %%xmm0 \n"
: [buf]"r"(buf), [other]"r"(other.buf) "vmovups %%xmm0, (%[buf]) \n"
: "%xmm0", "%xmm1", "memory" :
); : [buf]"r"(buf), [other]"r"(other.buf)
#elif defined(__SSE__) // SSE : "%xmm0", "%xmm1", "memory"
__asm__ );
(
"movups (%[buf]), %%xmm0 \n"
"movups (%[other]), %%xmm1 \n"
"pxor %%xmm1, %%xmm0 \n"
"movups %%xmm0, (%[buf]) \n"
:
: [buf]"r"(buf), [other]"r"(other.buf)
: "%xmm0", "%xmm1", "memory"
);
#else #else
// TODO: implement it better for (int i = 0; i < 16; i++)
for (int i = 0; i < 16; i++) buf[i] ^= other.buf[i];
buf[i] ^= other.buf[i];
#endif #endif
}
else
{
// TODO: implement it better
for (int i = 0; i < 16; i++)
buf[i] ^= other.buf[i];
}
} }
}; };
@ -138,69 +136,40 @@ namespace crypto
private: private:
AESAlignedBuffer<240> m_KeySchedule; // 14 rounds for AES-256, 240 bytes AESAlignedBuffer<240> m_KeySchedule; // 14 rounds for AES-256, 240 bytes
}; };
#endif
class ECBEncryptionAESNI: public ECBCryptoAESNI #ifdef AESNI
{ class ECBEncryption: public ECBCryptoAESNI
public: #else
void SetKey (const AESKey& key) { ExpandKey (key); };
void Encrypt (const ChipherBlock * in, ChipherBlock * out);
};
class ECBDecryptionAESNI: public ECBCryptoAESNI
{
public:
void SetKey (const AESKey& key);
void Decrypt (const ChipherBlock * in, ChipherBlock * out);
};
typedef ECBEncryptionAESNI ECBEncryption;
typedef ECBDecryptionAESNI ECBDecryption;
#else // use openssl
class ECBEncryption class ECBEncryption
#endif
{ {
public: public:
void SetKey (const AESKey& key) void SetKey (const AESKey& key);
{
AES_set_encrypt_key (key, 256, &m_Key);
}
void Encrypt (const ChipherBlock * in, ChipherBlock * out)
{
AES_encrypt (in->buf, out->buf, &m_Key);
}
private: void Encrypt(const ChipherBlock * in, ChipherBlock * out);
AES_KEY m_Key; private:
AES_KEY m_Key;
}; };
#ifdef AESNI
class ECBDecryption: public ECBCryptoAESNI
#else
class ECBDecryption class ECBDecryption
#endif
{ {
public: public:
void SetKey (const AESKey& key) void SetKey (const AESKey& key);
{ void Decrypt (const ChipherBlock * in, ChipherBlock * out);
AES_set_decrypt_key (key, 256, &m_Key);
}
void Decrypt (const ChipherBlock * in, ChipherBlock * out)
{
AES_decrypt (in->buf, out->buf, &m_Key);
}
private: private:
AES_KEY m_Key; AES_KEY m_Key;
}; };
#endif
class CBCEncryption class CBCEncryption
{ {
public: public:
@ -214,6 +183,8 @@ namespace crypto
void Encrypt (const uint8_t * in, std::size_t len, uint8_t * out); void Encrypt (const uint8_t * in, std::size_t len, uint8_t * out);
void Encrypt (const uint8_t * in, uint8_t * out); // one block void Encrypt (const uint8_t * in, uint8_t * out); // one block
ECBEncryption & ECB() { return m_ECBEncryption; }
private: private:
AESAlignedBuffer<16> m_LastBlock; AESAlignedBuffer<16> m_LastBlock;
@ -234,6 +205,8 @@ namespace crypto
void Decrypt (const uint8_t * in, std::size_t len, uint8_t * out); void Decrypt (const uint8_t * in, std::size_t len, uint8_t * out);
void Decrypt (const uint8_t * in, uint8_t * out); // one block void Decrypt (const uint8_t * in, uint8_t * out); // one block
ECBDecryption & ECB() { return m_ECBDecryption; }
private: private:
AESAlignedBuffer<16> m_IV; AESAlignedBuffer<16> m_IV;
@ -255,11 +228,7 @@ namespace crypto
private: private:
ECBEncryption m_IVEncryption; ECBEncryption m_IVEncryption;
#ifdef AESNI
ECBEncryption m_LayerEncryption;
#else
CBCEncryption m_LayerEncryption; CBCEncryption m_LayerEncryption;
#endif
}; };
class TunnelDecryption // with double IV encryption class TunnelDecryption // with double IV encryption
@ -277,11 +246,7 @@ namespace crypto
private: private:
ECBDecryption m_IVDecryption; ECBDecryption m_IVDecryption;
#ifdef AESNI
ECBDecryption m_LayerDecryption;
#else
CBCDecryption m_LayerDecryption; CBCDecryption m_LayerDecryption;
#endif
}; };
void InitCrypto (bool precomputation); void InitCrypto (bool precomputation);

Loading…
Cancel
Save