1
0
mirror of https://github.com/PurpleI2P/i2pd.git synced 2025-01-09 11:27:53 +00:00

some performance improvements

This commit is contained in:
orignal 2018-11-25 10:33:48 -05:00
parent 72c8fd257c
commit cf0fc3a4a9
2 changed files with 45 additions and 39 deletions

View File

@@ -1130,9 +1130,8 @@ namespace crypto
 }
 // adLen and msgLen
 htole64buf (padding, adLen);
-polyHash.Update (padding, 8);
-htole64buf (padding, msgLen);
-polyHash.Update (padding, 8);
+htole64buf (padding + 8, msgLen);
+polyHash.Update (padding, 16);
 if (encrypt)
 // calculate Poly1305 tag and write in after encrypted data

View File

@@ -108,48 +108,55 @@ namespace crypto
 void operator^=(const ChipherBlock& other) // XOR
 {
+if (!(((size_t)buf | (size_t)other.buf) & 0x0F)) // multiple of 16 ?
+{
+// try 128 bits if applicable
 #ifdef __AVX__
 if (i2p::cpu::avx)
 {
 __asm__
+(
+"vmovaps (%[buf]), %%xmm0 \n"
+"vmovaps (%[other]), %%xmm1 \n"
+"vxorps %%xmm0, %%xmm1, %%xmm0 \n"
+"vmovaps %%xmm0, (%[buf]) \n"
+:
+: [buf]"r"(buf), [other]"r"(other.buf)
+: "%xmm0", "%xmm1", "memory"
+);
+}
+else
+#endif
+{
+#if defined(__SSE__) // SSE
+__asm__
 (
-"vmovups (%[buf]), %%xmm0 \n"
-"vmovups (%[other]), %%xmm1 \n"
-"vxorps %%xmm0, %%xmm1, %%xmm0 \n"
-"vmovups %%xmm0, (%[buf]) \n"
+"movaps (%[buf]), %%xmm0 \n"
+"movaps (%[other]), %%xmm1 \n"
+"pxor %%xmm1, %%xmm0 \n"
+"movaps %%xmm0, (%[buf]) \n"
 :
 : [buf]"r"(buf), [other]"r"(other.buf)
 : "%xmm0", "%xmm1", "memory"
 );
-}
-else
-#endif
+#else
+// if not we always can cast to uint64_t *
+((uint64_t *)buf)[0] ^= ((uint64_t *)other.buf)[0];
+((uint64_t *)buf)[1] ^= ((uint64_t *)other.buf)[1];
+#endif
+}
+}
+else if (!(((size_t)buf | (size_t)other.buf) & 0x03)) // multiple of 4 ?
 {
-#if defined(__SSE__) // SSE
-__asm__
-(
-"movups (%[buf]), %%xmm0 \n"
-"movups (%[other]), %%xmm1 \n"
-"pxor %%xmm1, %%xmm0 \n"
-"movups %%xmm0, (%[buf]) \n"
-:
-: [buf]"r"(buf), [other]"r"(other.buf)
-: "%xmm0", "%xmm1", "memory"
-);
-#else
-if (!(((size_t)buf | (size_t)other.buf) & 0x03)) // multiple of 4 ?
-{
-// we are good to cast to uint32_t *
-for (int i = 0; i < 4; i++)
-((uint32_t *)buf)[i] ^= ((uint32_t *)other.buf)[i];
-}
-else
-{
-for (int i = 0; i < 16; i++)
-buf[i] ^= other.buf[i];
-}
-#endif
-}
+// we are good to cast to uint32_t *
+for (int i = 0; i < 4; i++)
+((uint32_t *)buf)[i] ^= ((uint32_t *)other.buf)[i];
+}
+else
+{
+for (int i = 0; i < 16; i++)
+buf[i] ^= other.buf[i];
+}
 }
 };