/* AES block encryption and decryption routines (C implementation). */
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/

#include "aesopt.h"
#include "aestab.h"

/* Select how the public function names used in this file are formed.
   When USE_INTEL_AES_IF_PRESENT is defined, aes_ni.h is included and
   aes_xi() is NOT defined here (presumably that header supplies it --
   confirm there).  Otherwise aes_xi(name) simply expands to aes_name.
*/
#if defined( USE_INTEL_AES_IF_PRESENT )
#  include "aes_ni.h"
#else
/* map names here to provide the external API ('name' -> 'aes_name') */
#  define aes_xi(x) aes_ ## x
#endif

/* give the functions below C linkage when compiled as C++ */
#if defined(__cplusplus)
extern "C"
{
#endif

/* Helper macros for handling the cipher state held in local variables.
   s(), word_in() and word_out() are not defined in this file; they are
   presumably provided by aesopt.h -- confirm there.
*/

/* state word c of y = input word c of x, xored with key word (k)[c] */
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])

/* pass state word c of x to word_out() for position c of output y */
#define so(y,x,c)   word_out(y, c, s(x,c))

/* Declarator list for two sets of four state words: two 4-element arrays
   when ARRAYS is defined, otherwise eight scalars named x0..x3, y0..y3.
*/
#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* The macros below expand to several statements.  Each is wrapped in
   do { ... } while(0) so that it behaves as exactly one statement when
   followed by ';', including under an unbraced if/else or loop.
*/

/* copy all four state words from x to y */
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while(0)

/* apply si() to words 0..3 */
#define state_in(y,x,k) do { si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); } while(0)

/* apply so() to words 0..3 */
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while(0)

/* apply the round macro rm to words 0..3, using key words k[0..3] */
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while(0)

#if ( FUNCS_IN_C & ENCRYPTION_IN_C )

/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimisation with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/

#if defined( _MSC_VER ) && !defined( _WIN64 )
#pragma optimize( "s", on )
#endif

/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values.  In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/

/* for row r of output column c, selects input state word (c + r) mod 4 */
#define fwd_var(x,r,c)\
 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
 : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
 :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))

/* fwd_rnd: one word of a normal forward round.  The table based variants
   do not use fwd_mcol(), so they remove dec_fmvars and the function below
   then declares no helper variables for it.
*/
#if defined(FT4_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#elif defined(FT1_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
#endif

/* fwd_lrnd: one word of the last forward round (no fwd_mcol() step) */
#if defined(FL4_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
#endif

/* Encrypt one block.

   in   input block; four words are read from it through word_in()
   out  output block; four words are written to it through word_out()
   cx   encryption context: cx->ks is the key schedule and cx->inf.b[0]
        must be 10 * 16, 12 * 16 or 14 * 16 (presumably 16 times the
        number of rounds, as set by the key setup code -- confirm there)

   Returns EXIT_FAILURE, without touching 'out', if cx->inf.b[0] is not
   one of the three accepted values; otherwise EXIT_SUCCESS.
*/
AES_RETURN aes_xi(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
{
    uint32_t        locals(b0, b1);     /* two alternating state buffers */
    const uint32_t  *kp;                /* current position in the key schedule */
#if defined( dec_fmvars )
    dec_fmvars; /* declare variables for fwd_mcol() if needed */
#endif

    if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
        return EXIT_FAILURE;

    /* load the input and xor in the first N_COLS key words */
    kp = cx->ks;
    state_in(b0, in, kp);

#if (ENC_UNROLL == FULL)

    /* The longer variants run two extra rounds each, advance kp past the
       key words they used and then deliberately fall through into the
       shared ten-round tail.
    */
    switch(cx->inf.b[0])
    {
    case 14 * 16:
        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
        kp += 2 * N_COLS;
        /* fall through */
    case 12 * 16:
        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
        kp += 2 * N_COLS;
        /* fall through */
    case 10 * 16:
        round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
        round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
        round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
        round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
        round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
        round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
        round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
    }

#else

#if (ENC_UNROLL == PARTIAL)
    {   uint32_t    rnd;
        /* two rounds per iteration, ping-ponging between b0 and b1 */
        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
        {
            kp += N_COLS;
            round(fwd_rnd, b1, b0, kp);
            kp += N_COLS;
            round(fwd_rnd, b0, b1, kp);
        }
        /* one more normal round so the result is in b1 for the last round */
        kp += N_COLS;
        round(fwd_rnd,  b1, b0, kp);
#else
    {   uint32_t    rnd;
        /* one round per iteration; the result is copied back into b0 */
        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
        {
            kp += N_COLS;
            round(fwd_rnd, b1, b0, kp);
            l_copy(b0, b1);
        }
#endif
        /* last round, shared by both loop forms above (reads b1, writes b0) */
        kp += N_COLS;
        round(fwd_lrnd, b0, b1, kp);
    }
#endif

    state_out(out, b0);
    return EXIT_SUCCESS;
}

#endif

#if ( FUNCS_IN_C & DECRYPTION_IN_C)

/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimisation with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/

#if defined( _MSC_VER ) && !defined( _WIN64 )
#pragma optimize( "t", on )
#endif

/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values.  In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)
*/

/* for row r of output column c, selects input state word (c - r) mod 4 */
#define inv_var(x,r,c)\
 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
 : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
 :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))

/* inv_rnd: one word of a normal inverse round.  The table based variants
   do not use inv_mcol(), so they remove dec_imvars and the function below
   then declares no helper variables for it.  Note that the table-free
   variant applies inv_mcol() AFTER the key word has been xored in.
*/
#if defined(IT4_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
#elif defined(IT1_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
#endif

/* inv_lrnd: one word of the last inverse round (no inv_mcol() step) */
#if defined(IL4_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
#endif

/* This code can work with the decryption key schedule in the   */
/* order that is used for encryption (where the 1st decryption  */
/* round key is at the high end of the schedule) or with a key  */
/* schedule that has been reversed to put the 1st decryption    */
/* round key at the low end of the schedule in memory (when     */
/* AES_REV_DKS is defined)                                      */

/* key_ofs  non-zero when the first decryption round key is at the high
            end of the schedule
   rnd_key  address of the round key n round keys on from kp in the
            decryption direction; it reads the local variable kp and the
            argument is parenthesised so any expression may be passed
*/
#ifdef AES_REV_DKS
#define key_ofs     0
#define rnd_key(n)  (kp + (n) * N_COLS)
#else
#define key_ofs     1
#define rnd_key(n)  (kp - (n) * N_COLS)
#endif

/* Decrypt one block.

   in   input block; four words are read from it through word_in()
   out  output block; four words are written to it through word_out()
   cx   decryption context: cx->ks is the key schedule and cx->inf.b[0]
        must be 10 * 16, 12 * 16 or 14 * 16 (presumably 16 times the
        number of rounds, as set by the key setup code -- confirm there)

   Returns EXIT_FAILURE, without touching 'out', if cx->inf.b[0] is not
   one of the three accepted values; otherwise EXIT_SUCCESS.
*/
AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
{
    uint32_t        locals(b0, b1);     /* two alternating state buffers */
#if defined( dec_imvars )
    dec_imvars; /* declare variables for inv_mcol() if needed */
#endif
    const uint32_t  *kp;                /* current position in the key schedule */

    if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
        return EXIT_FAILURE;

    /* start at the end of the schedule holding the first decryption round
       key: (cx->inf.b[0] >> 2) words in when key_ofs is set, else word 0 */
    kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
    state_in(b0, in, kp);

#if (DEC_UNROLL == FULL)

    /* Re-base kp at the opposite end of the schedule so that the rounds
       can be addressed with fixed negative offsets, rnd_key(0) being the
       key for the last round.  The longer variants run two extra rounds
       each and then deliberately fall through into the shared tail.
    */
    kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
    switch(cx->inf.b[0])
    {
    case 14 * 16:
        round(inv_rnd,  b1, b0, rnd_key(-13));
        round(inv_rnd,  b0, b1, rnd_key(-12));
        /* fall through */
    case 12 * 16:
        round(inv_rnd,  b1, b0, rnd_key(-11));
        round(inv_rnd,  b0, b1, rnd_key(-10));
        /* fall through */
    case 10 * 16:
        round(inv_rnd,  b1, b0, rnd_key(-9));
        round(inv_rnd,  b0, b1, rnd_key(-8));
        round(inv_rnd,  b1, b0, rnd_key(-7));
        round(inv_rnd,  b0, b1, rnd_key(-6));
        round(inv_rnd,  b1, b0, rnd_key(-5));
        round(inv_rnd,  b0, b1, rnd_key(-4));
        round(inv_rnd,  b1, b0, rnd_key(-3));
        round(inv_rnd,  b0, b1, rnd_key(-2));
        round(inv_rnd,  b1, b0, rnd_key(-1));
        round(inv_lrnd, b0, b1, rnd_key( 0));
    }

#else

#if (DEC_UNROLL == PARTIAL)
    {   uint32_t    rnd;
        /* two rounds per iteration, ping-ponging between b0 and b1 */
        for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
        {
            kp = rnd_key(1);
            round(inv_rnd, b1, b0, kp);
            kp = rnd_key(1);
            round(inv_rnd, b0, b1, kp);
        }
        /* one more normal round so the result is in b1 for the last round */
        kp = rnd_key(1);
        round(inv_rnd, b1, b0, kp);
#else
    {   uint32_t    rnd;
        /* one round per iteration; the result is copied back into b0 */
        for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
        {
            kp = rnd_key(1);
            round(inv_rnd, b1, b0, kp);
            l_copy(b0, b1);
        }
#endif
        /* last round, shared by both loop forms above (reads b1, writes b0) */
        kp = rnd_key(1);
        round(inv_lrnd, b0, b1, kp);
    }
#endif

    state_out(out, b0);
    return EXIT_SUCCESS;
}

#endif

/* close the extern "C" block opened near the top of the file */
#if defined(__cplusplus)
}
#endif