/* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */ /* * SHAvite-3 implementation. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2007-2010 Projet RNRT SAPHIR * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @author Thomas Pornin */ /* * As of round 2 of the SHA-3 competition, the published reference * implementation and test vectors are wrong, because they use * big-endian AES tables while the internal decoding uses little-endian. * The code below follows the specification. To turn it into a code * which follows the reference implementation (the one called "BugFix" * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out * the code below (from the '#define AES_BIG_ENDIAN...' to the definition * of the AES_ROUND_NOKEY macro) and replace it with the version which * is commented out afterwards. */ #define AES_BIG_ENDIAN 0 #include "aes_helper.cl" #define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \ sph_u32 t0 = (x0); \ sph_u32 t1 = (x1); \ sph_u32 t2 = (x2); \ sph_u32 t3 = (x3); \ AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \ } while (0) #define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \ sph_u32 kt; \ AES_ROUND_NOKEY(k1, k2, k3, k0); \ kt = (k0); \ (k0) = (k1); \ (k1) = (k2); \ (k2) = (k3); \ (k3) = kt; \ } while (0) /* * This function assumes that "msg" is aligned for 32-bit access. */ #define c512(msg) do { \ sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \ sph_u32 p8, p9, pA, pB, pC, pD, pE, pF; \ sph_u32 x0, x1, x2, x3; \ int r; \ \ p0 = h0; \ p1 = h1; \ p2 = h2; \ p3 = h3; \ p4 = h4; \ p5 = h5; \ p6 = h6; \ p7 = h7; \ p8 = h8; \ p9 = h9; \ pA = hA; \ pB = hB; \ pC = hC; \ pD = hD; \ pE = hE; \ pF = hF; \ /* round 0 */ \ x0 = p4 ^ rk00; \ x1 = p5 ^ rk01; \ x2 = p6 ^ rk02; \ x3 = p7 ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk04; \ x1 ^= rk05; \ x2 ^= rk06; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk08; \ x1 ^= rk09; \ x2 ^= rk0A; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk0C; \ x1 ^= rk0D; \ x2 ^= rk0E; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p0 ^= x0; \ p1 ^= x1; \ p2 ^= x2; \ p3 ^= x3; \ x0 = pC ^ rk10; \ x1 = pD ^ rk11; \ x2 = pE ^ rk12; \ x3 = pF ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk14; \ x1 ^= rk15; \ x2 ^= rk16; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk18; \ x1 ^= rk19; \ x2 ^= rk1A; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ x0 ^= rk1C; \ x1 ^= rk1D; \ x2 ^= rk1E; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p8 ^= x0; \ p9 ^= x1; \ pA ^= x2; \ pB ^= x3; \ \ for (r = 0; r < 3; r ++) { \ /* round 1, 5, 9 */ \ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \ rk00 ^= rk1C; \ rk01 ^= rk1D; \ rk02 ^= rk1E; \ rk03 ^= rk1F; \ if (r == 0) { \ rk00 ^= sc_count0; \ rk01 ^= sc_count1; \ rk02 ^= sc_count2; \ rk03 ^= SPH_T32(~sc_count3); \ } \ x0 = p0 ^ rk00; \ x1 = p1 ^ rk01; \ x2 = p2 ^ rk02; \ x3 = p3 ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \ rk04 ^= rk00; \ rk05 ^= rk01; \ rk06 ^= rk02; \ rk07 ^= rk03; \ if (r == 1) { \ rk04 ^= sc_count3; \ rk05 ^= sc_count2; \ rk06 ^= sc_count1; \ rk07 ^= SPH_T32(~sc_count0); \ } \ x0 ^= rk04; \ x1 ^= rk05; \ x2 ^= rk06; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \ rk08 ^= rk04; \ rk09 ^= rk05; \ rk0A ^= rk06; \ rk0B ^= rk07; \ x0 ^= rk08; \ x1 ^= rk09; \ x2 ^= rk0A; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \ rk0C ^= rk08; \ rk0D ^= rk09; \ rk0E ^= rk0A; \ rk0F ^= rk0B; \ x0 ^= rk0C; \ x1 ^= rk0D; \ x2 ^= rk0E; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ pC ^= x0; \ pD ^= x1; \ pE ^= x2; \ pF ^= x3; \ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \ rk10 ^= rk0C; \ rk11 ^= rk0D; \ rk12 ^= rk0E; \ rk13 ^= rk0F; \ x0 = p8 ^ rk10; \ x1 = p9 ^ rk11; \ x2 = pA ^ rk12; \ x3 = pB ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \ rk14 ^= rk10; \ rk15 ^= rk11; \ rk16 ^= rk12; \ rk17 ^= rk13; \ x0 ^= rk14; \ x1 ^= rk15; \ x2 ^= rk16; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \ rk18 ^= rk14; \ rk19 ^= rk15; \ rk1A ^= rk16; \ rk1B ^= rk17; \ x0 ^= rk18; \ x1 ^= rk19; \ x2 ^= rk1A; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \ rk1C ^= rk18; \ rk1D ^= rk19; \ rk1E ^= rk1A; \ rk1F ^= rk1B; \ if (r == 2) { \ rk1C ^= sc_count2; \ rk1D ^= sc_count3; \ rk1E ^= sc_count0; \ rk1F ^= SPH_T32(~sc_count1); \ } \ x0 ^= rk1C; \ x1 ^= rk1D; \ x2 ^= rk1E; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p4 ^= x0; \ p5 ^= x1; \ p6 ^= x2; \ p7 ^= x3; \ /* round 2, 6, 10 */ \ rk00 ^= rk19; \ x0 = pC ^ rk00; \ rk01 ^= rk1A; \ x1 = pD ^ rk01; \ rk02 ^= rk1B; \ x2 = pE ^ rk02; \ rk03 ^= rk1C; \ x3 = pF ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk04 ^= rk1D; \ x0 ^= rk04; \ rk05 ^= rk1E; \ x1 ^= rk05; \ rk06 ^= rk1F; \ x2 ^= rk06; \ rk07 ^= rk00; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk08 ^= rk01; \ x0 ^= rk08; \ rk09 ^= rk02; \ x1 ^= rk09; \ rk0A ^= rk03; \ x2 ^= rk0A; \ rk0B ^= rk04; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk0C ^= rk05; \ x0 ^= rk0C; \ rk0D ^= rk06; \ x1 ^= rk0D; \ rk0E ^= rk07; \ x2 ^= rk0E; \ rk0F ^= rk08; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p8 ^= x0; \ p9 ^= x1; \ pA ^= x2; \ pB ^= x3; \ rk10 ^= rk09; \ x0 = p4 ^ rk10; \ rk11 ^= rk0A; \ x1 = p5 ^ rk11; \ rk12 ^= rk0B; \ x2 = p6 ^ rk12; \ rk13 ^= rk0C; \ x3 = p7 ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk14 ^= rk0D; \ x0 ^= rk14; \ rk15 ^= rk0E; \ x1 ^= rk15; \ rk16 ^= rk0F; \ x2 ^= rk16; \ rk17 ^= rk10; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk18 ^= rk11; \ x0 ^= rk18; \ rk19 ^= rk12; \ x1 ^= rk19; \ rk1A ^= rk13; \ x2 ^= rk1A; \ rk1B ^= rk14; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk1C ^= rk15; \ x0 ^= rk1C; \ rk1D ^= rk16; \ x1 ^= rk1D; \ rk1E ^= rk17; \ x2 ^= rk1E; \ rk1F ^= rk18; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p0 ^= x0; \ p1 ^= x1; \ p2 ^= x2; \ p3 ^= x3; \ /* round 3, 7, 11 */ \ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \ rk00 ^= rk1C; \ rk01 ^= rk1D; \ rk02 ^= rk1E; \ rk03 ^= rk1F; \ x0 = p8 ^ rk00; \ x1 = p9 ^ rk01; \ x2 = pA ^ rk02; \ x3 = pB ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \ rk04 ^= rk00; \ rk05 ^= rk01; \ rk06 ^= rk02; \ rk07 ^= rk03; \ x0 ^= rk04; \ x1 ^= rk05; \ x2 ^= rk06; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \ rk08 ^= rk04; \ rk09 ^= rk05; \ rk0A ^= rk06; \ rk0B ^= rk07; \ x0 ^= rk08; \ x1 ^= rk09; \ x2 ^= rk0A; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \ rk0C ^= rk08; \ rk0D ^= rk09; \ rk0E ^= rk0A; \ rk0F ^= rk0B; \ x0 ^= rk0C; \ x1 ^= rk0D; \ x2 ^= rk0E; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p4 ^= x0; \ p5 ^= x1; \ p6 ^= x2; \ p7 ^= x3; \ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \ rk10 ^= rk0C; \ rk11 ^= rk0D; \ rk12 ^= rk0E; \ rk13 ^= rk0F; \ x0 = p0 ^ rk10; \ x1 = p1 ^ rk11; \ x2 = p2 ^ rk12; \ x3 = p3 ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \ rk14 ^= rk10; \ rk15 ^= rk11; \ rk16 ^= rk12; \ rk17 ^= rk13; \ x0 ^= rk14; \ x1 ^= rk15; \ x2 ^= rk16; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \ rk18 ^= rk14; \ rk19 ^= rk15; \ rk1A ^= rk16; \ rk1B ^= rk17; \ x0 ^= rk18; \ x1 ^= rk19; \ x2 ^= rk1A; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \ rk1C ^= rk18; \ rk1D ^= rk19; \ rk1E ^= rk1A; \ rk1F ^= rk1B; \ x0 ^= rk1C; \ x1 ^= rk1D; \ x2 ^= rk1E; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ pC ^= x0; \ pD ^= x1; \ pE ^= x2; \ pF ^= x3; \ /* round 4, 8, 12 */ \ rk00 ^= rk19; \ x0 = p4 ^ rk00; \ rk01 ^= rk1A; \ x1 = p5 ^ rk01; \ rk02 ^= rk1B; \ x2 = p6 ^ rk02; \ rk03 ^= rk1C; \ x3 = p7 ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk04 ^= rk1D; \ x0 ^= rk04; \ rk05 ^= rk1E; \ x1 ^= rk05; \ rk06 ^= rk1F; \ x2 ^= rk06; \ rk07 ^= rk00; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk08 ^= rk01; \ x0 ^= rk08; \ rk09 ^= rk02; \ x1 ^= rk09; \ rk0A ^= rk03; \ x2 ^= rk0A; \ rk0B ^= rk04; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk0C ^= rk05; \ x0 ^= rk0C; \ rk0D ^= rk06; \ x1 ^= rk0D; \ rk0E ^= rk07; \ x2 ^= rk0E; \ rk0F ^= rk08; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p0 ^= x0; \ p1 ^= x1; \ p2 ^= x2; \ p3 ^= x3; \ rk10 ^= rk09; \ x0 = pC ^ rk10; \ rk11 ^= rk0A; \ x1 = pD ^ rk11; \ rk12 ^= rk0B; \ x2 = pE ^ rk12; \ rk13 ^= rk0C; \ x3 = pF ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk14 ^= rk0D; \ x0 ^= rk14; \ rk15 ^= rk0E; \ x1 ^= rk15; \ rk16 ^= rk0F; \ x2 ^= rk16; \ rk17 ^= rk10; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk18 ^= rk11; \ x0 ^= rk18; \ rk19 ^= rk12; \ x1 ^= rk19; \ rk1A ^= rk13; \ x2 ^= rk1A; \ rk1B ^= rk14; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ rk1C ^= rk15; \ x0 ^= rk1C; \ rk1D ^= rk16; \ x1 ^= rk1D; \ rk1E ^= rk17; \ x2 ^= rk1E; \ rk1F ^= rk18; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p8 ^= x0; \ p9 ^= x1; \ pA ^= x2; \ pB ^= x3; \ } \ /* round 13 */ \ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \ rk00 ^= rk1C; \ rk01 ^= rk1D; \ rk02 ^= rk1E; \ rk03 ^= rk1F; \ x0 = p0 ^ rk00; \ x1 = p1 ^ rk01; \ x2 = p2 ^ rk02; \ x3 = p3 ^ rk03; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \ rk04 ^= rk00; \ rk05 ^= rk01; \ rk06 ^= rk02; \ rk07 ^= rk03; \ x0 ^= rk04; \ x1 ^= rk05; \ x2 ^= rk06; \ x3 ^= rk07; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \ rk08 ^= rk04; \ rk09 ^= rk05; \ rk0A ^= rk06; \ rk0B ^= rk07; \ x0 ^= rk08; \ x1 ^= rk09; \ x2 ^= rk0A; \ x3 ^= rk0B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \ rk0C ^= rk08; \ rk0D ^= rk09; \ rk0E ^= rk0A; \ rk0F ^= rk0B; \ x0 ^= rk0C; \ x1 ^= rk0D; \ x2 ^= rk0E; \ x3 ^= rk0F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ pC ^= x0; \ pD ^= x1; \ pE ^= x2; \ pF ^= x3; \ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \ rk10 ^= rk0C; \ rk11 ^= rk0D; \ rk12 ^= rk0E; \ rk13 ^= rk0F; \ x0 = p8 ^ rk10; \ x1 = p9 ^ rk11; \ x2 = pA ^ rk12; \ x3 = pB ^ rk13; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \ rk14 ^= rk10; \ rk15 ^= rk11; \ rk16 ^= rk12; \ rk17 ^= rk13; \ x0 ^= rk14; \ x1 ^= rk15; \ x2 ^= rk16; \ x3 ^= rk17; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \ rk18 ^= rk14 ^ sc_count1; \ rk19 ^= rk15 ^ sc_count0; \ rk1A ^= rk16 ^ sc_count3; \ rk1B ^= rk17 ^ SPH_T32(~sc_count2); \ x0 ^= rk18; \ x1 ^= rk19; \ x2 ^= rk1A; \ x3 ^= rk1B; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \ rk1C ^= rk18; \ rk1D ^= rk19; \ rk1E ^= rk1A; \ rk1F ^= rk1B; \ x0 ^= rk1C; \ x1 ^= rk1D; \ x2 ^= rk1E; \ x3 ^= rk1F; \ AES_ROUND_NOKEY(x0, x1, x2, x3); \ p4 ^= x0; \ p5 ^= x1; \ p6 ^= x2; \ p7 ^= x3; \ h0 ^= p8; \ h1 ^= p9; \ h2 ^= pA; \ h3 ^= pB; \ h4 ^= pC; \ h5 ^= pD; \ h6 ^= pE; \ h7 ^= pF; \ h8 ^= p0; \ h9 ^= p1; \ hA ^= p2; \ hB ^= p3; \ hC ^= p4; \ hD ^= p5; \ hE ^= p6; \ hF ^= p7; \ } while (0)