OpenCL GPU miner
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

608 lines
13 KiB

/* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */
/*
* SHAvite-3 implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
/*
* As of round 2 of the SHA-3 competition, the published reference
* implementation and test vectors are wrong, because they use
* big-endian AES tables while the internal decoding uses little-endian.
* The code below follows the specification. To turn it into code
* that follows the reference implementation (the one called "BugFix"
* on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
* the code below (from the '#define AES_BIG_ENDIAN...' line to the end
* of the AES_ROUND_NOKEY macro definition) and replace it with the
* alternative, commented-out version this note refers to.
* NOTE: that commented-out alternative is not present in this file; it
* would have to be taken from the upstream source this file derives from.
*/
#define AES_BIG_ENDIAN 0
#include "aes_helper.cl"
/*
 * AES_ROUND_NOKEY(x0, x1, x2, x3): run AES_ROUND_NOKEY_LE over the four
 * words x0..x3 and store the result back into the same four variables.
 *
 * The arguments are first copied into block-local temporaries, and it is
 * those copies that are handed to AES_ROUND_NOKEY_LE as inputs, while
 * x0..x3 themselves are passed as the output operands. All four
 * arguments must therefore be modifiable lvalues.
 *
 * AES_ROUND_NOKEY_LE is not defined in this file (presumably it comes
 * from aes_helper.cl and performs one AES round without key addition
 * using little-endian tables -- confirm there).
 */
#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
        sph_u32 aes_in0 = (x0), aes_in1 = (x1); \
        sph_u32 aes_in2 = (x2), aes_in3 = (x3); \
        AES_ROUND_NOKEY_LE(aes_in0, aes_in1, aes_in2, aes_in3, \
            x0, x1, x2, x3); \
    } while (0)
/*
 * KEY_EXPAND_ELT(k0, k1, k2, k3): update four round-key words in place.
 *
 * The words are taken in rotated order (k1, k2, k3, k0), passed through
 * AES_ROUND_NOKEY, and the four results are stored back as the new
 * (k0, k1, k2, k3) in that order. All four arguments must be distinct
 * modifiable lvalues.
 */
#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
        sph_u32 ke0 = (k1); \
        sph_u32 ke1 = (k2); \
        sph_u32 ke2 = (k3); \
        sph_u32 ke3 = (k0); \
        /* the round works on the rotated snapshot only */ \
        AES_ROUND_NOKEY(ke0, ke1, ke2, ke3); \
        (k0) = ke0; \
        (k1) = ke1; \
        (k2) = ke2; \
        (k3) = ke3; \
    } while (0)
/*
* This function assumes that "msg" is aligned for 32-bit access.
*/
#define c512(msg) do { \
sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \
sph_u32 p8, p9, pA, pB, pC, pD, pE, pF; \
sph_u32 x0, x1, x2, x3; \
int r; \
\
p0 = h0; \
p1 = h1; \
p2 = h2; \
p3 = h3; \
p4 = h4; \
p5 = h5; \
p6 = h6; \
p7 = h7; \
p8 = h8; \
p9 = h9; \
pA = hA; \
pB = hB; \
pC = hC; \
pD = hD; \
pE = hE; \
pF = hF; \
/* round 0 */ \
x0 = p4 ^ rk00; \
x1 = p5 ^ rk01; \
x2 = p6 ^ rk02; \
x3 = p7 ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk04; \
x1 ^= rk05; \
x2 ^= rk06; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk08; \
x1 ^= rk09; \
x2 ^= rk0A; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk0C; \
x1 ^= rk0D; \
x2 ^= rk0E; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
x0 = pC ^ rk10; \
x1 = pD ^ rk11; \
x2 = pE ^ rk12; \
x3 = pF ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk14; \
x1 ^= rk15; \
x2 ^= rk16; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk18; \
x1 ^= rk19; \
x2 ^= rk1A; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
x0 ^= rk1C; \
x1 ^= rk1D; \
x2 ^= rk1E; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p8 ^= x0; \
p9 ^= x1; \
pA ^= x2; \
pB ^= x3; \
\
for (r = 0; r < 3; r ++) { \
/* round 1, 5, 9 */ \
KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
rk00 ^= rk1C; \
rk01 ^= rk1D; \
rk02 ^= rk1E; \
rk03 ^= rk1F; \
if (r == 0) { \
rk00 ^= sc_count0; \
rk01 ^= sc_count1; \
rk02 ^= sc_count2; \
rk03 ^= SPH_T32(~sc_count3); \
} \
x0 = p0 ^ rk00; \
x1 = p1 ^ rk01; \
x2 = p2 ^ rk02; \
x3 = p3 ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
rk04 ^= rk00; \
rk05 ^= rk01; \
rk06 ^= rk02; \
rk07 ^= rk03; \
if (r == 1) { \
rk04 ^= sc_count3; \
rk05 ^= sc_count2; \
rk06 ^= sc_count1; \
rk07 ^= SPH_T32(~sc_count0); \
} \
x0 ^= rk04; \
x1 ^= rk05; \
x2 ^= rk06; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
rk08 ^= rk04; \
rk09 ^= rk05; \
rk0A ^= rk06; \
rk0B ^= rk07; \
x0 ^= rk08; \
x1 ^= rk09; \
x2 ^= rk0A; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
rk0C ^= rk08; \
rk0D ^= rk09; \
rk0E ^= rk0A; \
rk0F ^= rk0B; \
x0 ^= rk0C; \
x1 ^= rk0D; \
x2 ^= rk0E; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
pC ^= x0; \
pD ^= x1; \
pE ^= x2; \
pF ^= x3; \
KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
rk10 ^= rk0C; \
rk11 ^= rk0D; \
rk12 ^= rk0E; \
rk13 ^= rk0F; \
x0 = p8 ^ rk10; \
x1 = p9 ^ rk11; \
x2 = pA ^ rk12; \
x3 = pB ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
rk14 ^= rk10; \
rk15 ^= rk11; \
rk16 ^= rk12; \
rk17 ^= rk13; \
x0 ^= rk14; \
x1 ^= rk15; \
x2 ^= rk16; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
rk18 ^= rk14; \
rk19 ^= rk15; \
rk1A ^= rk16; \
rk1B ^= rk17; \
x0 ^= rk18; \
x1 ^= rk19; \
x2 ^= rk1A; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
rk1C ^= rk18; \
rk1D ^= rk19; \
rk1E ^= rk1A; \
rk1F ^= rk1B; \
if (r == 2) { \
rk1C ^= sc_count2; \
rk1D ^= sc_count3; \
rk1E ^= sc_count0; \
rk1F ^= SPH_T32(~sc_count1); \
} \
x0 ^= rk1C; \
x1 ^= rk1D; \
x2 ^= rk1E; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
/* round 2, 6, 10 */ \
rk00 ^= rk19; \
x0 = pC ^ rk00; \
rk01 ^= rk1A; \
x1 = pD ^ rk01; \
rk02 ^= rk1B; \
x2 = pE ^ rk02; \
rk03 ^= rk1C; \
x3 = pF ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk04 ^= rk1D; \
x0 ^= rk04; \
rk05 ^= rk1E; \
x1 ^= rk05; \
rk06 ^= rk1F; \
x2 ^= rk06; \
rk07 ^= rk00; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk08 ^= rk01; \
x0 ^= rk08; \
rk09 ^= rk02; \
x1 ^= rk09; \
rk0A ^= rk03; \
x2 ^= rk0A; \
rk0B ^= rk04; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk0C ^= rk05; \
x0 ^= rk0C; \
rk0D ^= rk06; \
x1 ^= rk0D; \
rk0E ^= rk07; \
x2 ^= rk0E; \
rk0F ^= rk08; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p8 ^= x0; \
p9 ^= x1; \
pA ^= x2; \
pB ^= x3; \
rk10 ^= rk09; \
x0 = p4 ^ rk10; \
rk11 ^= rk0A; \
x1 = p5 ^ rk11; \
rk12 ^= rk0B; \
x2 = p6 ^ rk12; \
rk13 ^= rk0C; \
x3 = p7 ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk14 ^= rk0D; \
x0 ^= rk14; \
rk15 ^= rk0E; \
x1 ^= rk15; \
rk16 ^= rk0F; \
x2 ^= rk16; \
rk17 ^= rk10; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk18 ^= rk11; \
x0 ^= rk18; \
rk19 ^= rk12; \
x1 ^= rk19; \
rk1A ^= rk13; \
x2 ^= rk1A; \
rk1B ^= rk14; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk1C ^= rk15; \
x0 ^= rk1C; \
rk1D ^= rk16; \
x1 ^= rk1D; \
rk1E ^= rk17; \
x2 ^= rk1E; \
rk1F ^= rk18; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
/* round 3, 7, 11 */ \
KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
rk00 ^= rk1C; \
rk01 ^= rk1D; \
rk02 ^= rk1E; \
rk03 ^= rk1F; \
x0 = p8 ^ rk00; \
x1 = p9 ^ rk01; \
x2 = pA ^ rk02; \
x3 = pB ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
rk04 ^= rk00; \
rk05 ^= rk01; \
rk06 ^= rk02; \
rk07 ^= rk03; \
x0 ^= rk04; \
x1 ^= rk05; \
x2 ^= rk06; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
rk08 ^= rk04; \
rk09 ^= rk05; \
rk0A ^= rk06; \
rk0B ^= rk07; \
x0 ^= rk08; \
x1 ^= rk09; \
x2 ^= rk0A; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
rk0C ^= rk08; \
rk0D ^= rk09; \
rk0E ^= rk0A; \
rk0F ^= rk0B; \
x0 ^= rk0C; \
x1 ^= rk0D; \
x2 ^= rk0E; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
rk10 ^= rk0C; \
rk11 ^= rk0D; \
rk12 ^= rk0E; \
rk13 ^= rk0F; \
x0 = p0 ^ rk10; \
x1 = p1 ^ rk11; \
x2 = p2 ^ rk12; \
x3 = p3 ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
rk14 ^= rk10; \
rk15 ^= rk11; \
rk16 ^= rk12; \
rk17 ^= rk13; \
x0 ^= rk14; \
x1 ^= rk15; \
x2 ^= rk16; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
rk18 ^= rk14; \
rk19 ^= rk15; \
rk1A ^= rk16; \
rk1B ^= rk17; \
x0 ^= rk18; \
x1 ^= rk19; \
x2 ^= rk1A; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
rk1C ^= rk18; \
rk1D ^= rk19; \
rk1E ^= rk1A; \
rk1F ^= rk1B; \
x0 ^= rk1C; \
x1 ^= rk1D; \
x2 ^= rk1E; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
pC ^= x0; \
pD ^= x1; \
pE ^= x2; \
pF ^= x3; \
/* round 4, 8, 12 */ \
rk00 ^= rk19; \
x0 = p4 ^ rk00; \
rk01 ^= rk1A; \
x1 = p5 ^ rk01; \
rk02 ^= rk1B; \
x2 = p6 ^ rk02; \
rk03 ^= rk1C; \
x3 = p7 ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk04 ^= rk1D; \
x0 ^= rk04; \
rk05 ^= rk1E; \
x1 ^= rk05; \
rk06 ^= rk1F; \
x2 ^= rk06; \
rk07 ^= rk00; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk08 ^= rk01; \
x0 ^= rk08; \
rk09 ^= rk02; \
x1 ^= rk09; \
rk0A ^= rk03; \
x2 ^= rk0A; \
rk0B ^= rk04; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk0C ^= rk05; \
x0 ^= rk0C; \
rk0D ^= rk06; \
x1 ^= rk0D; \
rk0E ^= rk07; \
x2 ^= rk0E; \
rk0F ^= rk08; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p0 ^= x0; \
p1 ^= x1; \
p2 ^= x2; \
p3 ^= x3; \
rk10 ^= rk09; \
x0 = pC ^ rk10; \
rk11 ^= rk0A; \
x1 = pD ^ rk11; \
rk12 ^= rk0B; \
x2 = pE ^ rk12; \
rk13 ^= rk0C; \
x3 = pF ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk14 ^= rk0D; \
x0 ^= rk14; \
rk15 ^= rk0E; \
x1 ^= rk15; \
rk16 ^= rk0F; \
x2 ^= rk16; \
rk17 ^= rk10; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk18 ^= rk11; \
x0 ^= rk18; \
rk19 ^= rk12; \
x1 ^= rk19; \
rk1A ^= rk13; \
x2 ^= rk1A; \
rk1B ^= rk14; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
rk1C ^= rk15; \
x0 ^= rk1C; \
rk1D ^= rk16; \
x1 ^= rk1D; \
rk1E ^= rk17; \
x2 ^= rk1E; \
rk1F ^= rk18; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p8 ^= x0; \
p9 ^= x1; \
pA ^= x2; \
pB ^= x3; \
} \
/* round 13 */ \
KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
rk00 ^= rk1C; \
rk01 ^= rk1D; \
rk02 ^= rk1E; \
rk03 ^= rk1F; \
x0 = p0 ^ rk00; \
x1 = p1 ^ rk01; \
x2 = p2 ^ rk02; \
x3 = p3 ^ rk03; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
rk04 ^= rk00; \
rk05 ^= rk01; \
rk06 ^= rk02; \
rk07 ^= rk03; \
x0 ^= rk04; \
x1 ^= rk05; \
x2 ^= rk06; \
x3 ^= rk07; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
rk08 ^= rk04; \
rk09 ^= rk05; \
rk0A ^= rk06; \
rk0B ^= rk07; \
x0 ^= rk08; \
x1 ^= rk09; \
x2 ^= rk0A; \
x3 ^= rk0B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
rk0C ^= rk08; \
rk0D ^= rk09; \
rk0E ^= rk0A; \
rk0F ^= rk0B; \
x0 ^= rk0C; \
x1 ^= rk0D; \
x2 ^= rk0E; \
x3 ^= rk0F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
pC ^= x0; \
pD ^= x1; \
pE ^= x2; \
pF ^= x3; \
KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
rk10 ^= rk0C; \
rk11 ^= rk0D; \
rk12 ^= rk0E; \
rk13 ^= rk0F; \
x0 = p8 ^ rk10; \
x1 = p9 ^ rk11; \
x2 = pA ^ rk12; \
x3 = pB ^ rk13; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
rk14 ^= rk10; \
rk15 ^= rk11; \
rk16 ^= rk12; \
rk17 ^= rk13; \
x0 ^= rk14; \
x1 ^= rk15; \
x2 ^= rk16; \
x3 ^= rk17; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
rk18 ^= rk14 ^ sc_count1; \
rk19 ^= rk15 ^ sc_count0; \
rk1A ^= rk16 ^ sc_count3; \
rk1B ^= rk17 ^ SPH_T32(~sc_count2); \
x0 ^= rk18; \
x1 ^= rk19; \
x2 ^= rk1A; \
x3 ^= rk1B; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
rk1C ^= rk18; \
rk1D ^= rk19; \
rk1E ^= rk1A; \
rk1F ^= rk1B; \
x0 ^= rk1C; \
x1 ^= rk1D; \
x2 ^= rk1E; \
x3 ^= rk1F; \
AES_ROUND_NOKEY(x0, x1, x2, x3); \
p4 ^= x0; \
p5 ^= x1; \
p6 ^= x2; \
p7 ^= x3; \
h0 ^= p8; \
h1 ^= p9; \
h2 ^= pA; \
h3 ^= pB; \
h4 ^= pC; \
h5 ^= pD; \
h6 ^= pE; \
h7 ^= pF; \
h8 ^= p0; \
h9 ^= p1; \
hA ^= p2; \
hB ^= p3; \
hC ^= p4; \
hD ^= p5; \
hE ^= p6; \
hF ^= p7; \
} while (0)