|
|
|
/*
 * Lyra2 kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 * Copyright (c) 2014 djm34
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author djm34
 */
|
|
|
|
|
|
|
|
/*Blake2b IV Array*/
|
|
|
|
__constant static const sph_u64 blake2b_IV[8] =
|
|
|
|
{
|
|
|
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
|
|
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
|
|
|
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
|
|
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
|
|
|
};
|
|
|
|
|
|
|
|
/*Blake2b's rotation*/
|
|
|
|
|
|
|
|
/*
 * Rotate a 64-bit word right by an arbitrary bit count.
 *
 * The word is carried as a uint2 with v.x holding the low 32 bits and
 * v.y the high 32 bits.
 *
 * v: word to rotate.
 * a: rotation amount in bits; any value is accepted and is reduced
 *    modulo 64.
 *
 * Returns the rotated word in the same uint2 layout.
 *
 * For a in 1..63 the result matches the previous implementation. The
 * counts 0, 64 and above used to reach 32-bit shifts by 32 (or an
 * unsigned wrap of 64 - a) and did not produce a rotation; they are now
 * handled explicitly.
 */
static inline uint2 ror2(uint2 v, unsigned a) {
	uint2 result;

	/* Rotation is periodic in 64; after this every shift count used
	 * below lies in 1..31. */
	a &= 63u;

	if (a == 0) { return v; }
	if (a == 32) { return (uint2)(v.y, v.x); }

	if (a < 32) {
		/* Each half keeps its own upper bits and receives the low
		 * a bits of the other half at the top. */
		result.x = ((v.x >> a) | (v.y << (32 - a)));
		result.y = ((v.y >> a) | (v.x << (32 - a)));
	}
	else {
		/* a in 33..63: same as swapping the halves and then rotating
		 * right by a - 32. */
		result.x = ((v.y >> (a - 32)) | (v.x << (64 - a)));
		result.y = ((v.x >> (a - 32)) | (v.y << (64 - a)));
	}
	return result;
}
|
|
|
|
/*
 * Rotate a 64-bit word (v.x = low half, v.y = high half) right by a bits,
 * for rotation amounts below one half-word.
 *
 * Precondition: 0 < a < 32. No range check is made; a == 0 would shift a
 * 32-bit lane by 32.
 */
static inline uint2 ror2l(uint2 v, unsigned a) {
	const unsigned carry = 32 - a;	/* position of the bits crossing halves */
	const unsigned lo = (v.x >> a) | (v.y << carry);
	const unsigned hi = (v.y >> a) | (v.x << carry);

	return (uint2)(lo, hi);
}
|
|
|
|
/*
 * Rotate a 64-bit word (v.x = low half, v.y = high half) right by a bits,
 * for rotation amounts above one half-word.
 *
 * Precondition: 32 < a < 64. No range check is made; a == 32 would shift
 * a 32-bit lane by 32.
 */
static inline uint2 ror2r(uint2 v, unsigned a) {
	const unsigned up = 64 - a;	/* equivalent left-rotation amount */
	const unsigned down = a - 32;	/* right shift for the bits crossing halves */
	const unsigned lo = (v.x << up) | (v.y >> down);
	const unsigned hi = (v.y << up) | (v.x >> down);

	return (uint2)(lo, hi);
}
|
|
|
|
/*
#define G(a,b,c,d) \
  do { \
	a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = d.yx; \
	c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = ror2l(b, 24); \
	a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = ror2l(d, 16); \
	c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = ror2r(b, 63); \
  } while(0)
*/
|
|
|
|
/*
 * Mixing step applied to four uint2 lvalues, each treated as one 64-bit
 * word. No message words are added. It runs four add / xor / rotate
 * stages with rotation amounts 32, 24, 16 and 63:
 *   - 32 is a swap of the two halves (.yx),
 *   - 24 is a byte shuffle (as_uchar8 ... .s34567012),
 *   - 16 and 63 go through ror2l / ror2r.
 * Every argument is read and written several times, so pass plain
 * lvalues without side effects.
 */
#define G(a,b,c,d) \
	do { \
		a = as_uint2(as_ulong(a) + as_ulong(b)); \
		d = (d ^ a).yx; \
		c = as_uint2(as_ulong(c) + as_ulong(d)); \
		b = as_uint2(as_uchar8(b ^ c).s34567012); \
		a = as_uint2(as_ulong(a) + as_ulong(b)); \
		d = ror2l(d ^ a, 16); \
		c = as_uint2(as_ulong(c) + as_ulong(d)); \
		b = ror2r(b ^ c, 63); \
	} while(0)
|
|
|
|
|
|
|
|
/*
 * One round of the Blake2b compression function over a 16-word state v
 * (v[0]..v[15], each a uint2). G is applied first to the four columns of
 * the state viewed as a 4x4 array, then to its four diagonals.
 */
#define round_lyra(v) \
	do { \
		/* column step */ \
		G(v[ 0], v[ 4], v[ 8], v[12]); \
		G(v[ 1], v[ 5], v[ 9], v[13]); \
		G(v[ 2], v[ 6], v[10], v[14]); \
		G(v[ 3], v[ 7], v[11], v[15]); \
		/* diagonal step */ \
		G(v[ 0], v[ 5], v[10], v[15]); \
		G(v[ 1], v[ 6], v[11], v[12]); \
		G(v[ 2], v[ 7], v[ 8], v[13]); \
		G(v[ 3], v[ 4], v[ 9], v[14]); \
	} while(0)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * reduceDuplexRowSetup(rowIn, rowInOut, rowOut)
 *
 * Expands in a scope that provides `state` (16 words) and `Matrix`
 * (indexed as Matrix[word][row]). Each row is processed as 8 blocks of
 * 12 words. For every block:
 *   1. state[0..11] ^= rowIn block + rowInOut block (64-bit word-wise add);
 *   2. one round_lyra(state);
 *   3. rowOut receives rowIn ^ state, with the blocks written in reverse
 *      order (block at offset blk goes to offset 84 - blk);
 *   4. the rowInOut block is XORed with state[0..11] rotated by one word
 *      (word 0 takes state[11], word w takes state[w - 1]).
 *
 * The row arguments are used several times, so pass side-effect-free
 * expressions. The expansion is a brace block, not a do/while.
 */
#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \
{ \
	for (int blk = 0; blk < 96; blk += 12) \
	{ \
		for (int w = 0; w < 12; w++) { \
			state[w] ^= as_uint2(as_ulong(Matrix[blk + w][rowIn]) + as_ulong(Matrix[blk + w][rowInOut])); \
		} \
		round_lyra(state); \
		for (int w = 0; w < 12; w++) { \
			Matrix[84 - blk + w][rowOut] = Matrix[blk + w][rowIn] ^ state[w]; \
		} \
		Matrix[blk][rowInOut] ^= state[11]; \
		for (int w = 1; w < 12; w++) { \
			Matrix[blk + w][rowInOut] ^= state[w - 1]; \
		} \
	} \
}
|
|
|
|
|
|
|
|
/*
 * reduceDuplexRow(rowIn, rowInOut, rowOut)
 *
 * Expands in a scope that provides `state` (16 words) and `Matrix`
 * (indexed as Matrix[word][row]). Each row is processed as 8 blocks of
 * 12 words. For every block:
 *   1. state[0..11] ^= rowIn block + rowInOut block (64-bit word-wise add);
 *   2. one round_lyra(state);
 *   3. the rowOut block at the same offset is XORed with state[0..11];
 *   4. the rowInOut block is XORed with state[0..11] rotated by one word
 *      (word 0 takes state[11], word w takes state[w - 1]).
 *
 * Steps 3 and 4 stay in this order so the result is the same when rowOut
 * and rowInOut name the same row. The row arguments are used several
 * times, so pass side-effect-free expressions. The expansion is a brace
 * block, not a do/while.
 */
#define reduceDuplexRow(rowIn, rowInOut, rowOut) \
{ \
	for (int blk = 0; blk < 96; blk += 12) \
	{ \
		for (int w = 0; w < 12; w++) { \
			state[w] ^= as_uint2(as_ulong(Matrix[blk + w][rowIn]) + as_ulong(Matrix[blk + w][rowInOut])); \
		} \
		round_lyra(state); \
		for (int w = 0; w < 12; w++) { \
			Matrix[blk + w][rowOut] ^= state[w]; \
		} \
		Matrix[blk][rowInOut] ^= state[11]; \
		for (int w = 1; w < 12; w++) { \
			Matrix[blk + w][rowInOut] ^= state[w - 1]; \
		} \
	} \
}
|
|
|
|
/*
 * absorbblock(in)
 *
 * Expands in a scope that provides `state` and `Matrix`. XORs the first
 * 12 words of Matrix row `in` (Matrix[0..11][in]) into state[0..11], then
 * applies round_lyra to state twelve times. The expansion is a brace
 * block, not a do/while.
 */
#define absorbblock(in) { \
	for (int w = 0; w < 12; w++) { \
		state[w] ^= Matrix[w][in]; \
	} \
	/* twelve rounds */ \
	round_lyra(state); round_lyra(state); round_lyra(state); round_lyra(state); \
	round_lyra(state); round_lyra(state); round_lyra(state); round_lyra(state); \
	round_lyra(state); round_lyra(state); round_lyra(state); round_lyra(state); \
}
|